From d4c33fba2f3433932e72f17233d8de129910b526 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Tue, 7 Jun 2022 20:49:27 +0800 Subject: [PATCH 01/16] feat: create tsma result stable --- include/common/taosdef.h | 1 + include/common/tmsg.h | 61 +++---- source/common/src/tdatablock.c | 40 +++-- source/common/src/tmsg.c | 36 ++-- source/dnode/mnode/impl/inc/mndDef.h | 53 +++--- source/dnode/mnode/impl/src/mndSma.c | 87 +++++++++- source/dnode/mnode/impl/src/mndStream.c | 4 + source/dnode/vnode/src/meta/metaSma.c | 4 +- source/dnode/vnode/src/sma/smaRollup.c | 4 +- source/dnode/vnode/src/sma/smaTimeRange2.c | 188 ++------------------- source/dnode/vnode/src/tq/tqPush.c | 12 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 8 +- 12 files changed, 229 insertions(+), 269 deletions(-) diff --git a/include/common/taosdef.h b/include/common/taosdef.h index 516df71b0b..60b1dc6c10 100644 --- a/include/common/taosdef.h +++ b/include/common/taosdef.h @@ -98,6 +98,7 @@ extern char *qtypeStr[]; #undef TD_DEBUG_PRINT_ROW #undef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS #undef TD_DEBUG_PRINT_TAG +#define TD_DEBUG_SMA_ID 123456 #ifdef __cplusplus } diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 4a3c4b0c3f..72f36a6995 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2329,24 +2329,28 @@ typedef struct { } SVgEpSet; typedef struct { - int8_t version; // for compatibility(default 0) - int8_t intervalUnit; // MACRO: TIME_UNIT_XXX - int8_t slidingUnit; // MACRO: TIME_UNIT_XXX - int8_t timezoneInt; // sma data expired if timezone changes. 
- int32_t dstVgId; - char indexName[TSDB_INDEX_NAME_LEN]; - int32_t exprLen; - int32_t tagsFilterLen; - int32_t numOfVgroups; - int64_t indexUid; - tb_uid_t tableUid; // super/child/common table uid - int64_t interval; - int64_t offset; // use unit by precision of DB - int64_t sliding; - char* expr; // sma expression - char* tagsFilter; - SVgEpSet* pVgEpSet; -} STSma; // Time-range-wise SMA + int8_t version; // for compatibility(default 0) + int8_t intervalUnit; // MACRO: TIME_UNIT_XXX + int8_t slidingUnit; // MACRO: TIME_UNIT_XXX + int8_t timezoneInt; // sma data expired if timezone changes. + int32_t dstVgId; + char indexName[TSDB_INDEX_NAME_LEN]; + int32_t exprLen; + int32_t tagsFilterLen; + int32_t numOfVgroups; // for dstVgroup + int64_t indexUid; + tb_uid_t tableUid; // super/child/common table uid + tb_uid_t dstTbUid; // for dstVgroup + int64_t interval; + int64_t offset; // use unit by precision of DB + int64_t sliding; + char* dstTbName; // for dstVgroup + char* expr; // sma expression + char* tagsFilter; + SVgEpSet* pVgEpSet; // for dstVgroup + SSchemaWrapper schemaRow; // for dstVgroup + SSchemaWrapper schemaTag; // for dstVgroup +} STSma; // Time-range-wise SMA typedef STSma SVCreateTSmaReq; @@ -2493,14 +2497,14 @@ int32_t tSerializeSTableIndexReq(void* buf, int32_t bufLen, STableIndexReq* pReq int32_t tDeserializeSTableIndexReq(void* buf, int32_t bufLen, STableIndexReq* pReq); typedef struct { - int8_t intervalUnit; - int8_t slidingUnit; - int64_t interval; - int64_t offset; - int64_t sliding; - int64_t dstTbUid; - int32_t dstVgId; // for stream - char* expr; + int8_t intervalUnit; + int8_t slidingUnit; + int64_t interval; + int64_t offset; + int64_t sliding; + int64_t dstTbUid; + int32_t dstVgId; // for stream + char* expr; } STableIndexInfo; typedef struct { @@ -2510,7 +2514,6 @@ typedef struct { int32_t tSerializeSTableIndexRsp(void* buf, int32_t bufLen, const STableIndexRsp* pRsp); int32_t tDeserializeSTableIndexRsp(void* buf, int32_t bufLen, 
STableIndexRsp* pRsp); - typedef struct { int8_t mqMsgType; int32_t code; @@ -2751,8 +2754,8 @@ typedef struct { char* msg; } SVDeleteReq; -int32_t tSerializeSVDeleteReq(void *buf, int32_t bufLen, SVDeleteReq *pReq); -int32_t tDeserializeSVDeleteReq(void *buf, int32_t bufLen, SVDeleteReq *pReq); +int32_t tSerializeSVDeleteReq(void* buf, int32_t bufLen, SVDeleteReq* pReq); +int32_t tDeserializeSVDeleteReq(void* buf, int32_t bufLen, SVDeleteReq* pReq); typedef struct { int64_t affectedRows; diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 9caa9a73a5..2202b9579d 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1624,25 +1624,31 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SArray* pDataBlocks break; default: if (pColInfoData->info.type < TSDB_DATA_TYPE_MAX && pColInfoData->info.type > TSDB_DATA_TYPE_NULL) { - char tv[8] = {0}; - if (pColInfoData->info.type == TSDB_DATA_TYPE_FLOAT) { - float v = 0; - GET_TYPED_DATA(v, float, pColInfoData->info.type, var); - SET_TYPED_DATA(&tv, pCol->type, v); - } else if (pColInfoData->info.type == TSDB_DATA_TYPE_DOUBLE) { - double v = 0; - GET_TYPED_DATA(v, double, pColInfoData->info.type, var); - SET_TYPED_DATA(&tv, pCol->type, v); - } else if (IS_SIGNED_NUMERIC_TYPE(pColInfoData->info.type)) { - int64_t v = 0; - GET_TYPED_DATA(v, int64_t, pColInfoData->info.type, var); - SET_TYPED_DATA(&tv, pCol->type, v); + if (pCol->type == pColInfoData->info.type) { + tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NORM, var, true, offset, + k); } else { - uint64_t v = 0; - GET_TYPED_DATA(v, uint64_t, pColInfoData->info.type, var); - SET_TYPED_DATA(&tv, pCol->type, v); + char tv[8] = {0}; + if (pColInfoData->info.type == TSDB_DATA_TYPE_FLOAT) { + float v = 0; + GET_TYPED_DATA(v, float, pColInfoData->info.type, var); + SET_TYPED_DATA(&tv, pCol->type, v); + } else if (pColInfoData->info.type == TSDB_DATA_TYPE_DOUBLE) { + double v = 
0; + GET_TYPED_DATA(v, double, pColInfoData->info.type, var); + SET_TYPED_DATA(&tv, pCol->type, v); + } else if (IS_SIGNED_NUMERIC_TYPE(pColInfoData->info.type)) { + int64_t v = 0; + GET_TYPED_DATA(v, int64_t, pColInfoData->info.type, var); + SET_TYPED_DATA(&tv, pCol->type, v); + } else { + uint64_t v = 0; + GET_TYPED_DATA(v, uint64_t, pColInfoData->info.type, var); + SET_TYPED_DATA(&tv, pCol->type, v); + } + tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NORM, tv, true, offset, + k); } - tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NORM, tv, true, offset, k); } else { uError("the column type %" PRIi16 " is undefined\n", pColInfoData->info.type); TASSERT(0); diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 51c40827b5..bbd7c2e7a0 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -3844,6 +3844,8 @@ int32_t tEncodeTSma(SEncoder *pCoder, const STSma *pSma) { if (tEncodeI32(pCoder, pSma->numOfVgroups) < 0) return -1; if (tEncodeI64(pCoder, pSma->indexUid) < 0) return -1; if (tEncodeI64(pCoder, pSma->tableUid) < 0) return -1; + if (tEncodeI64(pCoder, pSma->dstTbUid) < 0) return -1; + if (tEncodeCStr(pCoder, pSma->dstTbName) < 0) return -1; if (tEncodeI64(pCoder, pSma->interval) < 0) return -1; if (tEncodeI64(pCoder, pSma->offset) < 0) return -1; if (tEncodeI64(pCoder, pSma->sliding) < 0) return -1; @@ -3853,17 +3855,24 @@ int32_t tEncodeTSma(SEncoder *pCoder, const STSma *pSma) { if (pSma->tagsFilterLen > 0) { if (tEncodeCStr(pCoder, pSma->tagsFilter) < 0) return -1; } - for (int32_t v = 0; v < pSma->numOfVgroups; ++v) { - if (tEncodeI32(pCoder, pSma->pVgEpSet[v].vgId) < 0) return -1; - if (tEncodeI8(pCoder, pSma->pVgEpSet[v].epSet.inUse) < 0) return -1; - int8_t numOfEps = pSma->pVgEpSet[v].epSet.numOfEps; - if (tEncodeI8(pCoder, numOfEps) < 0) return -1; - for (int32_t n = 0; n < numOfEps; ++n) { - const SEp *pEp = &pSma->pVgEpSet[v].epSet.eps[n]; - if 
(tEncodeCStr(pCoder, pEp->fqdn) < 0) return -1; - if (tEncodeU16(pCoder, pEp->port) < 0) return -1; + + if (pSma->numOfVgroups) { // only needed in dstVgroup + for (int32_t v = 0; v < pSma->numOfVgroups; ++v) { + if (tEncodeI32(pCoder, pSma->pVgEpSet[v].vgId) < 0) return -1; + if (tEncodeI8(pCoder, pSma->pVgEpSet[v].epSet.inUse) < 0) return -1; + int8_t numOfEps = pSma->pVgEpSet[v].epSet.numOfEps; + if (tEncodeI8(pCoder, numOfEps) < 0) return -1; + for (int32_t n = 0; n < numOfEps; ++n) { + const SEp *pEp = &pSma->pVgEpSet[v].epSet.eps[n]; + if (tEncodeCStr(pCoder, pEp->fqdn) < 0) return -1; + if (tEncodeU16(pCoder, pEp->port) < 0) return -1; + } } + + tEncodeSSchemaWrapper(pCoder, &pSma->schemaRow); + tEncodeSSchemaWrapper(pCoder, &pSma->schemaTag); } + return 0; } @@ -3871,14 +3880,16 @@ int32_t tDecodeTSma(SDecoder *pCoder, STSma *pSma) { if (tDecodeI8(pCoder, &pSma->version) < 0) return -1; if (tDecodeI8(pCoder, &pSma->intervalUnit) < 0) return -1; if (tDecodeI8(pCoder, &pSma->slidingUnit) < 0) return -1; - if (tDecodeI32(pCoder, &pSma->dstVgId) < 0) return -1; if (tDecodeI8(pCoder, &pSma->timezoneInt) < 0) return -1; + if (tDecodeI32(pCoder, &pSma->dstVgId) < 0) return -1; if (tDecodeCStrTo(pCoder, pSma->indexName) < 0) return -1; if (tDecodeI32(pCoder, &pSma->exprLen) < 0) return -1; if (tDecodeI32(pCoder, &pSma->tagsFilterLen) < 0) return -1; if (tDecodeI32(pCoder, &pSma->numOfVgroups) < 0) return -1; if (tDecodeI64(pCoder, &pSma->indexUid) < 0) return -1; if (tDecodeI64(pCoder, &pSma->tableUid) < 0) return -1; + if (tDecodeI64(pCoder, &pSma->dstTbUid) < 0) return -1; + if (tDecodeCStr(pCoder, &pSma->dstTbName) < 0) return -1; if (tDecodeI64(pCoder, &pSma->interval) < 0) return -1; if (tDecodeI64(pCoder, &pSma->offset) < 0) return -1; if (tDecodeI64(pCoder, &pSma->sliding) < 0) return -1; @@ -3892,7 +3903,7 @@ int32_t tDecodeTSma(SDecoder *pCoder, STSma *pSma) { } else { pSma->tagsFilter = NULL; } - if (pSma->numOfVgroups > 0) { + if (pSma->numOfVgroups > 0) 
{ // only needed in dstVgroup pSma->pVgEpSet = (SVgEpSet *)tDecoderMalloc(pCoder, pSma->numOfVgroups * sizeof(SVgEpSet)); if (!pSma->pVgEpSet) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -3912,6 +3923,9 @@ int32_t tDecodeTSma(SDecoder *pCoder, STSma *pSma) { if (tDecodeU16(pCoder, &pEp->port) < 0) return -1; } } + + tDecodeSSchemaWrapperEx(pCoder, &pSma->schemaRow); + tDecodeSSchemaWrapperEx(pCoder, &pSma->schemaTag); } return 0; diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 382f8dd55f..d21af87067 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -298,31 +298,34 @@ typedef struct { } SVgObj; typedef struct { - char name[TSDB_TABLE_FNAME_LEN]; - char stb[TSDB_TABLE_FNAME_LEN]; - char db[TSDB_DB_FNAME_LEN]; - int64_t createdTime; - int64_t uid; - int64_t stbUid; - int64_t dbUid; - int8_t intervalUnit; - int8_t slidingUnit; - int8_t timezone; - int32_t dstVgId; // for stream - int64_t dstTbUid; - int64_t interval; - int64_t offset; - int64_t sliding; - int32_t exprLen; // strlen + 1 - int32_t tagsFilterLen; - int32_t sqlLen; - int32_t astLen; - int32_t numOfVgroups; - char* expr; - char* tagsFilter; - char* sql; - char* ast; - SVgEpSet* pVgEpSet; + char name[TSDB_TABLE_FNAME_LEN]; + char stb[TSDB_TABLE_FNAME_LEN]; + char db[TSDB_DB_FNAME_LEN]; + char dstTbName[TSDB_TABLE_FNAME_LEN]; + int64_t createdTime; + int64_t uid; + int64_t stbUid; + int64_t dbUid; + int64_t dstTbUid; + int8_t intervalUnit; + int8_t slidingUnit; + int8_t timezone; + int32_t dstVgId; // for stream + int64_t interval; + int64_t offset; + int64_t sliding; + int32_t exprLen; // strlen + 1 + int32_t tagsFilterLen; + int32_t sqlLen; + int32_t astLen; + int32_t numOfVgroups; // for dstVgroup + char* expr; + char* tagsFilter; + char* sql; + char* ast; + SVgEpSet* pVgEpSet; // for dstVgroup + SSchemaWrapper schemaRow; // for dstVgroup + SSchemaWrapper schemaTag; // for dstVgroup } SSmaObj; typedef struct { diff 
--git a/source/dnode/mnode/impl/src/mndSma.c b/source/dnode/mnode/impl/src/mndSma.c index 1d47f8fc7a..ef9f5c3352 100644 --- a/source/dnode/mnode/impl/src/mndSma.c +++ b/source/dnode/mnode/impl/src/mndSma.c @@ -26,6 +26,7 @@ #include "mndTrans.h" #include "mndUser.h" #include "mndVgroup.h" +#include "parser.h" #include "tname.h" #define TSDB_SMA_VER_NUMBER 1 @@ -82,10 +83,12 @@ static SSdbRaw *mndSmaActionEncode(SSmaObj *pSma) { SDB_SET_BINARY(pRaw, dataPos, pSma->name, TSDB_TABLE_FNAME_LEN, _OVER) SDB_SET_BINARY(pRaw, dataPos, pSma->stb, TSDB_TABLE_FNAME_LEN, _OVER) SDB_SET_BINARY(pRaw, dataPos, pSma->db, TSDB_DB_FNAME_LEN, _OVER) + SDB_SET_BINARY(pRaw, dataPos, pSma->dstTbName, TSDB_DB_FNAME_LEN, _OVER) SDB_SET_INT64(pRaw, dataPos, pSma->createdTime, _OVER) SDB_SET_INT64(pRaw, dataPos, pSma->uid, _OVER) SDB_SET_INT64(pRaw, dataPos, pSma->stbUid, _OVER) SDB_SET_INT64(pRaw, dataPos, pSma->dbUid, _OVER) + SDB_SET_INT64(pRaw, dataPos, pSma->dstTbUid, _OVER) SDB_SET_INT8(pRaw, dataPos, pSma->intervalUnit, _OVER) SDB_SET_INT8(pRaw, dataPos, pSma->slidingUnit, _OVER) SDB_SET_INT8(pRaw, dataPos, pSma->timezone, _OVER) @@ -147,10 +150,12 @@ static SSdbRow *mndSmaActionDecode(SSdbRaw *pRaw) { SDB_GET_BINARY(pRaw, dataPos, pSma->name, TSDB_TABLE_FNAME_LEN, _OVER) SDB_GET_BINARY(pRaw, dataPos, pSma->stb, TSDB_TABLE_FNAME_LEN, _OVER) SDB_GET_BINARY(pRaw, dataPos, pSma->db, TSDB_DB_FNAME_LEN, _OVER) + SDB_GET_BINARY(pRaw, dataPos, pSma->dstTbName, TSDB_DB_FNAME_LEN, _OVER) SDB_GET_INT64(pRaw, dataPos, &pSma->createdTime, _OVER) SDB_GET_INT64(pRaw, dataPos, &pSma->uid, _OVER) SDB_GET_INT64(pRaw, dataPos, &pSma->stbUid, _OVER) SDB_GET_INT64(pRaw, dataPos, &pSma->dbUid, _OVER) + SDB_GET_INT64(pRaw, dataPos, &pSma->dstTbUid, _OVER) SDB_GET_INT8(pRaw, dataPos, &pSma->intervalUnit, _OVER) SDB_GET_INT8(pRaw, dataPos, &pSma->slidingUnit, _OVER) SDB_GET_INT8(pRaw, dataPos, &pSma->timezone, _OVER) @@ -260,6 +265,8 @@ static void *mndBuildVCreateSmaReq(SMnode *pMnode, SVgObj *pVgroup, 
SSmaObj *pSm req.tagsFilterLen = pSma->tagsFilterLen; req.indexUid = pSma->uid; req.tableUid = pSma->stbUid; + req.dstVgId = pSma->dstVgId; + req.dstTbUid = pSma->dstTbUid; req.interval = pSma->interval; req.offset = pSma->offset; req.sliding = pSma->sliding; @@ -267,7 +274,10 @@ static void *mndBuildVCreateSmaReq(SMnode *pMnode, SVgObj *pVgroup, SSmaObj *pSm req.tagsFilter = pSma->tagsFilter; req.numOfVgroups = pSma->numOfVgroups; req.pVgEpSet = pSma->pVgEpSet; - + req.schemaRow = pSma->schemaRow; + req.schemaTag = pSma->schemaTag; + req.dstTbName = pSma->dstTbName; + // get length int32_t ret = 0; tEncodeSize(tEncodeSVCreateTSmaReq, &req, contLen, ret); @@ -425,15 +435,43 @@ static int32_t mndSetCreateSmaVgroupRedoActions(SMnode *pMnode, STrans *pTrans, mndReleaseDnode(pMnode, pDnode); // todo add sma info here +#if 1 + SNode *pAst = NULL; + if (nodesStringToNode(pSma->ast, &pAst) < 0) { + return -1; + } + if (qExtractResultSchema(pAst, &pSma->schemaRow.nCols, &pSma->schemaRow.pSchema) != 0) { + nodesDestroyNode(pAst); + return -1; + } + nodesDestroyNode(pAst); + pSma->schemaRow.version = 1; + + // TODO: the schemaTag generated by qExtractResultXXX later. 
+ pSma->schemaTag.nCols = 1; + pSma->schemaTag.version = 1; + pSma->schemaTag.pSchema = taosMemoryCalloc(1, sizeof(SSchema)); + if (!pSma->schemaTag.pSchema) { + nodesDestroyNode(pAst); + return -1; + } + pSma->schemaTag.pSchema[0].type = TSDB_DATA_TYPE_BIGINT; + pSma->schemaTag.pSchema[0].bytes = TYPE_BYTES[TSDB_DATA_TYPE_BIGINT]; + pSma->schemaTag.pSchema[0].colId = pSma->schemaRow.nCols + PRIMARYKEY_TIMESTAMP_COL_ID; + pSma->schemaTag.pSchema[0].flags = 0; + snprintf(pSma->schemaTag.pSchema[0].name, TSDB_COL_NAME_LEN, "groupId"); + SVgEpSet *pVgEpSet = NULL; int32_t numOfVgroups = 0; if (mndSmaGetVgEpSet(pMnode, pDb, &pVgEpSet, &numOfVgroups) != 0) { + nodesDestroyNode(pAst); return -1; } + nodesDestroyNode(pAst); pSma->pVgEpSet = pVgEpSet; pSma->numOfVgroups = numOfVgroups; - +#endif int32_t smaContLen = 0; void *pSmaReq = mndBuildVCreateSmaReq(pMnode, pVgroup, pSma, &smaContLen); if (pSmaReq == NULL) return -1; @@ -463,13 +501,20 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea memcpy(smaObj.stb, pStb->name, TSDB_TABLE_FNAME_LEN); memcpy(smaObj.db, pDb->name, TSDB_DB_FNAME_LEN); smaObj.createdTime = taosGetTimestampMs(); +#if 0 smaObj.uid = mndGenerateUid(pCreate->name, TSDB_TABLE_FNAME_LEN); +#endif + smaObj.uid = TD_DEBUG_SMA_ID; + char resultTbName[TSDB_TABLE_FNAME_LEN + 16] = {0}; + snprintf(resultTbName, TSDB_TABLE_FNAME_LEN + 16, "td.tsma.rst.tb.%s", pCreate->name); + memcpy(smaObj.dstTbName, resultTbName, TSDB_TABLE_FNAME_LEN); + smaObj.dstTbUid = mndGenerateUid(smaObj.dstTbName, TSDB_TABLE_FNAME_LEN); smaObj.stbUid = pStb->uid; smaObj.dbUid = pStb->dbUid; smaObj.intervalUnit = pCreate->intervalUnit; smaObj.slidingUnit = pCreate->slidingUnit; smaObj.timezone = pCreate->timezone; - smaObj.dstVgId = pCreate->dstVgId; + // smaObj.dstVgId = pCreate->dstVgId; smaObj.interval = pCreate->interval; smaObj.offset = pCreate->offset; smaObj.sliding = pCreate->sliding; @@ -502,6 +547,42 @@ static int32_t mndCreateSma(SMnode 
*pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea memcpy(smaObj.ast, pCreate->ast, smaObj.astLen); } +#if 1 // only for debugging, not needed in common vgroups, only needed in dstVgroup. + SNode *pAst = NULL; + if (nodesStringToNode(smaObj.ast, &pAst) < 0) { + return -1; + } + if (qExtractResultSchema(pAst, &smaObj.schemaRow.nCols, &smaObj.schemaRow.pSchema) != 0) { + nodesDestroyNode(pAst); + return -1; + } + smaObj.schemaRow.version = 1; + + smaObj.schemaTag.nCols = 1; + smaObj.schemaTag.version = 1; + smaObj.schemaTag.pSchema = taosMemoryCalloc(1, sizeof(SSchema)); + if (!smaObj.schemaTag.pSchema) { + nodesDestroyNode(pAst); + return -1; + } + smaObj.schemaTag.pSchema[0].type = TSDB_DATA_TYPE_BIGINT; + smaObj.schemaTag.pSchema[0].bytes = TYPE_BYTES[TSDB_DATA_TYPE_BIGINT]; + smaObj.schemaTag.pSchema[0].colId = smaObj.schemaRow.nCols + PRIMARYKEY_TIMESTAMP_COL_ID; + smaObj.schemaTag.pSchema[0].flags = 0; + snprintf(smaObj.schemaTag.pSchema[0].name, TSDB_COL_NAME_LEN, "groupId"); + + nodesDestroyNode(pAst); + + SVgEpSet *pVgEpSet = NULL; + int32_t numOfVgroups = 0; + if (mndSmaGetVgEpSet(pMnode, pDb, &pVgEpSet, &numOfVgroups) != 0) { + return -1; + } + + smaObj.pVgEpSet = pVgEpSet; + smaObj.numOfVgroups = numOfVgroups; +#endif + SStreamObj streamObj = {0}; tstrncpy(streamObj.name, pCreate->name, TSDB_STREAM_FNAME_LEN); tstrncpy(streamObj.sourceDb, pDb->name, TSDB_DB_FNAME_LEN); diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 7abe9e3c0d..a331534a93 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -252,8 +252,12 @@ int32_t mndAddStreamToTrans(SMnode *pMnode, SStreamObj *pStream, const char *ast } if (qExtractResultSchema(pAst, (int32_t *)&pStream->outputSchema.nCols, &pStream->outputSchema.pSchema) != 0) { + nodesDestroyNode(pAst); return -1; } + // free + nodesDestroyNode(pAst); + #if 0 printf("|"); diff --git a/source/dnode/vnode/src/meta/metaSma.c 
b/source/dnode/vnode/src/meta/metaSma.c index 689cd511c4..910a0835bb 100644 --- a/source/dnode/vnode/src/meta/metaSma.c +++ b/source/dnode/vnode/src/meta/metaSma.c @@ -34,13 +34,13 @@ int32_t metaCreateTSma(SMeta *pMeta, int64_t version, SSmaCfg *pCfg) { SMetaReader mr = {0}; // validate req + // save smaIndex metaReaderInit(&mr, pMeta, 0); if (metaGetTableEntryByUid(&mr, pCfg->indexUid) == 0) { -// TODO: just for pass case #if 1 terrno = TSDB_CODE_TDB_TSMA_ALREADY_EXIST; metaReaderClear(&mr); - return -1; + return -1; // don't goto _err; #else metaReaderClear(&mr); return 0; diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index e738d3a408..05ad5c1cb2 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -167,13 +167,13 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui */ int32_t tdProcessRSmaCreate(SVnode *pVnode, SVCreateStbReq *pReq) { SSma *pSma = pVnode->pSma; - SMeta *pMeta = pVnode->pMeta; - SMsgCb *pMsgCb = &pVnode->msgCb; if (!pReq->rollup) { smaTrace("vgId:%d, return directly since no rollup for stable %s %" PRIi64, SMA_VID(pSma), pReq->name, pReq->suid); return TSDB_CODE_SUCCESS; } + SMeta *pMeta = pVnode->pMeta; + SMsgCb *pMsgCb = &pVnode->msgCb; SRSmaParam *param = &pReq->pRSmaParam; if ((param->qmsg1Len == 0) && (param->qmsg2Len == 0)) { diff --git a/source/dnode/vnode/src/sma/smaTimeRange2.c b/source/dnode/vnode/src/sma/smaTimeRange2.c index 09adc1a6a2..df89ec1795 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange2.c +++ b/source/dnode/vnode/src/sma/smaTimeRange2.c @@ -363,8 +363,8 @@ static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval) { */ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); +#if 0 const SArray *pDataBlocks = (const SArray *)msg; - int64_t testSkey = TSKEY_INITIAL_VAL; // TODO: destroy SSDataBlocks(msg) @@ -386,6 +386,7 @@ 
int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { smaWarn("vgId:%d insert tSma data failed since pDataBlocks is empty", SMA_VID(pSma)); return TSDB_CODE_FAILED; } +#endif SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); SSmaStat *pStat = SMA_ENV_STAT(pEnv); @@ -403,178 +404,8 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { return TSDB_CODE_FAILED; } - STSma *pTSma = pItem->pTSma; - STSmaWriteH tSmaH = {0}; + STSma *pTSma = pItem->pTSma; - if (tdInitTSmaWriteH(&tSmaH, pSma, pDataBlocks, pTSma->interval, pTSma->intervalUnit) != 0) { - return TSDB_CODE_FAILED; - } - - char rPath[TSDB_FILENAME_LEN] = {0}; - char aPath[TSDB_FILENAME_LEN] = {0}; - snprintf(rPath, TSDB_FILENAME_LEN, "%s%s%" PRIi64, SMA_ENV_PATH(pEnv), TD_DIRSEP, indexUid); - tfsAbsoluteName(SMA_TFS(pSma), SMA_ENV_DID(pEnv), rPath, aPath); - if (!taosCheckExistFile(aPath)) { - if (tfsMkdirRecurAt(SMA_TFS(pSma), rPath, SMA_ENV_DID(pEnv)) != TSDB_CODE_SUCCESS) { - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - } - - // Step 1: Judge the storage level and days - int32_t storageLevel = tdGetSmaStorageLevel(pCfg, tSmaH.interval); - int32_t minutePerFile = tdGetTSmaDays(pSma, tSmaH.interval, storageLevel); - - char smaKey[SMA_KEY_LEN] = {0}; // key: skey + groupId - char dataBuf[512] = {0}; // val: aggr data // TODO: handle 512 buffer? 
- void *pDataBuf = NULL; - int32_t sz = taosArrayGetSize(pDataBlocks); - for (int32_t i = 0; i < sz; ++i) { - SSDataBlock *pDataBlock = taosArrayGet(pDataBlocks, i); - int32_t colNum = pDataBlock->info.numOfCols; - int32_t rows = pDataBlock->info.rows; - int32_t rowSize = pDataBlock->info.rowSize; - int64_t groupId = pDataBlock->info.groupId; - for (int32_t j = 0; j < rows; ++j) { - printf("|"); - TSKEY skey = TSKEY_INITIAL_VAL; // the start key of TS window by interval - void *pSmaKey = &smaKey; - bool isStartKey = false; - - int32_t tlen = 0; // reset the len - pDataBuf = &dataBuf; // reset the buf - for (int32_t k = 0; k < colNum; ++k) { - SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock->pDataBlock, k); - void *var = POINTER_SHIFT(pColInfoData->pData, j * pColInfoData->info.bytes); - switch (pColInfoData->info.type) { - case TSDB_DATA_TYPE_TIMESTAMP: - if (!isStartKey) { - isStartKey = true; - skey = *(TSKEY *)var; - testSkey = skey; - printf("= skey %" PRIi64 " groupId = %" PRIi64 "|", skey, groupId); - tdEncodeTSmaKey(groupId, skey, &pSmaKey); - } else { - printf(" %" PRIi64 " |", *(int64_t *)var); - tlen += taosEncodeFixedI64(&pDataBuf, *(int64_t *)var); - break; - } - break; - case TSDB_DATA_TYPE_BOOL: - case TSDB_DATA_TYPE_UTINYINT: - printf(" %15d |", *(uint8_t *)var); - tlen += taosEncodeFixedU8(&pDataBuf, *(uint8_t *)var); - break; - case TSDB_DATA_TYPE_TINYINT: - printf(" %15d |", *(int8_t *)var); - tlen += taosEncodeFixedI8(&pDataBuf, *(int8_t *)var); - break; - case TSDB_DATA_TYPE_SMALLINT: - printf(" %15d |", *(int16_t *)var); - tlen += taosEncodeFixedI16(&pDataBuf, *(int16_t *)var); - break; - case TSDB_DATA_TYPE_USMALLINT: - printf(" %15d |", *(uint16_t *)var); - tlen += taosEncodeFixedU16(&pDataBuf, *(uint16_t *)var); - break; - case TSDB_DATA_TYPE_INT: - printf(" %15d |", *(int32_t *)var); - tlen += taosEncodeFixedI32(&pDataBuf, *(int32_t *)var); - break; - case TSDB_DATA_TYPE_FLOAT: - printf(" %15f |", *(float *)var); - tlen += 
taosEncodeBinary(&pDataBuf, var, sizeof(float)); - break; - case TSDB_DATA_TYPE_UINT: - printf(" %15u |", *(uint32_t *)var); - tlen += taosEncodeFixedU32(&pDataBuf, *(uint32_t *)var); - break; - case TSDB_DATA_TYPE_BIGINT: - printf(" %15ld |", *(int64_t *)var); - tlen += taosEncodeFixedI64(&pDataBuf, *(int64_t *)var); - break; - case TSDB_DATA_TYPE_DOUBLE: - printf(" %15lf |", *(double *)var); - tlen += taosEncodeBinary(&pDataBuf, var, sizeof(double)); - case TSDB_DATA_TYPE_UBIGINT: - printf(" %15lu |", *(uint64_t *)var); - tlen += taosEncodeFixedU64(&pDataBuf, *(uint64_t *)var); - break; - case TSDB_DATA_TYPE_NCHAR: { - char tmpChar[100] = {0}; - strncpy(tmpChar, varDataVal(var), varDataLen(var)); - printf(" %s |", tmpChar); - tlen += taosEncodeBinary(&pDataBuf, varDataVal(var), varDataLen(var)); - break; - } - case TSDB_DATA_TYPE_VARCHAR: { // TSDB_DATA_TYPE_BINARY - char tmpChar[100] = {0}; - strncpy(tmpChar, varDataVal(var), varDataLen(var)); - printf(" %s |", tmpChar); - tlen += taosEncodeBinary(&pDataBuf, varDataVal(var), varDataLen(var)); - break; - } - case TSDB_DATA_TYPE_VARBINARY: - // TODO: add binary/varbinary - TASSERT(0); - default: - printf("the column type %" PRIi16 " is undefined\n", pColInfoData->info.type); - TASSERT(0); - break; - } - } - printf("\n"); - // if ((tlen > 0) && (skey != TSKEY_INITIAL_VAL)) { - if (tlen > 0) { - int32_t fid = (int32_t)(TSDB_KEY_FID(skey, minutePerFile, pCfg->precision)); - - // Step 2: Set the DFile for storage of SMA index, and iterate/split the TSma data and store to B+Tree index - // file - // - Set and open the DFile or the B+Tree file - // TODO: tsdbStartTSmaCommit(); - if (fid != tSmaH.dFile.fid) { - if (tSmaH.dFile.fid != SMA_IVLD_FID) { - tdSmaEndCommit(pEnv); - smaCloseDBF(&tSmaH.dFile); - } - tdSetTSmaDataFile(&tSmaH, indexUid, fid); - smaDebug("@@@ vgId:%d write to DBF %s, days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi32 - " queryKey:%" PRIi64, - SMA_VID(pSma), tSmaH.dFile.path, minutePerFile, 
tSmaH.interval, storageLevel, testSkey); - if (smaOpenDBF(pEnv->dbEnv, &tSmaH.dFile) != 0) { - smaWarn("vgId:%d open DB file %s failed since %s", SMA_VID(pSma), - tSmaH.dFile.path ? tSmaH.dFile.path : "path is NULL", tstrerror(terrno)); - tdDestroyTSmaWriteH(&tSmaH); - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - tdSmaBeginCommit(pEnv); - } - - if (tdInsertTSmaBlocks(&tSmaH, &smaKey, SMA_KEY_LEN, dataBuf, tlen, &pEnv->txn) != 0) { - smaWarn("vgId:%d insert tsma data blocks fail for index %" PRIi64 ", skey %" PRIi64 ", groupId %" PRIi64 - " since %s", - SMA_VID(pSma), indexUid, skey, groupId, tstrerror(terrno)); - tdSmaEndCommit(pEnv); - tdDestroyTSmaWriteH(&tSmaH); - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - - smaDebug("vgId:%d insert tsma data blocks success for index %" PRIi64 ", skey %" PRIi64 ", groupId %" PRIi64, - SMA_VID(pSma), indexUid, skey, groupId); - // TODO:tsdbEndTSmaCommit(); - - // Step 3: reset the SSmaStat - tdResetExpiredWindow(pSma, pStat, indexUid, skey); - } else { - smaWarn("vgId:%d invalid data skey:%" PRIi64 ", tlen %" PRIi32 " during insert tSma data for %" PRIi64, - SMA_VID(pSma), skey, tlen, indexUid); - } - } - } - tdSmaEndCommit(pEnv); // TODO: not commit for every insert - tdDestroyTSmaWriteH(&tSmaH); tdUnRefSmaStat(pSma, pStat); return TSDB_CODE_SUCCESS; @@ -865,6 +696,19 @@ int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg) { return -1; } + if (TD_VID(pSma->pVnode) == pCfg->dstVgId) { + // create stable to save tsma result in dstVgId + SVCreateStbReq pReq = {0}; + pReq.name = pCfg->dstTbName; + pReq.suid = pCfg->dstTbUid; + pReq.schemaRow = pCfg->schemaRow; + pReq.schemaTag = pCfg->schemaTag; + + if (metaCreateSTable(SMA_META(pSma), version, &pReq) < 0) { + return -1; + } + } + tdTSmaAdd(pSma, 1); return 0; } diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 9be94eb5b6..2f26fba50a 100644 --- a/source/dnode/vnode/src/tq/tqPush.c 
+++ b/source/dnode/vnode/src/tq/tqPush.c @@ -241,13 +241,13 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) if (tdUpdateExpireWindow(pTq->pVnode->pSma, msg, ver) != 0) { // TODO handle sma error } - void* data = taosMemoryMalloc(msgLen); - if (data == NULL) { - return -1; - } - memcpy(data, msg, msgLen); + // void* data = taosMemoryMalloc(msgLen); + // if (data == NULL) { + // return -1; + // } + // memcpy(data, msg, msgLen); - tqProcessStreamTrigger(pTq, data); + // tqProcessStreamTrigger(pTq, data); } return 0; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 7a9c9ef393..dd705011c4 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -174,11 +174,12 @@ int32_t vnodeProcessWriteReq(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp } vTrace("vgId:%d, process %s request success, version: %" PRId64, TD_VID(pVnode), TMSG_INFO(pMsg->msgType), version); - +#if 1 if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) { vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); return -1; } +#endif // commit if need if (vnodeShouldCommit(pVnode)) { @@ -278,8 +279,11 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, int64_t version, SRpcMsg *pMsg, SRp void smaHandleRes(void *pVnode, int64_t smaId, const SArray *data) { // TODO - // blockDebugShowData(data); + // blockDebugShowData(data, __func__); +#if 0 tdProcessTSmaInsert(((SVnode *)pVnode)->pSma, smaId, (const char *)data); +#endif + tdProcessTSmaInsert(((SVnode *)pVnode)->pSma, TD_DEBUG_SMA_ID, NULL); } void vnodeUpdateMetaRsp(SVnode *pVnode, STableMetaRsp *pMetaRsp) { From 75182bb0ec20d38452718eec80f2abbdd699bb0f Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Wed, 8 Jun 2022 15:20:29 +0800 Subject: [PATCH 02/16] feat: tsma exp wnd clear --- include/common/tmsg.h | 22 +++++++++ include/common/tmsgdef.h | 1 + source/common/src/tmsg.c | 52 
+++++++++++++++++++-- source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 1 + source/dnode/vnode/src/vnd/vnodeSvr.c | 49 +++++++++++++++++-- 5 files changed, 116 insertions(+), 9 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 72f36a6995..de64c5157c 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2455,6 +2455,28 @@ int32_t tDecodeSVGetTsmaExpWndsReq(SDecoder* pCoder, SVGetTsmaExpWndsReq* pReq); int32_t tEncodeSVGetTSmaExpWndsRsp(SEncoder* pCoder, const SVGetTsmaExpWndsRsp* pReq); int32_t tDecodeSVGetTsmaExpWndsRsp(SDecoder* pCoder, SVGetTsmaExpWndsRsp* pReq); +typedef struct { + int64_t nKeys; // n consecutive keys since skey + int64_t skey; +} SVTsmaExpWndItem; + +typedef struct { + int64_t indexUid; + int64_t version; // tsma result version + int64_t nItems; + SVTsmaExpWndItem items[]; +} SVClrTsmaExpWndsReq; + +typedef struct { + int64_t indexUid; + int32_t code; +} SVClrTsmaExpWndsRsp; + +int32_t tEncodeSVClrTsmaExpWndsReq(SEncoder* pCoder, const SVClrTsmaExpWndsReq* pReq); +int32_t tDecodeSVClrTsmaExpWndsReq(SDecoder* pCoder, SVClrTsmaExpWndsReq* pReq); +int32_t tEncodeSVClrTsmaExpWndsRsp(SEncoder* pCoder, const SVClrTsmaExpWndsRsp* pReq); +int32_t tDecodeSVClrTsmaExpWndsRsp(SDecoder* pCoder, SVClrTsmaExpWndsRsp* pReq); + typedef struct { int idx; } SMCreateFullTextReq; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index e922df64b3..81ddc68d5d 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -190,6 +190,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_DROP_SMA, "vnode-drop-sma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_SUBMIT_RSMA, "vnode-submit-rsma", SSubmitReq, SSubmitRsp) TD_DEF_MSG_TYPE(TDMT_VND_GET_TSMA_EXP_WNDS, "vnode-get-tsma-expired-windows", SVGetTsmaExpWndsReq, SVGetTsmaExpWndsRsp) + TD_DEF_MSG_TYPE(TDMT_VND_CLR_TSMA_EXP_WNDS, "vnode-clr-tsma-expired-windows", SVClrTsmaExpWndsReq, SVClrTsmaExpWndsRsp) TD_DEF_MSG_TYPE(TDMT_VND_DELETE, "delete-data", SVDeleteReq, SVDeleteRsp) 
TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_REPLICA, "alter-replica", NULL, NULL) diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index bbd7c2e7a0..d7ebf03fe1 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -2419,7 +2419,7 @@ int32_t tDeserializeSTableIndexReq(void *buf, int32_t bufLen, STableIndexReq *pR return 0; } -int32_t tSerializeSTableIndexInfo(SEncoder *pEncoder, STableIndexInfo* pInfo) { +int32_t tSerializeSTableIndexInfo(SEncoder *pEncoder, STableIndexInfo *pInfo) { if (tEncodeI8(pEncoder, pInfo->intervalUnit) < 0) return -1; if (tEncodeI8(pEncoder, pInfo->slidingUnit) < 0) return -1; if (tEncodeI64(pEncoder, pInfo->interval) < 0) return -1; @@ -2440,7 +2440,7 @@ int32_t tSerializeSTableIndexRsp(void *buf, int32_t bufLen, const STableIndexRsp if (tEncodeI32(&encoder, num) < 0) return -1; if (num > 0) { for (int32_t i = 0; i < num; ++i) { - STableIndexInfo* pInfo = (STableIndexInfo*)taosArrayGet(pRsp->pIndex, i); + STableIndexInfo *pInfo = (STableIndexInfo *)taosArrayGet(pRsp->pIndex, i); if (tSerializeSTableIndexInfo(&encoder, pInfo) < 0) return -1; } } @@ -2489,7 +2489,6 @@ int32_t tDeserializeSTableIndexRsp(void *buf, int32_t bufLen, STableIndexRsp *pR return 0; } - int32_t tSerializeSShowReq(void *buf, int32_t bufLen, SShowReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -3856,7 +3855,7 @@ int32_t tEncodeTSma(SEncoder *pCoder, const STSma *pSma) { if (tEncodeCStr(pCoder, pSma->tagsFilter) < 0) return -1; } - if (pSma->numOfVgroups) { // only needed in dstVgroup + if (pSma->numOfVgroups) { // only needed in dstVgroup for (int32_t v = 0; v < pSma->numOfVgroups; ++v) { if (tEncodeI32(pCoder, pSma->pVgEpSet[v].vgId) < 0) return -1; if (tEncodeI8(pCoder, pSma->pVgEpSet[v].epSet.inUse) < 0) return -1; @@ -3903,7 +3902,7 @@ int32_t tDecodeTSma(SDecoder *pCoder, STSma *pSma) { } else { pSma->tagsFilter = NULL; } - if (pSma->numOfVgroups > 
0) { // only needed in dstVgroup + if (pSma->numOfVgroups > 0) { // only needed in dstVgroup pSma->pVgEpSet = (SVgEpSet *)tDecoderMalloc(pCoder, pSma->numOfVgroups * sizeof(SVgEpSet)); if (!pSma->pVgEpSet) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -4018,6 +4017,49 @@ int32_t tDecodeSVGetTsmaExpWndsRsp(SDecoder *pCoder, SVGetTsmaExpWndsRsp *pReq) return 0; } +int32_t tEncodeSVClrTsmaExpWndsReq(SEncoder *pCoder, const SVClrTsmaExpWndsReq *pReq) { + if (tStartEncode(pCoder) < 0) return -1; + if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; + if (tEncodeI64(pCoder, pReq->version) < 0) return -1; + if (tEncodeI64v(pCoder, pReq->nItems) < 0) return -1; + for (int64_t n = 0; n < pReq->nItems; ++n) { + if (tEncodeI64v(pCoder, pReq->items[n].nKeys) < 0) return -1; + if (tEncodeI64(pCoder, pReq->items[n].skey) < 0) return -1; + } + tEndEncode(pCoder); + return 0; +} + +int32_t tDecodeSVClrTsmaExpWndsReq(SDecoder *pCoder, SVClrTsmaExpWndsReq *pReq) { + if (tStartDecode(pCoder) < 0) return -1; + if (tDecodeI64(pCoder, &pReq->indexUid) < 0) return -1; + if (tDecodeI64(pCoder, &pReq->version) < 0) return -1; + if (tDecodeI64v(pCoder, &pReq->nItems) < 0) return -1; + + for (int64_t i = 0; i < pReq->nItems; ++i) { + if (tDecodeI64v(pCoder, &pReq->items[i].nKeys) < 0) return -1; + if (tDecodeI64(pCoder, &pReq->items[i].skey) < 0) return -1; + } + tEndDecode(pCoder); + return 0; +} + +int32_t tEncodeSVClrTsmaExpWndsRsp(SEncoder *pCoder, const SVClrTsmaExpWndsRsp *pReq) { + if (tStartEncode(pCoder) < 0) return -1; + if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; + if (tEncodeI32(pCoder, pReq->code) < 0) return -1; + tEndEncode(pCoder); + return 0; +} + +int32_t tDecodeSVClrTsmaExpWndsRsp(SDecoder *pCoder, SVClrTsmaExpWndsRsp *pReq) { + if (tStartDecode(pCoder) < 0) return -1; + if (tDecodeI64(pCoder, &pReq->indexUid) < 0) return -1; + if (tDecodeI32(pCoder, &pReq->code) < 0) return -1; + tEndDecode(pCoder); + return 0; +} + int32_t tSerializeSVDeleteReq(void *buf, int32_t
bufLen, SVDeleteReq *pReq) { int32_t headLen = sizeof(SMsgHead); if (buf != NULL) { diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 4c5a32536f..9c3c0af986 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -348,6 +348,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_CONSUME, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DELETE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY_HEARTBEAT, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_CLR_TSMA_EXP_WNDS, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index dd705011c4..157162161e 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -25,6 +25,7 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t version, void *pReq static int32_t vnodeProcessCreateTSmaReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessWriteMsg(SVnode *pVnode, int64_t version, SRpcMsg *pMsg, SRpcMsg *pRsp); +static int32_t vnodeProcessExpWndsClrReq(SVnode *pVnode, void *pReq, int32_t len, SRpcMsg *pRsp); int32_t vnodePreprocessReq(SVnode *pVnode, SRpcMsg *pMsg) { int32_t code = 0; @@ -174,7 +175,8 @@ int32_t vnodeProcessWriteReq(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp } vTrace("vgId:%d, process %s request success, version: %" PRId64, TD_VID(pVnode), TMSG_INFO(pMsg->msgType), version); 
-#if 1 + +#if 0 if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) { vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); return -1; @@ -225,6 +227,7 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { vTrace("message in fetch queue is processing"); char *msgstr = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + switch (pMsg->msgType) { case TDMT_VND_FETCH: return qWorkerProcessFetchMsg(pVnode, pVnode->pQuery, pMsg, 0); @@ -236,13 +239,10 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { return qWorkerProcessDropMsg(pVnode, pVnode->pQuery, pMsg, 0); case TDMT_VND_QUERY_HEARTBEAT: return qWorkerProcessHbMsg(pVnode, pVnode->pQuery, pMsg, 0); - case TDMT_VND_TABLE_META: return vnodeGetTableMeta(pVnode, pMsg); - case TDMT_VND_CONSUME: return tqProcessPollReq(pVnode->pTq, pMsg, pInfo->workerId); - case TDMT_STREAM_TASK_RUN: return tqProcessTaskRunReq(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_DISPATCH: @@ -253,6 +253,8 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { return tqProcessTaskDispatchRsp(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_RECOVER_RSP: return tqProcessTaskRecoverRsp(pVnode->pTq, pMsg); + case TDMT_VND_CLR_TSMA_EXP_WNDS: + return vnodeProcessExpWndsClrReq(pVnode, msgstr, msgLen, NULL); default: vError("unknown msg type:%d in fetch queue", pMsg->msgType); return TSDB_CODE_VND_APP_ERROR; @@ -896,3 +898,42 @@ static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void return 0; } + +static int32_t vnodeProcessExpWndsClrReq(SVnode *pVnode, void *pReq, int32_t len, SRpcMsg *pRsp) { + SVClrTsmaExpWndsReq req = {0}; + SDecoder coder = {0}; + + if (pRsp) { + pRsp->msgType = TDMT_VND_CLR_TSMA_EXP_WNDS_RSP; + pRsp->code = TSDB_CODE_SUCCESS; + pRsp->pCont = NULL; + pRsp->contLen = 0; + } + + // decode and process +
tDecoderInit(&coder, pReq, len); + + if (tDecodeSVClrTsmaExpWndsReq(&coder, &req) < 0) { + terrno = TSDB_CODE_MSG_DECODE_ERROR; + if (pRsp) pRsp->code = terrno; + goto _err; + } + + ASSERT(0); + + // if (tdProcess(pVnode->pSma, version, (const char *)&req) < 0) { + // if (pRsp) pRsp->code = terrno; + // goto _err; + // } + + tDecoderClear(&coder); + vDebug("vgId:%d, success to process expWnds clear for tsma %" PRIi64 " version %" PRIi64, TD_VID(pVnode), + req.indexUid, req.version); + return 0; + +_err: + tDecoderClear(&coder); + vError("vgId:%d, failed to process expWnds clear for tsma %" PRIi64 " version %" PRIi64 " since %s", TD_VID(pVnode), + req.indexUid, req.version, terrstr()); + return -1; +} \ No newline at end of file From 2b2485d175db7509dc0bc4a1c8333a1c4e6d1b86 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Thu, 9 Jun 2022 20:02:55 +0800 Subject: [PATCH 03/16] feat: tsma exp wnds process --- include/util/taoserror.h | 16 +- source/common/src/tmsg.c | 4 +- source/dnode/vnode/src/inc/sma.h | 7 +- source/dnode/vnode/src/inc/vnodeInt.h | 3 +- source/dnode/vnode/src/meta/metaSma.c | 2 +- source/dnode/vnode/src/sma/sma.c | 11 +- source/dnode/vnode/src/sma/smaEnv.c | 6 +- source/dnode/vnode/src/sma/smaRollup.c | 6 +- source/dnode/vnode/src/sma/smaTimeRange.c | 65 ++++--- source/dnode/vnode/src/sma/smaTimeRange2.c | 205 ++++++++++++++------- source/dnode/vnode/src/tsdb/tsdbCommit.c | 2 +- source/dnode/vnode/src/tsdb/tsdbMemTable.c | 4 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 8 +- source/dnode/vnode/test/tsdbSmaTest.cpp | 2 +- source/util/src/terror.c | 46 +++-- 15 files changed, 244 insertions(+), 143 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index ae0191e6d2..cac5c0758d 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -351,9 +351,6 @@ int32_t* taosGetErrno(); #define TSDB_CODE_TDB_NO_CACHE_LAST_ROW TAOS_DEF_ERROR_CODE(0, 0x0619) #define TSDB_CODE_TDB_TABLE_RECREATED TAOS_DEF_ERROR_CODE(0, 0x061A)
#define TSDB_CODE_TDB_TDB_ENV_OPEN_ERROR TAOS_DEF_ERROR_CODE(0, 0x061B) -#define TSDB_CODE_TDB_NO_SMA_INDEX_IN_META TAOS_DEF_ERROR_CODE(0, 0x061C) -#define TSDB_CODE_TDB_INVALID_SMA_STAT TAOS_DEF_ERROR_CODE(0, 0x061D) -#define TSDB_CODE_TDB_TSMA_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x061E) // query #define TSDB_CODE_QRY_INVALID_QHANDLE TAOS_DEF_ERROR_CODE(0, 0x0700) @@ -684,6 +681,19 @@ int32_t* taosGetErrno(); #define TSDB_CODE_SML_INVALID_DATA TAOS_DEF_ERROR_CODE(0, 0x3002) #define TSDB_CODE_SML_INVALID_DB_CONF TAOS_DEF_ERROR_CODE(0, 0x3003) +//tsma +#define TSDB_CODE_TSMA_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x3100) +#define TSDB_CODE_TSMA_NO_INDEX_IN_META TAOS_DEF_ERROR_CODE(0, 0x3101) +#define TSDB_CODE_TSMA_INVALID_ENV TAOS_DEF_ERROR_CODE(0, 0x3102) +#define TSDB_CODE_TSMA_INVALID_STAT TAOS_DEF_ERROR_CODE(0, 0x3103) +#define TSDB_CODE_TSMA_NO_INDEX_IN_CACHE TAOS_DEF_ERROR_CODE(0, 0x3104) +#define TSDB_CODE_TSMA_RM_SKEY_IN_HASH TAOS_DEF_ERROR_CODE(0, 0x3105) + +//rsma +#define TSDB_CODE_RSMA_INVALID_ENV TAOS_DEF_ERROR_CODE(0, 0x3150) +#define TSDB_CODE_RSMA_INVALID_STAT TAOS_DEF_ERROR_CODE(0, 0x3151) + + #ifdef __cplusplus } #endif diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index fd99953e7a..42f004ebf5 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -4049,7 +4049,7 @@ int32_t tDecodeSVClrTsmaExpWndsReq(SDecoder *pCoder, SVClrTsmaExpWndsReq *pReq) int32_t tEncodeSVClrTsmaExpWndsRsp(SEncoder *pCoder, const SVClrTsmaExpWndsRsp *pReq) { if (tStartEncode(pCoder) < 0) return -1; if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; - if (tEncodeI32(pCoder, pReq->code) < 0) return -1; + if (tEncodeI32v(pCoder, pReq->code) < 0) return -1; tEndEncode(pCoder); return 0; } @@ -4057,7 +4057,7 @@ int32_t tEncodeSVClrTsmaExpWndsRsp(SEncoder *pCoder, const SVClrTsmaExpWndsRsp * int32_t tDecodeSVClrTsmaExpWndsRsp(SDecoder *pCoder, SVClrTsmaExpWndsRsp *pReq) { if (tStartDecode(pCoder) < 0) return -1; if (tDecodeI64(pCoder, 
&pReq->indexUid) < 0) return -1; - if (tDecodeI32(pCoder, &pReq->code) < 0) return -1; + if (tDecodeI32v(pCoder, &pReq->code) < 0) return -1; tEndDecode(pCoder); return 0; } diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 4ca62f1de9..7eb5c34e5d 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -70,7 +70,7 @@ struct SSmaStatItem { * N.B. only applicable to tsma */ int8_t state; // ETsdbSmaStat - SHashObj *expiredWindows; // key: skey of time window, value: version + SHashObj *expireWindows; // key: skey of time window, value: version STSma *pTSma; // cache schema }; @@ -128,7 +128,7 @@ int32_t tdInsertRSmaData(SSma *pSma, char *msg); int32_t tdRefSmaStat(SSma *pSma, SSmaStat *pStat); int32_t tdUnRefSmaStat(SSma *pSma, SSmaStat *pStat); -int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType); +int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType, bool onlyCheck); int32_t tdLockSma(SSma *pSma); int32_t tdUnLockSma(SSma *pSma); @@ -219,7 +219,8 @@ static int32_t tdInitSmaEnv(SSma *pSma, int8_t smaType, const char *path, SDisk void *tdFreeRSmaInfo(SRSmaInfo *pInfo); int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg); -int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version); +int32_t tdUpdateExpireWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version); +int32_t tdClearExpireWindowImpl(SSma *pSma, const SVClrTsmaExpWndsReq *pMsg); // TODO: This is the basic params, and should wrap the params to a queryHandle. 
int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY querySKey, int32_t nMaxResult); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 5532f202fc..944a03f70a 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -151,6 +151,7 @@ int32_t smaOpen(SVnode* pVnode); int32_t smaClose(SSma* pSma); int32_t tdUpdateExpireWindow(SSma* pSma, const SSubmitReq* pMsg, int64_t version); +int32_t tdClearExpireWindow(SSma* pSma, const SVClrTsmaExpWndsReq* pMsg); int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg); int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg); @@ -227,7 +228,7 @@ struct SVnode { SQHandle* pQuery; }; -#define TD_VID(PVNODE) (PVNODE)->config.vgId +#define TD_VID(PVNODE) ((PVNODE)->config.vgId) #define VND_TSDB(vnd) ((vnd)->pTsdb) #define VND_RSMA0(vnd) ((vnd)->pTsdb) diff --git a/source/dnode/vnode/src/meta/metaSma.c b/source/dnode/vnode/src/meta/metaSma.c index 910a0835bb..0b6a526d8c 100644 --- a/source/dnode/vnode/src/meta/metaSma.c +++ b/source/dnode/vnode/src/meta/metaSma.c @@ -38,7 +38,7 @@ int32_t metaCreateTSma(SMeta *pMeta, int64_t version, SSmaCfg *pCfg) { metaReaderInit(&mr, pMeta, 0); if (metaGetTableEntryByUid(&mr, pCfg->indexUid) == 0) { #if 1 - terrno = TSDB_CODE_TDB_TSMA_ALREADY_EXIST; + terrno = TSDB_CODE_TSMA_ALREADY_EXIST; metaReaderClear(&mr); return -1; // don't goto _err; #else diff --git a/source/dnode/vnode/src/sma/sma.c b/source/dnode/vnode/src/sma/sma.c index fd5dd080ca..5782318006 100644 --- a/source/dnode/vnode/src/sma/sma.c +++ b/source/dnode/vnode/src/sma/sma.c @@ -38,8 +38,15 @@ int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg) { int32_t tdUpdateExpireWindow(SSma* pSma, const SSubmitReq* pMsg, int64_t version) { int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdUpdateExpiredWindowImpl(pSma, pMsg, version)) < 0) { - smaWarn("vgId:%d, update expired sma 
window failed since %s", SMA_VID(pSma), tstrerror(terrno)); + if ((code = tdUpdateExpireWindowImpl(pSma, pMsg, version)) < 0) { + smaWarn("vgId:%d, update expire window failed since %s", SMA_VID(pSma), tstrerror(terrno)); + } + return code; +} +int32_t tdClearExpireWindow(SSma* pSma, const SVClrTsmaExpWndsReq* pMsg) { + int32_t code = TSDB_CODE_SUCCESS; + if ((code = tdClearExpireWindowImpl(pSma, pMsg)) < 0) { + smaWarn("vgId:%d, clear expire window failed since %s", SMA_VID(pSma), tstrerror(terrno)); } return code; } diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index 179f573e8d..d15769e28e 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -242,7 +242,7 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType) { } /** - * 1. Lazy mode utilized when init SSmaStat to update expired window(or hungry mode when tdNew). + * 1. Lazy mode utilized when init SSmaStat to update expire window(or hungry mode when tdNew). * 2. Currently, there is mutex lock when init SSmaEnv, thus no need add lock on SSmaStat, and please add lock if * tdInitSmaStat invoked in other multithread environment later.
*/ @@ -280,7 +280,7 @@ void *tdFreeSmaStatItem(SSmaStatItem *pSmaStatItem) { if (pSmaStatItem) { tDestroyTSma(pSmaStatItem->pTSma); taosMemoryFreeClear(pSmaStatItem->pTSma); - taosHashCleanup(pSmaStatItem->expiredWindows); + taosHashCleanup(pSmaStatItem->expireWindows); taosMemoryFreeClear(pSmaStatItem); } return NULL; @@ -341,7 +341,7 @@ int32_t tdUnLockSma(SSma *pSma) { return 0; } -int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType) { +int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType, bool onlyCheck) { SSmaEnv *pEnv = NULL; // return if already init diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 05ad5c1cb2..1b34529506 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -65,7 +65,7 @@ static FORCE_INLINE int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SA pRSmaInfo = taosHashGet(SMA_STAT_INFO_HASH(pStat), suid, sizeof(tb_uid_t)); if (!pRSmaInfo || !(pRSmaInfo = *(SRSmaInfo **)pRSmaInfo)) { smaError("vgId:%d, failed to get rsma info for uid:%" PRIi64, SMA_VID(pSma), *suid); - terrno = TSDB_CODE_TDB_INVALID_SMA_STAT; + terrno = TSDB_CODE_RSMA_INVALID_STAT; return TSDB_CODE_FAILED; } @@ -132,7 +132,7 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui SSmaStat *pStat = SMA_ENV_STAT(pEnv); SHashObj *infoHash = NULL; if (!pStat || !(infoHash = SMA_STAT_INFO_HASH(pStat))) { - terrno = TSDB_CODE_TDB_INVALID_SMA_STAT; + terrno = TSDB_CODE_RSMA_INVALID_STAT; return TSDB_CODE_FAILED; } @@ -181,7 +181,7 @@ int32_t tdProcessRSmaCreate(SVnode *pVnode, SVCreateStbReq *pReq) { return TSDB_CODE_SUCCESS; } - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_ROLLUP) != TSDB_CODE_SUCCESS) { + if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_ROLLUP, false) != TSDB_CODE_SUCCESS) { terrno = TSDB_CODE_TDB_INIT_FAILED; return TSDB_CODE_FAILED; } diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c 
index b72be06455..4cc9703531 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ b/source/dnode/vnode/src/sma/smaTimeRange.c @@ -69,10 +69,10 @@ static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey); static bool tdSetAndOpenTSmaFile(STSmaReadH *pReadH, TSKEY *queryKey); static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyLen, void *pData, int32_t dataLen, TXN *txn); -// expired window +// expire window -static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version); -static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey); +static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version); +static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey); static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid); // read data @@ -319,7 +319,7 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { // For super table aggregation, the sma data is stored in vgroup calculated from the hash value of stable name. Thus // the sma data would arrive ahead of the update-expired-window msg. 
- if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE) != TSDB_CODE_SUCCESS) { + if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) != TSDB_CODE_SUCCESS) { terrno = TSDB_CODE_TDB_INIT_FAILED; return TSDB_CODE_FAILED; } @@ -347,7 +347,7 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { } if (!pItem || !(pItem = *(SSmaStatItem **)pItem) || tdSmaStatIsDropped(pItem)) { - terrno = TSDB_CODE_TDB_INVALID_SMA_STAT; + terrno = TSDB_CODE_TSMA_INVALID_STAT; tdUnRefSmaStat(pSma, pStat); return TSDB_CODE_FAILED; } @@ -515,7 +515,7 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { // TODO:tsdbEndTSmaCommit(); // Step 3: reset the SSmaStat - tdResetExpiredWindow(pSma, pStat, indexUid, skey); + tdResetExpireWindow(pSma, pStat, indexUid, skey); } else { smaWarn("vgId:%d, invalid data skey:%" PRIi64 ", tlen %" PRIi32 " during insert tSma data for %" PRIi64, SMA_VID(pSma), skey, tlen, indexUid); @@ -572,7 +572,7 @@ static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyL } /** - * @brief When sma data received from stream computing, make the relative expired window valid. + * @brief When sma data received from stream computing, make the relative expire window valid. 
* * @param pSma * @param pStat @@ -580,7 +580,7 @@ static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyL * @param skey * @return int32_t */ -static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey) { +static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey) { SSmaStatItem *pItem = NULL; tdRefSmaStat(pSma, pStat); @@ -591,14 +591,14 @@ static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUi if ((pItem) && ((pItem = *(SSmaStatItem **)pItem))) { // pItem resides in hash buffer all the time unless drop sma index // TODO: multithread protect - if (taosHashRemove(pItem->expiredWindows, &skey, sizeof(TSKEY)) != 0) { + if (taosHashRemove(pItem->expireWindows, &skey, sizeof(TSKEY)) != 0) { // error handling tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d, remove skey %" PRIi64 " from expired window for sma index %" PRIi64 " fail", SMA_VID(pSma), skey, + smaWarn("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " fail", SMA_VID(pSma), skey, indexUid); return TSDB_CODE_FAILED; } - smaDebug("vgId:%d, remove skey %" PRIi64 " from expired window for sma index %" PRIi64 " succeed", SMA_VID(pSma), + smaDebug("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " succeed", SMA_VID(pSma), skey, indexUid); // TODO: use a standalone interface to received state upate notification from stream computing module. 
/** @@ -612,7 +612,7 @@ static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUi } else { // error handling tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d, expired window %" PRIi64 " not exists for sma index %" PRIi64, SMA_VID(pSma), skey, indexUid); + smaWarn("vgId:%d, expire window %" PRIi64 " not exists for sma index %" PRIi64, SMA_VID(pSma), skey, indexUid); return TSDB_CODE_FAILED; } @@ -711,7 +711,7 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query int32_t nQueryWin = taosArrayGetSize(pQuerySKey); for (int32_t n = 0; n < nQueryWin; ++n) { TSKEY skey = taosArrayGet(pQuerySKey, n); - if (taosHashGet(pItem->expiredWindows, &skey, sizeof(TSKEY))) { + if (taosHashGet(pItem->expireWindows, &skey, sizeof(TSKEY))) { // TODO: mark this window as expired. } } @@ -721,18 +721,18 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query int8_t smaStat = 0; if (!tdSmaStatIsOK(pItem, &smaStat)) { // TODO: multiple check for large scale sma query tdUnRefSmaStat(pSma, pStat); - terrno = TSDB_CODE_TDB_INVALID_SMA_STAT; + terrno = TSDB_CODE_TSMA_INVALID_STAT; smaWarn("vgId:%d, getTSmaDataImpl failed from index %" PRIi64 " since %s %" PRIi8, SMA_VID(pSma), indexUid, tstrerror(terrno), smaStat); return TSDB_CODE_FAILED; } - if (taosHashGet(pItem->expiredWindows, &querySKey, sizeof(TSKEY))) { + if (taosHashGet(pItem->expireWindows, &querySKey, sizeof(TSKEY))) { // TODO: mark this window as expired. 
- smaDebug("vgId:%d, skey %" PRIi64 " of window exists in expired window for index %" PRIi64, SMA_VID(pSma), querySKey, + smaDebug("vgId:%d, skey %" PRIi64 " of window exists in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, indexUid); } else { - smaDebug("vgId:%d, skey %" PRIi64 " of window not in expired window for index %" PRIi64, SMA_VID(pSma), querySKey, + smaDebug("vgId:%d, skey %" PRIi64 " of window not in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, indexUid); } @@ -747,7 +747,7 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query tdUnRefSmaStat(pSma, pStat); tdInitTSmaFile(&tReadH, indexUid, querySKey); - smaDebug("### vgId:%d read from DBF %s days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi8 " queryKey:%" PRIi64, + smaDebug("### vgId:%d, read from DBF %s days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi8 " queryKey:%" PRIi64, SMA_VID(pSma), tReadH.dFile.path, tReadH.days, tReadH.interval, tReadH.storageLevel, querySKey); if (smaOpenDBF(pEnv->dbEnv, &tReadH.dFile) != 0) { smaWarn("vgId:%d, open DBF %s failed since %s", SMA_VID(pSma), tReadH.dFile.path, tstrerror(terrno)); @@ -860,9 +860,9 @@ static SSmaStatItem *tdNewSmaStatItem(int8_t state) { } pItem->state = state; - pItem->expiredWindows = taosHashInit(SMA_STATE_ITEM_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP), - true, HASH_ENTRY_LOCK); - if (!pItem->expiredWindows) { + pItem->expireWindows = taosHashInit(SMA_STATE_ITEM_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP), + true, HASH_ENTRY_LOCK); + if (!pItem->expireWindows) { taosMemoryFreeClear(pItem); return NULL; } @@ -870,8 +870,7 @@ static SSmaStatItem *tdNewSmaStatItem(int8_t state) { return pItem; } -static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, - int64_t version) { +static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version) { 
SSmaStatItem *pItem = taosHashGet(pItemsHash, &indexUid, sizeof(indexUid)); if (!pItem) { // TODO: use TSDB_SMA_STAT_EXPIRED and update by stream computing later @@ -885,8 +884,8 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde // cache smaMeta STSma *pTSma = metaGetSmaInfoByIndex(SMA_META(pSma), indexUid); if (!pTSma) { - terrno = TSDB_CODE_TDB_NO_SMA_INDEX_IN_META; - taosHashCleanup(pItem->expiredWindows); + terrno = TSDB_CODE_TSMA_NO_INDEX_IN_META; + taosHashCleanup(pItem->expireWindows); taosMemoryFree(pItem); smaWarn("vgId:%d, set expire window, get tsma meta failed for smaIndex %" PRIi64 " since %s", SMA_VID(pSma), indexUid, tstrerror(terrno)); @@ -896,7 +895,7 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde if (taosHashPut(pItemsHash, &indexUid, sizeof(indexUid), &pItem, sizeof(pItem)) != 0) { // If error occurs during put smaStatItem, free the resources of pItem - taosHashCleanup(pItem->expiredWindows); + taosHashCleanup(pItem->expireWindows); taosMemoryFree(pItem); return TSDB_CODE_FAILED; } @@ -905,14 +904,14 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde return TSDB_CODE_FAILED; } - if (taosHashPut(pItem->expiredWindows, &winSKey, sizeof(TSKEY), &version, sizeof(version)) != 0) { - // If error occurs during taosHashPut expired windows, remove the smaIndex from pSma->pSmaStat, thus TSDB would + if (taosHashPut(pItem->expireWindows, &winSKey, sizeof(TSKEY), &version, sizeof(version)) != 0) { + // If error occurs during taosHashPut expire windows, remove the smaIndex from pSma->pSmaStat, thus TSDB would // tell query module to query raw TS data. // N.B. // 1) It is assumed to be extemely little probability event of fail to taosHashPut. // 2) This would solve the inconsistency to some extent, but not completely, unless we record all expired // windows failed to put into hash table.
- taosHashCleanup(pItem->expiredWindows); + taosHashCleanup(pItem->expireWindows); taosMemoryFreeClear(pItem->pTSma); taosHashRemove(pItemsHash, &indexUid, sizeof(indexUid)); smaWarn("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window fail", SMA_VID(pSma), indexUid, @@ -926,13 +925,13 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde } /** - * @brief Update expired window according to msg from stream computing module. + * @brief Update expire window according to msg from stream computing module. * * @param pSma * @param msg SSubmitReq * @return int32_t */ -int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version) { +int32_t tdUpdateExpireWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version) { // no time-range-sma, just return success if (atomic_load_16(&SMA_TSMA_NUM(pSma)) <= 0) { smaTrace("vgId:%d, not update expire window since no tSma", SMA_VID(pSma)); @@ -945,7 +944,7 @@ int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t ve return TSDB_CODE_FAILED; } - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE) < 0) { + if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) < 0) { smaError("vgId:%d, init sma env failed since %s", SMA_VID(pSma), terrstr(terrno)); terrno = TSDB_CODE_TDB_INIT_FAILED; return TSDB_CODE_FAILED; @@ -1019,7 +1018,7 @@ int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t ve if (lastWinSKey != winSKey) { lastWinSKey = winSKey; - if (tdSetExpiredWindow(pSma, pItemsHash, pTSma->indexUid, winSKey, version) < 0) { + if (tdSetExpireWindow(pSma, pItemsHash, pTSma->indexUid, winSKey, version) < 0) { pSW = tFreeTSmaWrapper(pSW, false); tdUnRefSmaStat(pSma, pStat); return TSDB_CODE_FAILED; diff --git a/source/dnode/vnode/src/sma/smaTimeRange2.c b/source/dnode/vnode/src/sma/smaTimeRange2.c index df89ec1795..760eb808bb 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange2.c +++ 
b/source/dnode/vnode/src/sma/smaTimeRange2.c @@ -72,10 +72,10 @@ static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey); static bool tdSetAndOpenTSmaFile(STSmaReadH *pReadH, TSKEY *queryKey); static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyLen, void *pData, int32_t dataLen, TXN *txn); -// expired window +// expire window -static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version); -static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey); +static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version); +static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey); static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid); /** @@ -149,7 +149,7 @@ static int32_t tdInitTSmaReadH(STSmaReadH *pSmaH, SSma *pSma, int64_t interval, } /** - * @brief Init of tSma FS + * @brief Init of tsma FS * * @param pReadH * @param indexUid @@ -169,7 +169,7 @@ static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey) { } /** - * @brief Set and open tSma file if it has key locates in queryWin. + * @brief Set and open tsma file if it has key locates in queryWin. 
* * @param pReadH * @param param @@ -335,7 +335,7 @@ static int32_t tdGetTSmaDays(SSma *pSma, int64_t interval, int32_t storageLevel) } /** - * @brief Judge the tSma storage level + * @brief Judge the tsma storage level * * @param pCfg * @param interval @@ -362,7 +362,7 @@ static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval) { * @return int32_t */ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { - STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); + STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); #if 0 const SArray *pDataBlocks = (const SArray *)msg; @@ -370,20 +370,20 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { // For super table aggregation, the sma data is stored in vgroup calculated from the hash value of stable name. Thus // the sma data would arrive ahead of the update-expired-window msg. - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE) != TSDB_CODE_SUCCESS) { + if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) != TSDB_CODE_SUCCESS) { terrno = TSDB_CODE_TDB_INIT_FAILED; return TSDB_CODE_FAILED; } if (!pDataBlocks) { terrno = TSDB_CODE_INVALID_PTR; - smaWarn("vgId:%d insert tSma data failed since pDataBlocks is NULL", SMA_VID(pSma)); + smaWarn("vgId:%d, insert tsma data failed since pDataBlocks is NULL", SMA_VID(pSma)); return terrno; } if (taosArrayGetSize(pDataBlocks) <= 0) { terrno = TSDB_CODE_INVALID_PARA; - smaWarn("vgId:%d insert tSma data failed since pDataBlocks is empty", SMA_VID(pSma)); + smaWarn("vgId:%d, insert tsma data failed since pDataBlocks is empty", SMA_VID(pSma)); return TSDB_CODE_FAILED; } #endif @@ -399,7 +399,7 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { } if (!pItem || !(pItem = *(SSmaStatItem **)pItem) || tdSmaStatIsDropped(pItem)) { - terrno = TSDB_CODE_TDB_INVALID_SMA_STAT; + terrno = TSDB_CODE_TSMA_INVALID_STAT; tdUnRefSmaStat(pSma, pStat); return TSDB_CODE_FAILED; } @@ -414,7 +414,7 @@ int32_t 
tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { int32_t tdDropTSmaData(SSma *pSma, int64_t indexUid) { int32_t code = TSDB_CODE_SUCCESS; if ((code = tdDropTSmaDataImpl(pSma, indexUid)) < 0) { - smaWarn("vgId:%d drop tSma data failed since %s", SMA_VID(pSma), tstrerror(terrno)); + smaWarn("vgId:%d, drop tsma data failed since %s", SMA_VID(pSma), tstrerror(terrno)); } return code; } @@ -435,11 +435,11 @@ static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyL // TODO: insert tsma data blocks into B+Tree(TTB) if (smaSaveSmaToDB(pDBFile, smaKey, keyLen, pData, dataLen, txn) != 0) { - smaWarn("vgId:%d insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " fail", + smaWarn("vgId:%d, insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " fail", SMA_VID(pSmaH->pSma), pDBFile->path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), dataLen); return TSDB_CODE_FAILED; } - smaDebug("vgId:%d insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " succeed", + smaDebug("vgId:%d, insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " succeed", SMA_VID(pSmaH->pSma), pDBFile->path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), dataLen); #ifdef _TEST_SMA_PRINT_DEBUG_LOG_ @@ -447,14 +447,14 @@ static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyL void *data = tdGetSmaDataByKey(pDBFile, smaKey, keyLen, &valueSize); ASSERT(data != NULL); for (uint32_t v = 0; v < valueSize; v += 8) { - smaWarn("vgId:%d insert sma data val[%d] %" PRIi64, REPO_ID(pSmaH->pTsdb), v, *(int64_t *)POINTER_SHIFT(data, v)); + smaWarn("vgId:%d, insert sma data val[%d] %" PRIi64, REPO_ID(pSmaH->pTsdb), v, *(int64_t *)POINTER_SHIFT(data, v)); } #endif return TSDB_CODE_SUCCESS; } /** - * @brief When sma data received from stream computing, make the relative expired window valid. 
+ * @brief When sma data received from stream computing, make the relative expire window valid. * * @param pSma * @param pStat @@ -462,7 +462,7 @@ static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyL * @param skey * @return int32_t */ -static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey) { +static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey) { SSmaStatItem *pItem = NULL; tdRefSmaStat(pSma, pStat); @@ -473,14 +473,14 @@ static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUi if ((pItem) && ((pItem = *(SSmaStatItem **)pItem))) { // pItem resides in hash buffer all the time unless drop sma index // TODO: multithread protect - if (taosHashRemove(pItem->expiredWindows, &skey, sizeof(TSKEY)) != 0) { + if (taosHashRemove(pItem->expireWindows, &skey, sizeof(TSKEY)) != 0) { // error handling tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d remove skey %" PRIi64 " from expired window for sma index %" PRIi64 " fail", SMA_VID(pSma), skey, + smaWarn("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " fail", SMA_VID(pSma), skey, indexUid); return TSDB_CODE_FAILED; } - smaDebug("vgId:%d remove skey %" PRIi64 " from expired window for sma index %" PRIi64 " succeed", SMA_VID(pSma), + smaDebug("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " succeed", SMA_VID(pSma), skey, indexUid); // TODO: use a standalone interface to received state upate notification from stream computing module. 
/** @@ -494,7 +494,7 @@ static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUi } else { // error handling tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d expired window %" PRIi64 " not exists for sma index %" PRIi64, SMA_VID(pSma), skey, indexUid); + smaWarn("vgId:%d, expire window %" PRIi64 " not exists for sma index %" PRIi64, SMA_VID(pSma), skey, indexUid); return TSDB_CODE_FAILED; } @@ -503,7 +503,7 @@ static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUi } /** - * @brief Drop tSma data and local cache + * @brief Drop tsma data and local cache * - insert/query reference * @param pSma * @param msg @@ -514,19 +514,19 @@ static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid) { // clear local cache if (pEnv) { - smaDebug("vgId:%d drop tSma local cache for %" PRIi64, SMA_VID(pSma), indexUid); + smaDebug("vgId:%d, drop tsma local cache for %" PRIi64, SMA_VID(pSma), indexUid); SSmaStatItem *pItem = taosHashGet(SMA_ENV_STAT_ITEMS(pEnv), &indexUid, sizeof(indexUid)); if ((pItem) || ((pItem = *(SSmaStatItem **)pItem))) { if (tdSmaStatIsDropped(pItem)) { - smaDebug("vgId:%d tSma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); + smaDebug("vgId:%d, tsma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); return TSDB_CODE_TDB_INVALID_ACTION; // TODO: duplicate drop msg would be intercepted by mnode } tdWLockSmaEnv(pEnv); if (tdSmaStatIsDropped(pItem)) { tdUnLockSmaEnv(pEnv); - smaDebug("vgId:%d tSma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); + smaDebug("vgId:%d, tsma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); return TSDB_CODE_TDB_INVALID_ACTION; // TODO: duplicate drop msg would be intercepted by mnode } tdSmaStatSetDropped(pItem); @@ -536,19 +536,20 @@ static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid) { int32_t refVal = INT32_MAX; while (true) { if ((refVal = T_REF_VAL_GET(SMA_ENV_STAT(pEnv))) <= 0) { - smaDebug("vgId:%d drop 
index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); + smaDebug("vgId:%d, drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); break; } - smaDebug("vgId:%d wait 1s to drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); + smaDebug("vgId:%d, wait 1s to drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); taosSsleep(1); if (++nSleep > SMA_DROP_EXPIRED_TIME) { - smaDebug("vgId:%d drop index %" PRIi64 " after wait %d (refVal=%d)", SMA_VID(pSma), indexUid, nSleep, refVal); + smaDebug("vgId:%d, drop index %" PRIi64 " after wait %d (refVal=%d)", SMA_VID(pSma), indexUid, nSleep, + refVal); break; }; } tdFreeSmaStatItem(pItem); - smaDebug("vgId:%d getTSmaDataImpl failed since no index %" PRIi64 " in local cache", SMA_VID(pSma), indexUid); + smaDebug("vgId:%d, getTSmaDataImpl failed since no index %" PRIi64 " in local cache", SMA_VID(pSma), indexUid); } } // clear sma data files @@ -572,7 +573,7 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query if (!pEnv) { terrno = TSDB_CODE_INVALID_PTR; - smaWarn("vgId:%d getTSmaDataImpl failed since pTSmaEnv is NULL", SMA_VID(pSma)); + smaWarn("vgId:%d, getTSmaDataImpl failed since pTSmaEnv is NULL", SMA_VID(pSma)); return TSDB_CODE_FAILED; } @@ -585,7 +586,7 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query // it's NULL. 
tdUnRefSmaStat(pSma, pStat); terrno = TSDB_CODE_TDB_INVALID_ACTION; - smaDebug("vgId:%d getTSmaDataImpl failed since no index %" PRIi64, SMA_VID(pSma), indexUid); + smaDebug("vgId:%d, getTSmaDataImpl failed since no index %" PRIi64, SMA_VID(pSma), indexUid); return TSDB_CODE_FAILED; } @@ -593,7 +594,7 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query int32_t nQueryWin = taosArrayGetSize(pQuerySKey); for (int32_t n = 0; n < nQueryWin; ++n) { TSKEY skey = taosArrayGet(pQuerySKey, n); - if (taosHashGet(pItem->expiredWindows, &skey, sizeof(TSKEY))) { + if (taosHashGet(pItem->expireWindows, &skey, sizeof(TSKEY))) { // TODO: mark this window as expired. } } @@ -603,18 +604,18 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query int8_t smaStat = 0; if (!tdSmaStatIsOK(pItem, &smaStat)) { // TODO: multiple check for large scale sma query tdUnRefSmaStat(pSma, pStat); - terrno = TSDB_CODE_TDB_INVALID_SMA_STAT; - smaWarn("vgId:%d getTSmaDataImpl failed from index %" PRIi64 " since %s %" PRIi8, SMA_VID(pSma), indexUid, + terrno = TSDB_CODE_TSMA_INVALID_STAT; + smaWarn("vgId:%d, getTSmaDataImpl failed from index %" PRIi64 " since %s %" PRIi8, SMA_VID(pSma), indexUid, tstrerror(terrno), smaStat); return TSDB_CODE_FAILED; } - if (taosHashGet(pItem->expiredWindows, &querySKey, sizeof(TSKEY))) { + if (taosHashGet(pItem->expireWindows, &querySKey, sizeof(TSKEY))) { // TODO: mark this window as expired. 
- smaDebug("vgId:%d skey %" PRIi64 " of window exists in expired window for index %" PRIi64, SMA_VID(pSma), querySKey, + smaDebug("vgId:%d, skey %" PRIi64 " of window exists in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, indexUid); } else { - smaDebug("vgId:%d skey %" PRIi64 " of window not in expired window for index %" PRIi64, SMA_VID(pSma), querySKey, + smaDebug("vgId:%d, skey %" PRIi64 " of window not in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, indexUid); } @@ -629,10 +630,10 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query tdUnRefSmaStat(pSma, pStat); tdInitTSmaFile(&tReadH, indexUid, querySKey); - smaDebug("### vgId:%d read from DBF %s days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi8 " queryKey:%" PRIi64, + smaDebug("### vgId:%d, read from DBF %s days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi8 " queryKey:%" PRIi64, SMA_VID(pSma), tReadH.dFile.path, tReadH.days, tReadH.interval, tReadH.storageLevel, querySKey); if (smaOpenDBF(pEnv->dbEnv, &tReadH.dFile) != 0) { - smaWarn("vgId:%d open DBF %s failed since %s", SMA_VID(pSma), tReadH.dFile.path, tstrerror(terrno)); + smaWarn("vgId:%d, open DBF %s failed since %s", SMA_VID(pSma), tReadH.dFile.path, tstrerror(terrno)); return TSDB_CODE_FAILED; } @@ -641,13 +642,13 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query int64_t queryGroupId = 0; tdEncodeTSmaKey(queryGroupId, querySKey, (void **)&pSmaKey); - smaDebug("vgId:%d get sma data from %s: smaKey %" PRIx64 "-%" PRIx64 ", keyLen %d", SMA_VID(pSma), tReadH.dFile.path, + smaDebug("vgId:%d, get sma data from %s: smaKey %" PRIx64 "-%" PRIx64 ", keyLen %d", SMA_VID(pSma), tReadH.dFile.path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), SMA_KEY_LEN); void *result = NULL; int32_t valueSize = 0; if (!(result = smaGetSmaDataByKey(&tReadH.dFile, smaKey, SMA_KEY_LEN, &valueSize))) { - smaWarn("vgId:%d get sma data failed from smaIndex %" PRIi64 ", 
smaKey %" PRIx64 "-%" PRIx64 " since %s", + smaWarn("vgId:%d, get sma data failed from smaIndex %" PRIi64 ", smaKey %" PRIx64 "-%" PRIx64 " since %s", SMA_VID(pSma), indexUid, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), tstrerror(terrno)); smaCloseDBF(&tReadH.dFile); return TSDB_CODE_FAILED; @@ -656,7 +657,7 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query #ifdef _TEST_SMA_PRINT_DEBUG_LOG_ for (uint32_t v = 0; v < valueSize; v += 8) { - smaWarn("vgId:%d get sma data v[%d]=%" PRIi64, SMA_VID(pSma), v, *(int64_t *)POINTER_SHIFT(result, v)); + smaWarn("vgId:%d, get sma data v[%d]=%" PRIi64, SMA_VID(pSma), v, *(int64_t *)POINTER_SHIFT(result, v)); } #endif taosMemoryFreeClear(result); // TODO: fill the result to output @@ -721,7 +722,7 @@ int32_t tdDropTSma(SSma *pSma, char *pMsg) { return -1; } - // TODO: send msg to stream computing to drop tSma + // TODO: send msg to stream computing to drop tsma // if ((send msg to stream computing) < 0) { // tDestroyTSma(&vCreateSmaReq); // return -1; @@ -755,9 +756,9 @@ static SSmaStatItem *tdNewSmaStatItem(int8_t state) { } pItem->state = state; - pItem->expiredWindows = taosHashInit(SMA_STATE_ITEM_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP), - true, HASH_ENTRY_LOCK); - if (!pItem->expiredWindows) { + pItem->expireWindows = taosHashInit(SMA_STATE_ITEM_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP), + true, HASH_ENTRY_LOCK); + if (!pItem->expireWindows) { taosMemoryFreeClear(pItem); return NULL; } @@ -765,8 +766,7 @@ static SSmaStatItem *tdNewSmaStatItem(int8_t state) { return pItem; } -static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, - int64_t version) { +static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version) { SSmaStatItem *pItem = taosHashGet(pItemsHash, &indexUid, sizeof(indexUid)); if (!pItem) { // TODO: use 
TSDB_SMA_STAT_EXPIRED and update by stream computing later @@ -780,10 +780,10 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde // cache smaMeta STSma *pTSma = metaGetSmaInfoByIndex(SMA_META(pSma), indexUid); if (!pTSma) { - terrno = TSDB_CODE_TDB_NO_SMA_INDEX_IN_META; - taosHashCleanup(pItem->expiredWindows); + terrno = TSDB_CODE_TSMA_NO_INDEX_IN_META; + taosHashCleanup(pItem->expireWindows); taosMemoryFree(pItem); - smaWarn("vgId:%d set expire window, get tsma meta failed for smaIndex %" PRIi64 " since %s", SMA_VID(pSma), + smaWarn("vgId:%d, set expire window, get tsma meta failed for smaIndex %" PRIi64 " since %s", SMA_VID(pSma), indexUid, tstrerror(terrno)); return TSDB_CODE_FAILED; } @@ -791,7 +791,7 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde if (taosHashPut(pItemsHash, &indexUid, sizeof(indexUid), &pItem, sizeof(pItem)) != 0) { // If error occurs during put smaStatItem, free the resources of pItem - taosHashCleanup(pItem->expiredWindows); + taosHashCleanup(pItem->expireWindows); taosMemoryFree(pItem); return TSDB_CODE_FAILED; } @@ -800,53 +800,53 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde return TSDB_CODE_FAILED; } - if (taosHashPut(pItem->expiredWindows, &winSKey, sizeof(TSKEY), &version, sizeof(version)) != 0) { - // If error occurs during taosHashPut expired windows, remove the smaIndex from pSma->pSmaStat, thus TSDB would + if (taosHashPut(pItem->expireWindows, &winSKey, sizeof(TSKEY), &version, sizeof(version)) != 0) { + // If error occurs during taosHashPut expire windows, remove the smaIndex from pSma->pSmaStat, thus TSDB would // tell query module to query raw TS data. // N.B. // 1) It is assumed to be extemely little probability event of fail to taosHashPut. // 2) This would solve the inconsistency to some extent, but not completely, unless we record all expired // windows failed to put into hash table. 
- taosHashCleanup(pItem->expiredWindows); + taosHashCleanup(pItem->expireWindows); taosMemoryFreeClear(pItem->pTSma); taosHashRemove(pItemsHash, &indexUid, sizeof(indexUid)); - smaWarn("vgId:%d smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window fail", SMA_VID(pSma), indexUid, + smaWarn("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window fail", SMA_VID(pSma), indexUid, winSKey); return TSDB_CODE_FAILED; } - smaDebug("vgId:%d smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window succeed", SMA_VID(pSma), indexUid, + smaDebug("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window succeed", SMA_VID(pSma), indexUid, winSKey); return TSDB_CODE_SUCCESS; } /** - * @brief Update expired window according to msg from stream computing module. + * @brief Update expire window according to msg from stream computing module. * * @param pSma * @param msg SSubmitReq * @return int32_t */ -int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version) { +int32_t tdUpdateExpireWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version) { // no time-range-sma, just return success if (atomic_load_16(&SMA_TSMA_NUM(pSma)) <= 0) { - smaTrace("vgId:%d not update expire window since no tSma", SMA_VID(pSma)); + smaTrace("vgId:%d, not update expire window since no tsma", SMA_VID(pSma)); return TSDB_CODE_SUCCESS; } if (!SMA_META(pSma)) { terrno = TSDB_CODE_INVALID_PTR; - smaError("vgId:%d update expire window failed since no meta ptr", SMA_VID(pSma)); + smaError("vgId:%d, update expire window failed since no meta ptr", SMA_VID(pSma)); return TSDB_CODE_FAILED; } - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE) < 0) { - smaError("vgId:%d init sma env failed since %s", SMA_VID(pSma), terrstr(terrno)); + if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) < 0) { + smaError("vgId:%d, init tsma env failed since %s", SMA_VID(pSma), terrstr(terrno)); terrno = TSDB_CODE_TDB_INIT_FAILED; return 
TSDB_CODE_FAILED; } - // Firstly, assume that tSma can only be created on super table/normal table. + // Firstly, assume that tsma can only be created on super table/normal table. // getActiveTimeWindow SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); @@ -914,13 +914,13 @@ int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t ve if (lastWinSKey != winSKey) { lastWinSKey = winSKey; - if (tdSetExpiredWindow(pSma, pItemsHash, pTSma->indexUid, winSKey, version) < 0) { + if (tdSetExpireWindow(pSma, pItemsHash, pTSma->indexUid, winSKey, version) < 0) { pSW = tFreeTSmaWrapper(pSW, false); tdUnRefSmaStat(pSma, pStat); return TSDB_CODE_FAILED; } } else { - smaDebug("vgId:%d smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window ignore as duplicated", + smaDebug("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window ignore as duplicated", SMA_VID(pSma), pTSma->indexUid, winSKey); } } @@ -928,5 +928,78 @@ int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t ve tdUnRefSmaStat(pSma, pStat); + return TSDB_CODE_SUCCESS; +} + +/** + * @brief Clear skeys from tsma dstVgroups in expire window. + * + * @param pSma + * @param pMsg + * @return int32_t + */ +int32_t tdClearExpireWindowImpl(SSma *pSma, const SVClrTsmaExpWndsReq *pMsg) { + int64_t indexUid = pMsg->indexUid; + + if (atomic_load_16(&SMA_TSMA_NUM(pSma)) <= 0) { + smaWarn("vgId:%d, not clear expire window since no tsma for smaIndex %" PRIi64, SMA_VID(pSma), indexUid); + terrno = TSDB_CODE_TSMA_INVALID_ENV; + return TSDB_CODE_FAILED; + } + + if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, true) < 0) { + smaWarn("vgId:%d, not clear expire window since no tsma env", SMA_VID(pSma)); + terrno = TSDB_CODE_TSMA_INVALID_ENV; + return TSDB_CODE_FAILED; + } + + // Firstly, assume that tsma can only be created on super table/normal table. 
+ // getActiveTimeWindow + + SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); + SSmaStat *pStat = SMA_ENV_STAT(pEnv); + SHashObj *pItemsHash = SMA_ENV_STAT_ITEMS(pEnv); + + ASSERT(pEnv && pStat && pItemsHash); + + // basic procedure + // TODO: optimization + tdRefSmaStat(pSma, pStat); + + SSmaStatItem *pItem = taosHashGet(pItemsHash, &indexUid, sizeof(indexUid)); + if (!pItem || !(pItem = *(SSmaStatItem **)pItem)) { + smaWarn("vgId:%d, no sma item to clear expire window for smaIndex %" PRIi64, SMA_VID(pSma), indexUid); + terrno = TSDB_CODE_TSMA_NO_INDEX_IN_CACHE; + tdUnRefSmaStat(pSma, pStat); + return TSDB_CODE_FAILED; + } + + for (int64_t i = 0; i < pMsg->nItems; ++i) { + const SVTsmaExpWndItem *pWndItem = &pMsg->items[i]; + int64_t winSKey = pWndItem->skey; + for (int64_t j = 0; j < pWndItem->nKeys; ++j) { + winSKey += pItem->pTSma->interval; + if (taosHashRemove(pItem->expireWindows, &winSKey, sizeof(winSKey)) != 0) { + // If error occurs during taosHashRemove expire windows, remove the smaIndex from pSma->pSmaStat, thus TSDB + // would tell query module to query raw TS data. N.B. + // 1) It is assumed to be extemely little probability event of fail to taosHashPut. + // 2) This would solve the inconsistency to some extent, but not completely, unless we record all expired + // windows failed to put into hash table. 
+ taosHashCleanup(pItem->expireWindows); + taosMemoryFreeClear(pItem->pTSma); + taosHashRemove(pItemsHash, &indexUid, sizeof(indexUid)); + smaWarn("vgId:%d, rm skey %" PRIi64 " in expire window for smaIndex %" PRIi64 " fail", SMA_VID(pSma), winSKey, + indexUid); + terrno = TSDB_CODE_TSMA_RM_SKEY_IN_HASH; + tdUnRefSmaStat(pSma, pStat); + return TSDB_CODE_FAILED; + } + smaDebug("vgId:%d, rm skey %" PRIi64 " in expire window for smaIndex %" PRIi64 " success", SMA_VID(pSma), winSKey, + indexUid); + } + } + + tdUnRefSmaStat(pSma, pStat); + return TSDB_CODE_SUCCESS; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit.c b/source/dnode/vnode/src/tsdb/tsdbCommit.c index 17b8afda4b..90093f2510 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit.c @@ -150,7 +150,7 @@ int32_t tsdbCommit(STsdb *pTsdb) { return code; _err: - tsdbError("vgId:%d failed to commit since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + tsdbError("vgId:%d, failed to commit since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index ffbef4e765..d8bc87d471 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -176,13 +176,13 @@ int32_t tsdbDeleteTableData(STsdb *pTsdb, int64_t version, tb_uid_t suid, tb_uid pMemTable->nDelOp++; - tsdbError("vgId:%d delete data from table suid:%" PRId64 " uid:%" PRId64 " skey:%" PRId64 " eKey:%" PRId64 + tsdbError("vgId:%d, delete data from table suid:%" PRId64 " uid:%" PRId64 " skey:%" PRId64 " eKey:%" PRId64 " since %s", TD_VID(pTsdb->pVnode), suid, uid, sKey, eKey, tstrerror(code)); return code; _err: - tsdbError("vgId:%d failed to delete data from table suid:%" PRId64 " uid:%" PRId64 " skey:%" PRId64 " eKey:%" PRId64 + tsdbError("vgId:%d, failed to delete data from table suid:%" PRId64 " uid:%" PRId64 " skey:%" PRId64 " eKey:%" 
PRId64 " since %s", TD_VID(pTsdb->pVnode), suid, uid, sKey, eKey, tstrerror(code)); return code; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 3a47a54637..55ab5f5729 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -918,10 +918,10 @@ static int32_t vnodeProcessExpWndsClrReq(SVnode *pVnode, void *pReq, int32_t len ASSERT(0); - // if (tdProcess(pVnode->pSma, version, (const char *)&req) < 0) { - // if (pRsp) pRsp->code = terrno; - // goto _err; - // } + if (tdClearExpireWindow(pVnode->pSma, (const SVClrTsmaExpWndsReq *)&req) < 0) { + if (pRsp) pRsp->code = terrno; + goto _err; + } tDecoderClear(&coder); vDebug("vgId:%d, success to process expWnds clear for tsma %" PRIi64 " version %" PRIi64, TD_VID(pVnode), diff --git a/source/dnode/vnode/test/tsdbSmaTest.cpp b/source/dnode/vnode/test/tsdbSmaTest.cpp index 3b8c94e413..0161fac9b5 100644 --- a/source/dnode/vnode/test/tsdbSmaTest.cpp +++ b/source/dnode/vnode/test/tsdbSmaTest.cpp @@ -373,7 +373,7 @@ TEST(testCase, tSma_Data_Insert_Query_Test) { pTsdb->pTfs = tfsOpen(&pDisks, numOfDisks); EXPECT_NE(pTsdb->pTfs, nullptr); - // generate SSubmitReq msg and update expired window + // generate SSubmitReq msg and update expire window int16_t schemaVer = 0; uint32_t mockRowLen = sizeof(STSRow); uint32_t mockRowNum = 2; diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 71c348f810..7b9c254a20 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -352,9 +352,6 @@ TAOS_DEFINE_ERROR(TSDB_CODE_TDB_IVLD_TAG_VAL, "TSDB invalid tag valu TAOS_DEFINE_ERROR(TSDB_CODE_TDB_NO_CACHE_LAST_ROW, "TSDB no cache last row data") TAOS_DEFINE_ERROR(TSDB_CODE_TDB_TABLE_RECREATED, "Table re-created") TAOS_DEFINE_ERROR(TSDB_CODE_TDB_TDB_ENV_OPEN_ERROR, "TDB env open error") -TAOS_DEFINE_ERROR(TSDB_CODE_TDB_NO_SMA_INDEX_IN_META, "No sma index in meta") -TAOS_DEFINE_ERROR(TSDB_CODE_TDB_INVALID_SMA_STAT, "Invalid sma 
state") -TAOS_DEFINE_ERROR(TSDB_CODE_TDB_TSMA_ALREADY_EXIST, "TSMA already exists") // query @@ -536,25 +533,38 @@ TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_DELETE_WHERE, "The DELETE statemen TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_REDISTRIBUTE_VG, "The REDISTRIBUTE VGROUP statement only support 1 to 3 dnodes") //planner -TAOS_DEFINE_ERROR(TSDB_CODE_PLAN_INTERNAL_ERROR, "Planner internal error") +TAOS_DEFINE_ERROR(TSDB_CODE_PLAN_INTERNAL_ERROR, "Planner internal error") //udf -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_STOPPING, "udf is stopping") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_PIPE_READ_ERR, "udf pipe read error") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_PIPE_CONNECT_ERR, "udf pipe connect error") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_PIPE_NO_PIPE, "udf no pipe") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_LOAD_UDF_FAILURE, "udf load failure") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_STATE, "udf invalid state") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_INPUT, "udf invalid function input") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_NO_FUNC_HANDLE, "udf no function handle") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_BUFSIZE, "udf invalid bufsize") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_OUTPUT_TYPE, "udf invalid output type") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_STOPPING, "udf is stopping") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_PIPE_READ_ERR, "udf pipe read error") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_PIPE_CONNECT_ERR, "udf pipe connect error") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_PIPE_NO_PIPE, "udf no pipe") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_LOAD_UDF_FAILURE, "udf load failure") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_STATE, "udf invalid state") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_INPUT, "udf invalid function input") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_NO_FUNC_HANDLE, "udf no function handle") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_BUFSIZE, "udf invalid bufsize") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_OUTPUT_TYPE, "udf invalid output type") //schemaless -TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_PROTOCOL_TYPE, "Invalid line 
protocol type") -TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_PRECISION_TYPE, "Invalid timestamp precision type") -TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_DATA, "Invalid data type") -TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_DB_CONF, "Invalid schemaless db config") +TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_PROTOCOL_TYPE, "Invalid line protocol type") +TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_PRECISION_TYPE, "Invalid timestamp precision type") +TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_DATA, "Invalid data type") +TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_DB_CONF, "Invalid schemaless db config") + +//tsma +TAOS_DEFINE_ERROR(TSDB_CODE_TSMA_ALREADY_EXIST, "Tsma already exists") +TAOS_DEFINE_ERROR(TSDB_CODE_TSMA_NO_INDEX_IN_META, "No tsma index in meta") +TAOS_DEFINE_ERROR(TSDB_CODE_TSMA_INVALID_ENV, "Invalid tsma env") +TAOS_DEFINE_ERROR(TSDB_CODE_TSMA_INVALID_STAT, "Invalid tsma state") +TAOS_DEFINE_ERROR(TSDB_CODE_TSMA_NO_INDEX_IN_CACHE, "No tsma index in cache") +TAOS_DEFINE_ERROR(TSDB_CODE_TSMA_RM_SKEY_IN_HASH, "Rm tsma skey in cache") + +//rsma +TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_INVALID_ENV, "Invalid rsma env") +TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_INVALID_STAT, "Invalid rsma state") + #ifdef TAOS_ERROR_C }; From 2160a112f52d916c4942a5844cc928d33004a73d Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Fri, 10 Jun 2022 09:59:47 +0800 Subject: [PATCH 04/16] other: merge 3.0 --- include/common/tmsgdef.h | 3 + include/libs/stream/tstream.h | 2 + include/libs/sync/sync.h | 27 +- include/libs/sync/syncTools.h | 94 +++ include/libs/wal/wal.h | 14 +- source/common/src/tdatablock.c | 1 + source/dnode/mgmt/mgmt_mnode/src/mmHandle.c | 9 +- source/dnode/mgmt/mgmt_mnode/src/mmWorker.c | 2 +- source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 1 + source/dnode/mnode/impl/inc/mndInt.h | 3 +- source/dnode/mnode/impl/inc/mndVgroup.h | 1 + source/dnode/mnode/impl/src/mndDb.c | 51 -- source/dnode/mnode/impl/src/mndDnode.c | 2 + source/dnode/mnode/impl/src/mndMain.c | 154 +++-- 
source/dnode/mnode/impl/src/mndSync.c | 64 +- source/dnode/mnode/impl/src/mndTrans.c | 8 +- source/dnode/mnode/impl/src/mndVgroup.c | 151 ++++- source/dnode/mnode/sdb/inc/sdb.h | 10 +- source/dnode/mnode/sdb/src/sdb.c | 14 +- source/dnode/mnode/sdb/src/sdbFile.c | 38 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 14 + source/dnode/vnode/src/vnd/vnodeSync.c | 2 + source/libs/index/inc/indexCache.h | 22 +- source/libs/index/inc/indexComm.h | 4 +- source/libs/index/inc/indexInt.h | 24 +- source/libs/index/inc/indexTfile.h | 8 +- source/libs/index/src/index.c | 68 +- source/libs/index/src/indexCache.c | 104 +-- source/libs/index/src/indexComm.c | 22 +- source/libs/index/src/indexFilter.c | 2 +- source/libs/index/src/indexJson.c | 4 +- source/libs/index/src/indexTfile.c | 28 +- source/libs/index/test/indexTests.cc | 10 +- source/libs/sync/inc/syncAppendEntries.h | 1 + source/libs/sync/inc/syncAppendEntriesReply.h | 1 + source/libs/sync/inc/syncElection.h | 2 + source/libs/sync/inc/syncIO.h | 3 + source/libs/sync/inc/syncIndexMgr.h | 4 + source/libs/sync/inc/syncInt.h | 53 +- source/libs/sync/inc/syncRaftCfg.h | 8 +- source/libs/sync/inc/syncRaftLog.h | 10 +- source/libs/sync/inc/syncRaftStore.h | 4 +- source/libs/sync/inc/syncReplication.h | 1 + source/libs/sync/inc/syncRequestVote.h | 1 + source/libs/sync/inc/syncRequestVoteReply.h | 1 + source/libs/sync/inc/syncSnapshot.h | 60 +- source/libs/sync/inc/syncUtil.h | 1 + source/libs/sync/src/syncAppendEntries.c | 337 +++++++++- source/libs/sync/src/syncAppendEntriesReply.c | 115 ++++ source/libs/sync/src/syncCommit.c | 105 +--- source/libs/sync/src/syncElection.c | 28 +- source/libs/sync/src/syncIO.c | 30 +- source/libs/sync/src/syncIndexMgr.c | 62 +- source/libs/sync/src/syncMain.c | 431 ++++++++++++- source/libs/sync/src/syncMessage.c | 424 ++++++++++++- source/libs/sync/src/syncRaftCfg.c | 9 +- source/libs/sync/src/syncRaftLog.c | 304 ++++++++- source/libs/sync/src/syncReplication.c | 83 ++- 
source/libs/sync/src/syncRequestVote.c | 65 ++ source/libs/sync/src/syncRequestVoteReply.c | 66 ++ source/libs/sync/src/syncSnapshot.c | 595 +++++++++++++++++- source/libs/sync/src/syncUtil.c | 22 + source/libs/sync/test/CMakeLists.txt | 126 ++++ .../sync/test/syncAppendEntriesReplyTest.cpp | 2 + .../libs/sync/test/syncAppendEntriesTest.cpp | 1 + .../test/syncConfigChangeSnapshotTest.cpp | 366 +++++++++++ .../libs/sync/test/syncConfigChangeTest.cpp | 1 - source/libs/sync/test/syncIndexMgrTest.cpp | 82 +-- source/libs/sync/test/syncRaftCfgTest.cpp | 6 +- source/libs/sync/test/syncRaftLogTest.cpp | 172 +++++ source/libs/sync/test/syncRaftLogTest2.cpp | 437 +++++++++++++ source/libs/sync/test/syncRaftLogTest3.cpp | 388 ++++++++++++ .../sync/test/syncSnapshotReceiverTest.cpp | 63 ++ source/libs/sync/test/syncSnapshotRspTest.cpp | 101 +++ .../libs/sync/test/syncSnapshotSendTest.cpp | 101 +++ .../libs/sync/test/syncSnapshotSenderTest.cpp | 72 +++ source/libs/sync/test/syncTest.cpp | 7 +- source/libs/sync/test/syncTestTool.cpp | 399 ++++++++++++ source/libs/sync/test/syncTimeoutTest.cpp | 21 + source/libs/wal/inc/walInt.h | 1 + source/libs/wal/src/walMeta.c | 9 + source/libs/wal/src/walMgmt.c | 14 +- source/libs/wal/src/walWrite.c | 39 +- tests/script/jenkins/basic.txt | 3 +- tests/script/tsim/dnode/drop_dnode_mnode.sim | 52 ++ tests/script/tsim/mnode/basic3.sim | 5 +- 86 files changed, 5670 insertions(+), 585 deletions(-) create mode 100644 source/libs/sync/test/syncConfigChangeSnapshotTest.cpp create mode 100644 source/libs/sync/test/syncRaftLogTest.cpp create mode 100644 source/libs/sync/test/syncRaftLogTest2.cpp create mode 100644 source/libs/sync/test/syncRaftLogTest3.cpp create mode 100644 source/libs/sync/test/syncSnapshotReceiverTest.cpp create mode 100644 source/libs/sync/test/syncSnapshotRspTest.cpp create mode 100644 source/libs/sync/test/syncSnapshotSendTest.cpp create mode 100644 source/libs/sync/test/syncSnapshotSenderTest.cpp create mode 100644 
source/libs/sync/test/syncTestTool.cpp create mode 100644 tests/script/tsim/dnode/drop_dnode_mnode.sim diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 81ddc68d5d..dba80f5397 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -195,6 +195,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_REPLICA, "alter-replica", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIRM, "alter-confirm", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_VND_ALTER_HASHRANGE, "alter-hashrange", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_COMPACT, "compact", NULL, NULL) TD_NEW_MSG_SEG(TDMT_QND_MSG) @@ -234,6 +235,8 @@ enum { TD_DEF_MSG_TYPE(TDMT_SYNC_COMMON_RESPONSE, "sync-common-response", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_APPLY_MSG, "sync-apply-msg", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_CONFIG_CHANGE, "sync-config-change", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SYNC_SNAPSHOT_SEND, "sync-snapshot-send", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SYNC_SNAPSHOT_RSP, "sync-snapshot-rsp", NULL, NULL) #if defined(TD_MSG_NUMBER_) TDMT_MAX diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 6b5eb3b491..31d13d6cf4 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -308,9 +308,11 @@ static FORCE_INLINE int32_t streamTaskOutput(SStreamTask* pTask, SStreamDataBloc if (pTask->sinkType == TASK_SINK__TABLE) { ASSERT(pTask->dispatchType == TASK_DISPATCH__NONE); pTask->tbSink.tbSinkFunc(pTask, pTask->tbSink.vnode, 0, pBlock->blocks); + taosFreeQitem(pBlock); } else if (pTask->sinkType == TASK_SINK__SMA) { ASSERT(pTask->dispatchType == TASK_DISPATCH__NONE); pTask->smaSink.smaSink(pTask->smaSink.vnode, pTask->smaSink.smaId, pBlock->blocks); + taosFreeQitem(pBlock); } else { ASSERT(pTask->dispatchType != TASK_DISPATCH__NONE); taosWriteQitem(pTask->outputQueue->queue, pBlock); diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index a587ad6ef2..10ece0b219 100644 --- 
a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -88,11 +88,16 @@ typedef struct SReConfigCbMeta { } SReConfigCbMeta; typedef struct SSnapshot { - void *data; + void* data; SyncIndex lastApplyIndex; SyncTerm lastApplyTerm; + SyncIndex lastConfigIndex; } SSnapshot; +typedef struct SSnapshotMeta { + SyncIndex lastConfigIndex; +} SSnapshotMeta; + typedef struct SSyncFSM { void* data; @@ -141,10 +146,28 @@ typedef struct SSyncLogStore { // return commit index of log SyncIndex (*getCommitIndex)(struct SSyncLogStore* pLogStore); + // refactor, log[0 .. n] ==> log[m .. n] + int32_t (*syncLogSetBeginIndex)(struct SSyncLogStore* pLogStore, SyncIndex beginIndex); + int32_t (*syncLogResetBeginIndex)(struct SSyncLogStore* pLogStore); + SyncIndex (*syncLogBeginIndex)(struct SSyncLogStore* pLogStore); + SyncIndex (*syncLogEndIndex)(struct SSyncLogStore* pLogStore); + bool (*syncLogIsEmpty)(struct SSyncLogStore* pLogStore); + int32_t (*syncLogEntryCount)(struct SSyncLogStore* pLogStore); + bool (*syncLogInRange)(struct SSyncLogStore* pLogStore, SyncIndex index); + + SyncIndex (*syncLogWriteIndex)(struct SSyncLogStore* pLogStore); + SyncIndex (*syncLogLastIndex)(struct SSyncLogStore* pLogStore); + SyncTerm (*syncLogLastTerm)(struct SSyncLogStore* pLogStore); + + int32_t (*syncLogAppendEntry)(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry); + int32_t (*syncLogGetEntry)(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncRaftEntry** ppEntry); + int32_t (*syncLogTruncate)(struct SSyncLogStore* pLogStore, SyncIndex fromIndex); + } SSyncLogStore; typedef struct SSyncInfo { bool isStandBy; + bool snapshotEnable; SyncGroupId vgId; SSyncCfg syncCfg; char path[TSDB_FILENAME_LEN]; @@ -172,6 +195,8 @@ bool syncEnvIsStart(); const char* syncStr(ESyncState state); bool syncIsRestoreFinish(int64_t rid); +int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta); + // to be moved to static void syncStartNormal(int64_t rid); void syncStartStandBy(int64_t rid); 
diff --git a/include/libs/sync/syncTools.h b/include/libs/sync/syncTools.h index bd396edf55..bb50fc141c 100644 --- a/include/libs/sync/syncTools.h +++ b/include/libs/sync/syncTools.h @@ -301,6 +301,7 @@ typedef struct SyncAppendEntries { SyncIndex prevLogIndex; SyncTerm prevLogTerm; SyncIndex commitIndex; + SyncTerm privateTerm; uint32_t dataLen; char data[]; } SyncAppendEntries; @@ -332,6 +333,7 @@ typedef struct SyncAppendEntriesReply { SRaftId destId; // private data SyncTerm term; + SyncTerm privateTerm; bool success; SyncIndex matchIndex; } SyncAppendEntriesReply; @@ -385,6 +387,75 @@ void syncApplyMsgPrint2(char* s, const SyncApplyMsg* pMsg); void syncApplyMsgLog(const SyncApplyMsg* pMsg); void syncApplyMsgLog2(char* s, const SyncApplyMsg* pMsg); +// --------------------------------------------- +typedef struct SyncSnapshotSend { + uint32_t bytes; + int32_t vgId; + uint32_t msgType; + SRaftId srcId; + SRaftId destId; + + SyncTerm term; + SyncIndex lastIndex; // lastIndex of snapshot + SyncTerm lastTerm; // lastTerm of snapshot + SyncTerm privateTerm; + int32_t seq; + uint32_t dataLen; + char data[]; +} SyncSnapshotSend; + +SyncSnapshotSend* syncSnapshotSendBuild(uint32_t dataLen, int32_t vgId); +void syncSnapshotSendDestroy(SyncSnapshotSend* pMsg); +void syncSnapshotSendSerialize(const SyncSnapshotSend* pMsg, char* buf, uint32_t bufLen); +void syncSnapshotSendDeserialize(const char* buf, uint32_t len, SyncSnapshotSend* pMsg); +char* syncSnapshotSendSerialize2(const SyncSnapshotSend* pMsg, uint32_t* len); +SyncSnapshotSend* syncSnapshotSendDeserialize2(const char* buf, uint32_t len); +void syncSnapshotSend2RpcMsg(const SyncSnapshotSend* pMsg, SRpcMsg* pRpcMsg); +void syncSnapshotSendFromRpcMsg(const SRpcMsg* pRpcMsg, SyncSnapshotSend* pMsg); +SyncSnapshotSend* syncSnapshotSendFromRpcMsg2(const SRpcMsg* pRpcMsg); +cJSON* syncSnapshotSend2Json(const SyncSnapshotSend* pMsg); +char* syncSnapshotSend2Str(const SyncSnapshotSend* pMsg); + +// for debug 
---------------------- +void syncSnapshotSendPrint(const SyncSnapshotSend* pMsg); +void syncSnapshotSendPrint2(char* s, const SyncSnapshotSend* pMsg); +void syncSnapshotSendLog(const SyncSnapshotSend* pMsg); +void syncSnapshotSendLog2(char* s, const SyncSnapshotSend* pMsg); + +// --------------------------------------------- +typedef struct SyncSnapshotRsp { + uint32_t bytes; + int32_t vgId; + uint32_t msgType; + SRaftId srcId; + SRaftId destId; + + SyncTerm term; + SyncIndex lastIndex; + SyncTerm lastTerm; + SyncTerm privateTerm; + int32_t ack; + int32_t code; +} SyncSnapshotRsp; + +SyncSnapshotRsp* syncSnapshotRspBuild(int32_t vgId); +void syncSnapshotRspDestroy(SyncSnapshotRsp* pMsg); +void syncSnapshotRspSerialize(const SyncSnapshotRsp* pMsg, char* buf, uint32_t bufLen); +void syncSnapshotRspDeserialize(const char* buf, uint32_t len, SyncSnapshotRsp* pMsg); +char* syncSnapshotRspSerialize2(const SyncSnapshotRsp* pMsg, uint32_t* len); +SyncSnapshotRsp* syncSnapshotRspDeserialize2(const char* buf, uint32_t len); +void syncSnapshotRsp2RpcMsg(const SyncSnapshotRsp* pMsg, SRpcMsg* pRpcMsg); +void syncSnapshotRspFromRpcMsg(const SRpcMsg* pRpcMsg, SyncSnapshotRsp* pMsg); +SyncSnapshotRsp* syncSnapshotRspFromRpcMsg2(const SRpcMsg* pRpcMsg); +cJSON* syncSnapshotRsp2Json(const SyncSnapshotRsp* pMsg); +char* syncSnapshotRsp2Str(const SyncSnapshotRsp* pMsg); + +// for debug ---------------------- +void syncSnapshotRspPrint(const SyncSnapshotRsp* pMsg); +void syncSnapshotRspPrint2(char* s, const SyncSnapshotRsp* pMsg); +void syncSnapshotRspLog(const SyncSnapshotRsp* pMsg); +void syncSnapshotRspLog2(char* s, const SyncSnapshotRsp* pMsg); + // on message ---------------------- int32_t syncNodeOnPingCb(SSyncNode* ths, SyncPing* pMsg); int32_t syncNodeOnPingReplyCb(SSyncNode* ths, SyncPingReply* pMsg); @@ -395,6 +466,29 @@ int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg) int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg); 
int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); +int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg); +int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg); +int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg); +int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); + +int32_t syncNodeOnSnapshotSendCb(SSyncNode* ths, SyncSnapshotSend* pMsg); +int32_t syncNodeOnSnapshotRspCb(SSyncNode* ths, SyncSnapshotRsp* pMsg); + +// ----------------------------------------- +typedef int32_t (*FpOnPingCb)(SSyncNode* ths, SyncPing* pMsg); +typedef int32_t (*FpOnPingReplyCb)(SSyncNode* ths, SyncPingReply* pMsg); +typedef int32_t (*FpOnClientRequestCb)(SSyncNode* ths, SyncClientRequest* pMsg); +typedef int32_t (*FpOnRequestVoteCb)(SSyncNode* ths, SyncRequestVote* pMsg); +typedef int32_t (*FpOnRequestVoteReplyCb)(SSyncNode* ths, SyncRequestVoteReply* pMsg); +typedef int32_t (*FpOnAppendEntriesCb)(SSyncNode* ths, SyncAppendEntries* pMsg); +typedef int32_t (*FpOnAppendEntriesReplyCb)(SSyncNode* ths, SyncAppendEntriesReply* pMsg); +typedef int32_t (*FpOnTimeoutCb)(SSyncNode* pSyncNode, SyncTimeout* pMsg); +typedef int32_t (*FpOnSnapshotSendCb)(SSyncNode* ths, SyncSnapshotSend* pMsg); +typedef int32_t (*FpOnSnapshotRspCb)(SSyncNode* ths, SyncSnapshotRsp* pMsg); + +// option ---------------------------------- +bool syncNodeSnapshotEnable(SSyncNode* pSyncNode); + // --------------------------------------------- #ifdef __cplusplus diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index 95af8ac306..c7d1ccd3de 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -141,6 +141,8 @@ typedef struct SWal { // ctl int64_t refId; TdThreadMutex mutex; + // ref + SHashObj *pRefHash; // ref -> SWalRef // path char path[WAL_PATH_LEN]; // reusable write head @@ -184,7 +186,7 @@ int32_t walRollback(SWal *, 
int64_t ver); // notify that previous logs can be pruned safely int32_t walBeginSnapshot(SWal *, int64_t ver); int32_t walEndSnapshot(SWal *); -void walRestoreFromSnapshot(SWal *, int64_t ver); +int32_t walRestoreFromSnapshot(SWal *, int64_t ver); // int32_t walDataCorrupted(SWal*); // read @@ -199,6 +201,16 @@ int32_t walFetchHead(SWalReadHandle *pRead, int64_t ver, SWalHead *pHead); int32_t walFetchBody(SWalReadHandle *pRead, SWalHead **ppHead); int32_t walSkipFetchBody(SWalReadHandle *pRead, const SWalHead *pHead); +typedef struct { + int64_t refId; + int64_t ver; +} SWalRef; + +SWalRef *walOpenRef(SWal *); +void walCloseRef(SWalRef *); +int32_t walRefVer(SWalRef *, int64_t ver); +int32_t walUnrefVer(SWal *); + // deprecated #if 0 int32_t walRead(SWal *, SWalHead **, int64_t ver); diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 5d6425264d..aa679b9414 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1779,6 +1779,7 @@ SSubmitReq* tdBlockToSubmit(const SArray* pBlocks, const STSchema* pTSchema, boo } // assign data + // TODO ret = taosMemoryCalloc(1, cap + 46); ret = POINTER_SHIFT(ret, 46); ret->header.vgId = vgId; diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index 6c2783cb5c..264dc74e36 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -215,8 +215,12 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG_RSP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA_RSP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIRM_RSP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_HASHRANGE_RSP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_COMPACT_RSP, mmPutNodeMsgToWriteQueue, 0) 
== NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MON_MM_INFO, mmPutNodeMsgToMonitorQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MON_MM_LOAD, mmPutNodeMsgToMonitorQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_TIMEOUT, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_PING, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_PING_REPLY, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; @@ -226,9 +230,8 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_SYNC_REQUEST_VOTE_REPLY, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES_REPLY, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; - - if (dmSetMgmtHandle(pArray, TDMT_MON_MM_INFO, mmPutNodeMsgToMonitorQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_MON_MM_LOAD, mmPutNodeMsgToMonitorQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_SEND, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_RSP, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; code = 0; diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c index 42fa7b718e..53943b61b0 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c @@ -71,7 +71,7 @@ static void mmProcessSyncQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { } static int32_t mmPutNodeMsgToWorker(SSingleWorker *pWorker, SRpcMsg *pMsg) { - dTrace("msg:%p, put into worker %s, type:%s", pMsg, pWorker->name, TMSG_INFO(pMsg->msgType)); + dTrace("msg:%p, put into %s queue, type:%s", pMsg, pWorker->name, TMSG_INFO(pMsg->msgType)); taosWriteQitem(pWorker->queue, pMsg); return 0; } diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c 
b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 7cd4ddd713..1540f10ba4 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -360,6 +360,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIRM, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_HASHRANGE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_COMPACT, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_CREATE_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_DROP_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mnode/impl/inc/mndInt.h b/source/dnode/mnode/impl/inc/mndInt.h index 6661347e42..4869a19856 100644 --- a/source/dnode/mnode/impl/inc/mndInt.h +++ b/source/dnode/mnode/impl/inc/mndInt.h @@ -19,6 +19,7 @@ #include "mndDef.h" #include "sdb.h" +#include "sync.h" #include "syncTools.h" #include "tcache.h" #include "tdatablock.h" @@ -75,7 +76,6 @@ typedef struct { } STelemMgmt; typedef struct { - SWal *pWal; sem_t syncSem; int64_t sync; bool standby; @@ -108,6 +108,7 @@ typedef struct SMnode { SQHandle *pQuery; SHashObj *infosMeta; SHashObj *perfsMeta; + SWal *pWal; SShowMgmt showMgmt; SProfileMgmt profileMgmt; STelemMgmt telemMgmt; diff --git a/source/dnode/mnode/impl/inc/mndVgroup.h b/source/dnode/mnode/impl/inc/mndVgroup.h index 89f93d30b5..c50279889e 100644 --- a/source/dnode/mnode/impl/inc/mndVgroup.h +++ b/source/dnode/mnode/impl/inc/mndVgroup.h @@ -41,6 +41,7 @@ int32_t mndAddAlterVnodeAction(SMnode *, STrans *pTrans, SDbObj *pDb, SVgObj *pV int32_t mndAddDropVnodeAction(SMnode *, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SVnodeGid *pVgid, bool 
isRedo); int32_t mndSetMoveVgroupInfoToTrans(SMnode *, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int32_t vn, SArray *pArray); int32_t mndSetMoveVgroupsInfoToTrans(SMnode *, STrans *pTrans, int32_t dropDnodeId); +int32_t mndBuildAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SArray *pArray); void *mndBuildCreateVnodeReq(SMnode *, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *cntlen, bool standby); void *mndBuildDropVnodeReq(SMnode *, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *pContLen); diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 61d6e7be6c..e6c93a9bfd 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -636,57 +636,6 @@ static int32_t mndSetAlterDbCommitLogs(SMnode *pMnode, STrans *pTrans, SDbObj *p return 0; } -static int32_t mndBuildAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SArray *pArray) { - if (pVgroup->replica <= 0 || pVgroup->replica == pDb->cfg.replications) { - if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, pVgroup, TDMT_VND_ALTER_CONFIG) != 0) { - return -1; - } - } else { - SVgObj newVgroup = {0}; - memcpy(&newVgroup, pVgroup, sizeof(SVgObj)); - mndTransSetSerial(pTrans); - - if (newVgroup.replica < pDb->cfg.replications) { - mInfo("db:%s, vgId:%d, vn:0 dnode:%d, will add 2 vnodes", pVgroup->dbName, pVgroup->vgId, - pVgroup->vnodeGid[0].dnodeId); - - if (mndAddVnodeToVgroup(pMnode, &newVgroup, pArray) != 0) return -1; - if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVgroup, &newVgroup.vnodeGid[1], true) != 0) return -1; - if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; - if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; - - if (mndAddVnodeToVgroup(pMnode, &newVgroup, pArray) != 0) return -1; - if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVgroup, 
&newVgroup.vnodeGid[2], true) != 0) return -1; - if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; - if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; - } else { - mInfo("db:%s, vgId:%d, will remove 2 vnodes", pVgroup->dbName, pVgroup->vgId); - - SVnodeGid del1 = {0}; - if (mndRemoveVnodeFromVgroup(pMnode, &newVgroup, pArray, &del1) != 0) return -1; - if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; - if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVgroup, &del1, true) != 0) return -1; - if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; - - SVnodeGid del2 = {0}; - if (mndRemoveVnodeFromVgroup(pMnode, &newVgroup, pArray, &del2) != 0) return -1; - if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; - if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVgroup, &del2, true) != 0) return -1; - if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; - } - - SSdbRaw *pVgRaw = mndVgroupActionEncode(&newVgroup); - if (pVgRaw == NULL) return -1; - if (mndTransAppendCommitlog(pTrans, pVgRaw) != 0) { - sdbFreeRaw(pVgRaw); - return -1; - } - sdbSetRawStatus(pVgRaw, SDB_STATUS_READY); - } - - return 0; -} - static int32_t mndSetAlterDbRedoActions(SMnode *pMnode, STrans *pTrans, SDbObj *pOld, SDbObj *pNew) { SSdb *pSdb = pMnode->pSdb; void *pIter = NULL; diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 7be4939337..3fab870277 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -566,9 +566,11 @@ static int32_t mndDropDnode(SMnode *pMnode, SRpcMsg *pReq, SDnodeObj *pDnode, SM pRaw = NULL; if (pMObj != NULL) { + mDebug("trans:%d, mnode on dnode:%d will be dropped", pTrans->id, pDnode->id); if (mndSetDropMnodeInfoToTrans(pMnode, pTrans, pMObj) 
!= 0) goto _OVER; } if (numOfVnodes > 0) { + mDebug("trans:%d, %d vnodes on dnode:%d will be dropped", pTrans->id, numOfVnodes, pDnode->id); if (mndSetMoveVgroupsInfoToTrans(pMnode, pTrans, pDnode->id) != 0) goto _OVER; } if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 69300c0889..813e4c30b5 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -139,10 +139,40 @@ static int32_t mndCreateDir(SMnode *pMnode, const char *path) { return 0; } +static int32_t mndInitWal(SMnode *pMnode) { + char path[PATH_MAX + 20] = {0}; + snprintf(path, sizeof(path), "%s%swal", pMnode->path, TD_DIRSEP); + SWalCfg cfg = { + .vgId = 1, + .fsyncPeriod = 0, + .rollPeriod = -1, + .segSize = -1, + .retentionPeriod = -1, + .retentionSize = -1, + .level = TAOS_WAL_FSYNC, + }; + + pMnode->pWal = walOpen(path, &cfg); + if (pMnode->pWal == NULL) { + mError("failed to open wal since %s", terrstr()); + return -1; + } + + return 0; +} + +static void mndCloseWal(SMnode *pMnode) { + if (pMnode->pWal != NULL) { + walClose(pMnode->pWal); + pMnode->pWal = NULL; + } +} + static int32_t mndInitSdb(SMnode *pMnode) { SSdbOpt opt = {0}; opt.path = pMnode->path; opt.pMnode = pMnode; + opt.pWal = pMnode->pWal; pMnode->pSdb = sdbInit(&opt); if (pMnode->pSdb == NULL) { @@ -156,7 +186,6 @@ static int32_t mndOpenSdb(SMnode *pMnode) { if (!pMnode->deploy) { return sdbReadFile(pMnode->pSdb); } else { - // return sdbDeploy(pMnode->pSdb);; return 0; } } @@ -182,6 +211,7 @@ static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCle } static int32_t mndInitSteps(SMnode *pMnode) { + if (mndAllocStep(pMnode, "mnode-wal", mndInitWal, mndCloseWal) != 0) return -1; if (mndAllocStep(pMnode, "mnode-sdb", mndInitSdb, mndCleanupSdb) != 0) return -1; if (mndAllocStep(pMnode, "mnode-trans", mndInitTrans, mndCleanupTrans) != 0) return -1; if (mndAllocStep(pMnode, 
"mnode-cluster", mndInitCluster, mndCleanupCluster) != 0) return -1; @@ -201,7 +231,7 @@ static int32_t mndInitSteps(SMnode *pMnode) { if (mndAllocStep(pMnode, "mnode-offset", mndInitOffset, mndCleanupOffset) != 0) return -1; if (mndAllocStep(pMnode, "mnode-vgroup", mndInitVgroup, mndCleanupVgroup) != 0) return -1; if (mndAllocStep(pMnode, "mnode-stb", mndInitStb, mndCleanupStb) != 0) return -1; - if (mndAllocStep(pMnode, "mnode-stb", mndInitSma, mndCleanupSma) != 0) return -1; + if (mndAllocStep(pMnode, "mnode-sma", mndInitSma, mndCleanupSma) != 0) return -1; if (mndAllocStep(pMnode, "mnode-infos", mndInitInfos, mndCleanupInfos) != 0) return -1; if (mndAllocStep(pMnode, "mnode-perfs", mndInitPerfs, mndCleanupPerfs) != 0) return -1; if (mndAllocStep(pMnode, "mnode-db", mndInitDb, mndCleanupDb) != 0) return -1; @@ -376,41 +406,93 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { syncRpcMsgLog2(logBuf, pMsg); taosMemoryFree(syncNodeStr); - if (pMsg->msgType == TDMT_SYNC_TIMEOUT) { - SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg); - code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg); - syncTimeoutDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_PING) { - SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg); - code = syncNodeOnPingCb(pSyncNode, pSyncMsg); - syncPingDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) { - SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg); - code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); - syncPingReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { - SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg); - code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg); - syncClientRequestDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) { - SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg); - code = syncNodeOnRequestVoteCb(pSyncNode, pSyncMsg); - syncRequestVoteDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { 
- SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg); - code = syncNodeOnRequestVoteReplyCb(pSyncNode, pSyncMsg); - syncRequestVoteReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) { - SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg); - code = syncNodeOnAppendEntriesCb(pSyncNode, pSyncMsg); - syncAppendEntriesDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { - SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg); - code = syncNodeOnAppendEntriesReplyCb(pSyncNode, pSyncMsg); - syncAppendEntriesReplyDestroy(pSyncMsg); + // ToDo: ugly! use function pointer + if (syncNodeSnapshotEnable(pSyncNode)) { + if (pMsg->msgType == TDMT_SYNC_TIMEOUT) { + SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg); + code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg); + syncTimeoutDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_PING) { + SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg); + code = syncNodeOnPingCb(pSyncNode, pSyncMsg); + syncPingDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) { + SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg); + code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); + syncPingReplyDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { + SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg); + code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg); + syncClientRequestDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) { + SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg); + code = syncNodeOnRequestVoteSnapshotCb(pSyncNode, pSyncMsg); + syncRequestVoteDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { + SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg); + code = syncNodeOnRequestVoteReplySnapshotCb(pSyncNode, pSyncMsg); + syncRequestVoteReplyDestroy(pSyncMsg); + } else if (pMsg->msgType == 
TDMT_SYNC_APPEND_ENTRIES) { + SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg); + code = syncNodeOnAppendEntriesSnapshotCb(pSyncNode, pSyncMsg); + syncAppendEntriesDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { + SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg); + code = syncNodeOnAppendEntriesReplySnapshotCb(pSyncNode, pSyncMsg); + syncAppendEntriesReplyDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) { + SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pMsg); + code = syncNodeOnSnapshotSendCb(pSyncNode, pSyncMsg); + syncSnapshotSendDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) { + SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pMsg); + code = syncNodeOnSnapshotRspCb(pSyncNode, pSyncMsg); + syncSnapshotRspDestroy(pSyncMsg); + + } else { + mError("failed to process msg:%p since invalid type:%s", pMsg, TMSG_INFO(pMsg->msgType)); + code = TAOS_SYNC_PROPOSE_OTHER_ERROR; + } + } else { - mError("failed to process msg:%p since invalid type:%s", pMsg, TMSG_INFO(pMsg->msgType)); - code = TAOS_SYNC_PROPOSE_OTHER_ERROR; + if (pMsg->msgType == TDMT_SYNC_TIMEOUT) { + SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg); + code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg); + syncTimeoutDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_PING) { + SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg); + code = syncNodeOnPingCb(pSyncNode, pSyncMsg); + syncPingDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) { + SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg); + code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); + syncPingReplyDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { + SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg); + code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg); + syncClientRequestDestroy(pSyncMsg); + } else if (pMsg->msgType == 
TDMT_SYNC_REQUEST_VOTE) { + SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg); + code = syncNodeOnRequestVoteCb(pSyncNode, pSyncMsg); + syncRequestVoteDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { + SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg); + code = syncNodeOnRequestVoteReplyCb(pSyncNode, pSyncMsg); + syncRequestVoteReplyDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) { + SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg); + code = syncNodeOnAppendEntriesCb(pSyncNode, pSyncMsg); + syncAppendEntriesDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { + SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg); + code = syncNodeOnAppendEntriesReplyCb(pSyncNode, pSyncMsg); + syncAppendEntriesReplyDestroy(pSyncMsg); + } else { + mError("failed to process msg:%p since invalid type:%s", pMsg, TMSG_INFO(pMsg->msgType)); + code = TAOS_SYNC_PROPOSE_OTHER_ERROR; + } } mndReleaseSyncRef(pMnode); diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index ce025d5547..a0daa72d9a 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -17,15 +17,27 @@ #include "mndSync.h" #include "mndTrans.h" -int32_t mndSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { +static int32_t mndSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { SMsgHead *pHead = pMsg->pCont; pHead->contLen = htonl(pHead->contLen); pHead->vgId = htonl(pHead->vgId); - return tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); + int32_t code = tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); + if (code != 0) { + rpcFreeCont(pMsg->pCont); + pMsg->pCont = NULL; + } + return code; } -int32_t mndSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { return tmsgSendReq(pEpSet, pMsg); } +static int32_t mndSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { + int32_t code = tmsgSendReq(pEpSet, pMsg); + if (code != 0) 
{ + rpcFreeCont(pMsg->pCont); + pMsg->pCont = NULL; + } + return code; +} void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { SMnode *pMnode = pFsm->data; @@ -34,7 +46,7 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM int32_t transId = sdbGetIdFromRaw(pMnode->pSdb, pRaw); pMgmt->errCode = cbMeta.code; - mTrace("trans:%d, is proposed, savedTransId:%d code:0x%x, ver:%" PRId64 " term:%" PRId64 " role:%s raw:%p", transId, + mDebug("trans:%d, is proposed, saved:%d code:0x%x, index:%" PRId64 " term:%" PRId64 " role:%s raw:%p", transId, pMgmt->transId, cbMeta.code, cbMeta.index, cbMeta.term, syncStr(cbMeta.state), pRaw); if (pMgmt->errCode == 0) { @@ -50,6 +62,10 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM tsem_post(&pMgmt->syncSem); } else { if (cbMeta.index - sdbGetApplyIndex(pMnode->pSdb) > 100) { + SSnapshotMeta sMeta = {0}; + if (syncGetSnapshotMeta(pMnode->syncMgmt.sync, &sMeta) == 0) { + sdbSetCurConfig(pMnode->pSdb, sMeta.lastConfigIndex); + } sdbWriteFile(pMnode->pSdb); } } @@ -57,13 +73,20 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM int32_t mndSyncGetSnapshot(struct SSyncFSM *pFsm, SSnapshot *pSnapshot) { SMnode *pMnode = pFsm->data; - pSnapshot->lastApplyIndex = sdbGetApplyIndex(pMnode->pSdb); - pSnapshot->lastApplyTerm = sdbGetApplyTerm(pMnode->pSdb); + pSnapshot->lastApplyIndex = sdbGetCommitIndex(pMnode->pSdb); + pSnapshot->lastApplyTerm = sdbGetCommitTerm(pMnode->pSdb); + pSnapshot->lastConfigIndex = sdbGetCurConfig(pMnode->pSdb); return 0; } void mndRestoreFinish(struct SSyncFSM *pFsm) { SMnode *pMnode = pFsm->data; + + SSnapshotMeta sMeta = {0}; + if (syncGetSnapshotMeta(pMnode->syncMgmt.sync, &sMeta) == 0) { + sdbSetCurConfig(pMnode->pSdb, sMeta.lastConfigIndex); + } + if (!pMnode->deploy) { mInfo("mnode sync restore finished, and will handle outstanding transactions"); mndTransPullup(pMnode); @@ -78,8 
+101,8 @@ void mndReConfig(struct SSyncFSM *pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) SSyncMgmt *pMgmt = &pMnode->syncMgmt; pMgmt->errCode = cbMeta.code; - mInfo("trans:-1, sync reconfig is proposed, savedTransId:%d code:0x%x, curTerm:%" PRId64 " term:%" PRId64, - pMgmt->transId, cbMeta.code, cbMeta.index, cbMeta.term); + mInfo("trans:-1, sync reconfig is proposed, saved:%d code:0x%x, index:%" PRId64 " term:%" PRId64, pMgmt->transId, + cbMeta.code, cbMeta.index, cbMeta.term); if (pMgmt->transId == -1) { if (pMgmt->errCode != 0) { @@ -144,29 +167,12 @@ SSyncFSM *mndSyncMakeFsm(SMnode *pMnode) { int32_t mndInitSync(SMnode *pMnode) { SSyncMgmt *pMgmt = &pMnode->syncMgmt; - char path[PATH_MAX + 20] = {0}; - snprintf(path, sizeof(path), "%s%swal", pMnode->path, TD_DIRSEP); - SWalCfg cfg = { - .vgId = 1, - .fsyncPeriod = 0, - .rollPeriod = -1, - .segSize = -1, - .retentionPeriod = -1, - .retentionSize = -1, - .level = TAOS_WAL_FSYNC, - }; - - pMgmt->pWal = walOpen(path, &cfg); - if (pMgmt->pWal == NULL) { - mError("failed to open wal since %s", terrstr()); - return -1; - } - SSyncInfo syncInfo = {.vgId = 1, .FpSendMsg = mndSyncSendMsg, .FpEqMsg = mndSyncEqMsg}; snprintf(syncInfo.path, sizeof(syncInfo.path), "%s%ssync", pMnode->path, TD_DIRSEP); - syncInfo.pWal = pMgmt->pWal; + syncInfo.pWal = pMnode->pWal; syncInfo.pFsm = mndSyncMakeFsm(pMnode); syncInfo.isStandBy = pMgmt->standby; + syncInfo.snapshotEnable = true; SSyncCfg *pCfg = &syncInfo.syncCfg; pCfg->replicaNum = pMnode->replica; @@ -196,10 +202,6 @@ void mndCleanupSync(SMnode *pMnode) { mDebug("mnode sync is stopped, id:%" PRId64, pMgmt->sync); tsem_destroy(&pMgmt->syncSem); - if (pMgmt->pWal != NULL) { - walClose(pMgmt->pWal); - } - memset(pMgmt, 0, sizeof(SSyncMgmt)); } diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index c2097c069b..1e98a3bbf9 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -922,7 +922,7 @@ 
static int32_t mndTransSendSingleMsg(SMnode *pMnode, STrans *pTrans, STransActio char detail[1024] = {0}; int32_t len = snprintf(detail, sizeof(detail), "msgType:%s numOfEps:%d inUse:%d", TMSG_INFO(pAction->msgType), pAction->epSet.numOfEps, pAction->epSet.inUse); - for (int32_t i = 0; i < pTrans->lastErrorEpset.numOfEps; ++i) { + for (int32_t i = 0; i < pAction->epSet.numOfEps; ++i) { len += snprintf(detail + len, sizeof(detail) - len, " ep:%d-%s:%u", i, pAction->epSet.eps[i].fqdn, pAction->epSet.eps[i].port); } @@ -1085,6 +1085,8 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) } if (code == 0) { + if (!pMnode->deploy && !mndIsMaster(pMnode)) break; + pTrans->code = 0; pTrans->redoActionPos++; mDebug("trans:%d, %s:%d is executed and need sync to other mnodes", pTrans->id, mndTransStr(pAction->stage), @@ -1386,6 +1388,10 @@ void mndTransPullup(SMnode *pMnode) { mndReleaseTrans(pMnode, pTrans); } + SSnapshotMeta sMeta = {0}; + if (syncGetSnapshotMeta(pMnode->syncMgmt.sync, &sMeta) == 0) { + sdbSetCurConfig(pMnode->pSdb, sMeta.lastConfigIndex); + } sdbWriteFile(pMnode->pSdb); taosArrayDestroy(pArray); } diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index 5244bc657b..76e65ddd92 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -55,6 +55,7 @@ int32_t mndInitVgroup(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_VND_ALTER_REPLICA_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_ALTER_CONFIG_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_ALTER_CONFIRM_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_VND_ALTER_HASHRANGE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_DND_DROP_VNODE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_COMPACT_RSP, mndTransProcessRsp); @@ -1166,7 +1167,155 @@ _OVER: return code; } -static int32_t mndProcessSplitVgroupMsg(SRpcMsg *pReq) { return 0; } +int32_t 
mndBuildAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SArray *pArray) { + if (pVgroup->replica <= 0 || pVgroup->replica == pDb->cfg.replications) { + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, pVgroup, TDMT_VND_ALTER_CONFIG) != 0) { + return -1; + } + } else { + SVgObj newVgroup = {0}; + memcpy(&newVgroup, pVgroup, sizeof(SVgObj)); + mndTransSetSerial(pTrans); + + if (newVgroup.replica < pDb->cfg.replications) { + mInfo("db:%s, vgId:%d, vn:0 dnode:%d, will add 2 vnodes", pVgroup->dbName, pVgroup->vgId, + pVgroup->vnodeGid[0].dnodeId); + + if (mndAddVnodeToVgroup(pMnode, &newVgroup, pArray) != 0) return -1; + if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVgroup, &newVgroup.vnodeGid[1], true) != 0) return -1; + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; + + if (mndAddVnodeToVgroup(pMnode, &newVgroup, pArray) != 0) return -1; + if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVgroup, &newVgroup.vnodeGid[2], true) != 0) return -1; + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; + } else if (newVgroup.replica > pDb->cfg.replications) { + mInfo("db:%s, vgId:%d, will remove 2 vnodes", pVgroup->dbName, pVgroup->vgId); + + SVnodeGid del1 = {0}; + if (mndRemoveVnodeFromVgroup(pMnode, &newVgroup, pArray, &del1) != 0) return -1; + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; + if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVgroup, &del1, true) != 0) return -1; + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; + + SVnodeGid del2 = {0}; + if (mndRemoveVnodeFromVgroup(pMnode, &newVgroup, pArray, &del2) != 0) return -1; + if (mndAddAlterVnodeAction(pMnode, 
pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; + if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVgroup, &del2, true) != 0) return -1; + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; + } else { + } + + SSdbRaw *pVgRaw = mndVgroupActionEncode(&newVgroup); + if (pVgRaw == NULL) return -1; + if (mndTransAppendCommitlog(pTrans, pVgRaw) != 0) { + sdbFreeRaw(pVgRaw); + return -1; + } + sdbSetRawStatus(pVgRaw, SDB_STATUS_READY); + } + + return 0; +} + +static int32_t mndAddAdjustVnodeHashRangeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup) { + return 0; +} + +static int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup) { + int32_t code = -1; + SSdbRaw *pRaw = NULL; + STrans *pTrans = NULL; + SArray *pArray = mndBuildDnodesArray(pMnode, 0); + + pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_GLOBAL, pReq); + if (pTrans == NULL) goto _OVER; + mndTransSetSerial(pTrans); + mDebug("trans:%d, used to split vgroup, vgId:%d", pTrans->id, pVgroup->vgId); + + SVgObj newVg1 = {0}; + memcpy(&newVg1, pVgroup, sizeof(SVgObj)); + mInfo("vgId:%d, vgroup info before split, replica:%d hashBegin:%u hashEnd:%u", newVg1.vgId, newVg1.replica, + newVg1.hashBegin, newVg1.hashEnd); + for (int32_t i = 0; i < newVg1.replica; ++i) { + mInfo("vgId:%d, vnode:%d dnode:%d", newVg1.vgId, i, newVg1.vnodeGid[i].dnodeId); + } + + if (newVg1.replica == 1) { + if (mndAddVnodeToVgroup(pMnode, &newVg1, pArray) != 0) goto _OVER; + if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVg1, &newVg1.vnodeGid[1], true) != 0) goto _OVER; + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVg1, TDMT_VND_ALTER_REPLICA) != 0) goto _OVER; + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg1) != 0) goto _OVER; + } else if (newVg1.replica == 3) { + SVnodeGid del1 = {0}; + if (mndRemoveVnodeFromVgroup(pMnode, &newVg1, pArray, &del1) != 0) goto _OVER; + if (mndAddAlterVnodeAction(pMnode, 
pTrans, pDb, &newVg1, TDMT_VND_ALTER_REPLICA) != 0) goto _OVER; + if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVg1, &del1, true) != 0) goto _OVER; + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg1) != 0) goto _OVER; + } else { + goto _OVER; + } + + SVgObj newVg2 = {0}; + memcpy(&newVg1, &newVg2, sizeof(SVgObj)); + newVg1.replica = 1; + newVg1.hashEnd = (newVg1.hashBegin + newVg1.hashEnd) / 2; + memset(&newVg1.vnodeGid[1], 0, sizeof(SVnodeGid)); + + newVg2.replica = 1; + newVg2.hashBegin = newVg1.hashEnd + 1; + memcpy(&newVg2.vnodeGid[0], &newVg2.vnodeGid[1], sizeof(SVnodeGid)); + memset(&newVg1.vnodeGid[1], 0, sizeof(SVnodeGid)); + + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVg1, TDMT_VND_ALTER_HASHRANGE) != 0) goto _OVER; + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVg2, TDMT_VND_ALTER_HASHRANGE) != 0) goto _OVER; + + // adjust vgroup + if (mndBuildAlterVgroupAction(pMnode, pTrans, pDb, &newVg1, pArray) != 0) goto _OVER; + if (mndBuildAlterVgroupAction(pMnode, pTrans, pDb, &newVg2, pArray) != 0) goto _OVER; + +_OVER: + mndTransDrop(pTrans); + sdbFreeRaw(pRaw); + return code; +} + +static int32_t mndProcessSplitVgroupMsg(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + int32_t code = -1; + int32_t vgId = 2; + SUserObj *pUser = NULL; + SVgObj *pVgroup = NULL; + SDbObj *pDb = NULL; + + mDebug("vgId:%d, start to split", vgId); + + pVgroup = mndAcquireVgroup(pMnode, vgId); + if (pVgroup == NULL) goto _OVER; + + pDb = mndAcquireDb(pMnode, pVgroup->dbName); + if (pDb == NULL) goto _OVER; + + pUser = mndAcquireUser(pMnode, pReq->conn.user); + if (pUser == NULL) { + terrno = TSDB_CODE_MND_NO_USER_FROM_CONN; + goto _OVER; + } + + if (mndCheckNodeAuth(pUser) != 0) { + goto _OVER; + } + + code = mndSplitVgroup(pMnode, pReq, pDb, pVgroup); + if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; + +_OVER: + mndReleaseUser(pMnode, pUser); + mndReleaseVgroup(pMnode, pVgroup); + mndReleaseDb(pMnode, pDb); + return code; +} static int32_t 
mndSetBalanceVgroupInfoToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SDnodeObj *pSrc, SDnodeObj *pDst) { diff --git a/source/dnode/mnode/sdb/inc/sdb.h b/source/dnode/mnode/sdb/inc/sdb.h index 9444543804..8536c451b7 100644 --- a/source/dnode/mnode/sdb/inc/sdb.h +++ b/source/dnode/mnode/sdb/inc/sdb.h @@ -22,6 +22,7 @@ #include "tlockfree.h" #include "tlog.h" #include "tmsg.h" +#include "wal.h" #ifdef __cplusplus extern "C" { @@ -165,12 +166,14 @@ typedef struct SSdbRow { typedef struct SSdb { SMnode *pMnode; + SWal *pWal; char *currDir; char *tmpDir; int64_t lastCommitVer; int64_t lastCommitTerm; int64_t curVer; int64_t curTerm; + int64_t curConfig; int64_t tableVer[SDB_MAX]; int64_t maxId[SDB_MAX]; EKeyType keyTypes[SDB_MAX]; @@ -205,6 +208,7 @@ typedef struct { typedef struct SSdbOpt { const char *path; SMnode *pMnode; + SWal *pWal; } SSdbOpt; /** @@ -358,9 +362,13 @@ int64_t sdbGetTableVer(SSdb *pSdb, ESdbType type); * @return int32_t The current index of sdb */ void sdbSetApplyIndex(SSdb *pSdb, int64_t index); -int64_t sdbGetApplyIndex(SSdb *pSdb); void sdbSetApplyTerm(SSdb *pSdb, int64_t term); +void sdbSetCurConfig(SSdb *pSdb, int64_t config); +int64_t sdbGetApplyIndex(SSdb *pSdb); int64_t sdbGetApplyTerm(SSdb *pSdb); +int64_t sdbGetCommitIndex(SSdb *pSdb); +int64_t sdbGetCommitTerm(SSdb *pSdb); +int64_t sdbGetCurConfig(SSdb *pSdb); SSdbRaw *sdbAllocRaw(ESdbType type, int8_t sver, int32_t dataLen); void sdbFreeRaw(SSdbRaw *pRaw); diff --git a/source/dnode/mnode/sdb/src/sdb.c b/source/dnode/mnode/sdb/src/sdb.c index 0526ea5c2d..c11aaaaa8a 100644 --- a/source/dnode/mnode/sdb/src/sdb.c +++ b/source/dnode/mnode/sdb/src/sdb.c @@ -52,10 +52,12 @@ SSdb *sdbInit(SSdbOpt *pOption) { pSdb->keyTypes[i] = SDB_KEY_INT32; } + pSdb->pWal = pOption->pWal; pSdb->curVer = -1; pSdb->curTerm = -1; pSdb->lastCommitVer = -1; pSdb->lastCommitTerm = -1; + pSdb->curConfig = -1; pSdb->pMnode = pOption->pMnode; taosThreadMutexInit(&pSdb->filelock, NULL); 
mDebug("sdb init successfully"); @@ -159,8 +161,16 @@ static int32_t sdbCreateDir(SSdb *pSdb) { void sdbSetApplyIndex(SSdb *pSdb, int64_t index) { pSdb->curVer = index; } -int64_t sdbGetApplyIndex(SSdb *pSdb) { return pSdb->curVer; } - void sdbSetApplyTerm(SSdb *pSdb, int64_t term) { pSdb->curTerm = term; } +void sdbSetCurConfig(SSdb *pSdb, int64_t config) { pSdb->curConfig = config; } + +int64_t sdbGetApplyIndex(SSdb *pSdb) { return pSdb->curVer; } + int64_t sdbGetApplyTerm(SSdb *pSdb) { return pSdb->curTerm; } + +int64_t sdbGetCommitIndex(SSdb *pSdb) { return pSdb->lastCommitVer; } + +int64_t sdbGetCommitTerm(SSdb *pSdb) { return pSdb->lastCommitTerm; } + +int64_t sdbGetCurConfig(SSdb *pSdb) { return pSdb->curConfig; } \ No newline at end of file diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index 83135491a9..f98ecf5343 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -110,6 +110,16 @@ static int32_t sdbReadFileHead(SSdb *pSdb, TdFilePtr pFile) { return -1; } + ret = taosReadFile(pFile, &pSdb->curConfig, sizeof(int64_t)); + if (ret < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + if (ret != sizeof(int64_t)) { + terrno = TSDB_CODE_FILE_CORRUPTED; + return -1; + } + for (int32_t i = 0; i < SDB_TABLE_SIZE; ++i) { int64_t maxId = 0; ret = taosReadFile(pFile, &maxId, sizeof(int64_t)); @@ -173,6 +183,11 @@ static int32_t sdbWriteFileHead(SSdb *pSdb, TdFilePtr pFile) { return -1; } + if (taosWriteFile(pFile, &pSdb->curConfig, sizeof(int64_t)) != sizeof(int64_t)) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + for (int32_t i = 0; i < SDB_TABLE_SIZE; ++i) { int64_t maxId = 0; if (i < SDB_MAX) { @@ -288,8 +303,8 @@ static int32_t sdbReadFileImp(SSdb *pSdb) { pSdb->lastCommitVer = pSdb->curVer; pSdb->lastCommitTerm = pSdb->curTerm; memcpy(pSdb->tableVer, tableVer, sizeof(tableVer)); - mDebug("read sdb file:%s successfully, ver:%" PRId64 " term:%" PRId64, file, 
pSdb->lastCommitVer, - pSdb->lastCommitTerm); + mDebug("read sdb file:%s successfully, index:%" PRId64 " term:%" PRId64 " config:%" PRId64, file, pSdb->lastCommitVer, + pSdb->lastCommitTerm, pSdb->curConfig); _OVER: taosCloseFile(&pFile); @@ -426,12 +441,23 @@ static int32_t sdbWriteFileImp(SSdb *pSdb) { } int32_t sdbWriteFile(SSdb *pSdb) { + int32_t code = 0; if (pSdb->curVer == pSdb->lastCommitVer) { return 0; } taosThreadMutexLock(&pSdb->filelock); - int32_t code = sdbWriteFileImp(pSdb); + if (pSdb->pWal != NULL) { + code = walBeginSnapshot(pSdb->pWal, pSdb->curVer); + } + if (code == 0) { + code = sdbWriteFileImp(pSdb); + } + if (code == 0) { + if (pSdb->pWal != NULL) { + code = walEndSnapshot(pSdb->pWal); + } + } if (code != 0) { mError("failed to write sdb file since %s", terrstr()); } @@ -496,6 +522,9 @@ int32_t sdbStartRead(SSdb *pSdb, SSdbIter **ppIter) { snprintf(datafile, sizeof(datafile), "%s%ssdb.data", pSdb->currDir, TD_DIRSEP); taosThreadMutexLock(&pSdb->filelock); + int64_t commitIndex = pSdb->lastCommitVer; + int64_t commitTerm = pSdb->lastCommitTerm; + int64_t curConfig = pSdb->curConfig; if (taosCopyFile(datafile, pIter->name) < 0) { taosThreadMutexUnlock(&pSdb->filelock); terrno = TAOS_SYSTEM_ERROR(errno); @@ -514,7 +543,8 @@ int32_t sdbStartRead(SSdb *pSdb, SSdbIter **ppIter) { } *ppIter = pIter; - mInfo("sdbiter:%p, is created to read snapshot, file:%s", pIter, pIter->name); + mInfo("sdbiter:%p, is created to read snapshot, index:%" PRId64 " term:%" PRId64 " config:%" PRId64 " file:%s", pIter, + commitIndex, commitTerm, curConfig, pIter->name); return 0; } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 55ab5f5729..eb489a6d81 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -24,6 +24,7 @@ static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t version, void *pReq static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t version, void 
*pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessCreateTSmaReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); +static int32_t vnodeProcessAlterHasnRangeReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessWriteMsg(SVnode *pVnode, int64_t version, SRpcMsg *pMsg, SRpcMsg *pRsp); static int32_t vnodeProcessExpWndsClrReq(SVnode *pVnode, void *pReq, int32_t len, SRpcMsg *pRsp); @@ -164,6 +165,9 @@ int32_t vnodeProcessWriteReq(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp case TDMT_VND_ALTER_CONFIRM: vnodeProcessAlterConfirmReq(pVnode, version, pReq, len, pRsp); break; + case TDMT_VND_ALTER_HASHRANGE: + vnodeProcessAlterHasnRangeReq(pVnode, version, pReq, len, pRsp); + break; case TDMT_VND_ALTER_CONFIG: break; default: @@ -933,4 +937,14 @@ _err: vError("vgId:%d, success to process expWnds clear for tsma %" PRIi64 " version %" PRIi64 " since %s", TD_VID(pVnode), req.indexUid, req.version, terrstr()); return -1; +} + +static int32_t vnodeProcessAlterHasnRangeReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp) { + vInfo("vgId:%d, alter hashrange msg will be processed", TD_VID(pVnode)); + + // todo + // 1. stop work + // 2. adjust hash range / compact / remove wals / rename vgroups + // 3. 
reload sync + return 0; } \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 203eecd8ab..816c0cfac9 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -252,6 +252,8 @@ static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) { int32_t vnodeSyncOpen(SVnode *pVnode, char *path) { SSyncInfo syncInfo = { + .isStandBy = false, + .snapshotEnable = false, .vgId = pVnode->config.vgId, .isStandBy = pVnode->config.standby, .syncCfg = pVnode->config.syncCfg, diff --git a/source/libs/index/inc/indexCache.h b/source/libs/index/inc/indexCache.h index 6c95eb987b..8b5885d58b 100644 --- a/source/libs/index/inc/indexCache.h +++ b/source/libs/index/inc/indexCache.h @@ -62,25 +62,25 @@ typedef struct CacheTerm { } CacheTerm; // -IndexCache* indexCacheCreate(SIndex* idx, uint64_t suid, const char* colName, int8_t type); +IndexCache* idxCacheCreate(SIndex* idx, uint64_t suid, const char* colName, int8_t type); -void indexCacheForceToMerge(void* cache); -void indexCacheDestroy(void* cache); -void indexCacheBroadcast(void* cache); -void indexCacheWait(void* cache); +void idxCacheForceToMerge(void* cache); +void idxCacheDestroy(void* cache); +void idxCacheBroadcast(void* cache); +void idxCacheWait(void* cache); -Iterate* indexCacheIteratorCreate(IndexCache* cache); +Iterate* idxCacheIteratorCreate(IndexCache* cache); void idxCacheIteratorDestroy(Iterate* iiter); -int indexCachePut(void* cache, SIndexTerm* term, uint64_t uid); +int idxCachePut(void* cache, SIndexTerm* term, uint64_t uid); // int indexCacheGet(void *cache, uint64_t *rst); -int indexCacheSearch(void* cache, SIndexTermQuery* query, SIdxTRslt* tr, STermValueType* s); +int idxCacheSearch(void* cache, SIndexTermQuery* query, SIdxTRslt* tr, STermValueType* s); -void indexCacheRef(IndexCache* cache); -void indexCacheUnRef(IndexCache* cache); +void idxCacheRef(IndexCache* cache); +void idxCacheUnRef(IndexCache* 
cache); -void indexCacheDebug(IndexCache* cache); +void idxCacheDebug(IndexCache* cache); void idxCacheDestroyImm(IndexCache* cache); #ifdef __cplusplus diff --git a/source/libs/index/inc/indexComm.h b/source/libs/index/inc/indexComm.h index bccba98116..09fd2f3555 100644 --- a/source/libs/index/inc/indexComm.h +++ b/source/libs/index/inc/indexComm.h @@ -34,11 +34,11 @@ typedef enum { MATCH, CONTINUE, BREAK } TExeCond; typedef TExeCond (*_cache_range_compare)(void* a, void* b, int8_t type); -__compar_fn_t indexGetCompar(int8_t type); +__compar_fn_t idxGetCompar(int8_t type); TExeCond tCompare(__compar_fn_t func, int8_t cmpType, void* a, void* b, int8_t dType); TExeCond tDoCompare(__compar_fn_t func, int8_t cmpType, void* a, void* b); -_cache_range_compare indexGetCompare(RangeType ty); +_cache_range_compare idxGetCompare(RangeType ty); int32_t idxConvertData(void* src, int8_t type, void** dst); int32_t idxConvertDataToStr(void* src, int8_t type, void** dst); diff --git a/source/libs/index/inc/indexInt.h b/source/libs/index/inc/indexInt.h index 47f7260d3a..906cbb6a20 100644 --- a/source/libs/index/inc/indexInt.h +++ b/source/libs/index/inc/indexInt.h @@ -133,24 +133,24 @@ typedef struct TFileCacheKey { } ICacheKey; int idxFlushCacheToTFile(SIndex* sIdx, void*, bool quit); -int64_t indexAddRef(void* p); -int32_t indexRemoveRef(int64_t ref); -void indexAcquireRef(int64_t ref); -void indexReleaseRef(int64_t ref); +int64_t idxAddRef(void* p); +int32_t idxRemoveRef(int64_t ref); +void idxAcquireRef(int64_t ref); +void idxReleaseRef(int64_t ref); -int32_t indexSerialCacheKey(ICacheKey* key, char* buf); +int32_t idxSerialCacheKey(ICacheKey* key, char* buf); // int32_t indexSerialKey(ICacheKey* key, char* buf); // int32_t indexSerialTermKey(SIndexTerm* itm, char* buf); -#define INDEX_TYPE_CONTAIN_EXTERN_TYPE(ty, exTy) (((ty >> 4) & (exTy)) != 0) +#define IDX_TYPE_CONTAIN_EXTERN_TYPE(ty, exTy) (((ty >> 4) & (exTy)) != 0) -#define INDEX_TYPE_GET_TYPE(ty) (ty & 0x0F) +#define 
IDX_TYPE_GET_TYPE(ty) (ty & 0x0F) -#define INDEX_TYPE_ADD_EXTERN_TYPE(ty, exTy) \ - do { \ - uint8_t oldTy = ty; \ - ty = (ty >> 4) | exTy; \ - ty = (ty << 4) | oldTy; \ +#define IDX_TYPE_ADD_EXTERN_TYPE(ty, exTy) \ + do { \ + uint8_t oldTy = ty; \ + ty = (ty >> 4) | exTy; \ + ty = (ty << 4) | oldTy; \ } while (0) #ifdef __cplusplus diff --git a/source/libs/index/inc/indexTfile.h b/source/libs/index/inc/indexTfile.h index ca55aa93da..6cfea5bc0b 100644 --- a/source/libs/index/inc/indexTfile.h +++ b/source/libs/index/inc/indexTfile.h @@ -117,10 +117,10 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order); int tfileWriterFinish(TFileWriter* tw); // -IndexTFile* indexTFileCreate(const char* path); -void indexTFileDestroy(IndexTFile* tfile); -int indexTFilePut(void* tfile, SIndexTerm* term, uint64_t uid); -int indexTFileSearch(void* tfile, SIndexTermQuery* query, SIdxTRslt* tr); +IndexTFile* idxTFileCreate(const char* path); +void idxTFileDestroy(IndexTFile* tfile); +int idxTFilePut(void* tfile, SIndexTerm* term, uint64_t uid); +int idxTFileSearch(void* tfile, SIndexTermQuery* query, SIdxTRslt* tr); Iterate* tfileIteratorCreate(TFileReader* reader); void tfileIteratorDestroy(Iterate* iterator); diff --git a/source/libs/index/src/index.c b/source/libs/index/src/index.c index 9b8bee5623..04d7e04b30 100644 --- a/source/libs/index/src/index.c +++ b/source/libs/index/src/index.c @@ -90,7 +90,7 @@ static void idxMergeCacheAndTFile(SArray* result, IterateValue* icache, IterateV // static int32_t indexSerialTermKey(SIndexTerm* itm, char* buf); // int32_t indexSerialKey(ICacheKey* key, char* buf); -static void indexPost(void* idx) { +static void idxPost(void* idx) { SIndex* pIdx = idx; tsem_post(&pIdx->sem); } @@ -106,8 +106,8 @@ int indexOpen(SIndexOpts* opts, const char* path, SIndex** index) { return -1; } - // sIdx->cache = (void*)indexCacheCreate(sIdx); - sIdx->tindex = indexTFileCreate(path); + // sIdx->cache = (void*)idxCacheCreate(sIdx); + sIdx->tindex = 
idxTFileCreate(path); if (sIdx->tindex == NULL) { goto END; } @@ -118,8 +118,8 @@ int indexOpen(SIndexOpts* opts, const char* path, SIndex** index) { taosThreadMutexInit(&sIdx->mtx, NULL); tsem_init(&sIdx->sem, 0, 0); - sIdx->refId = indexAddRef(sIdx); - indexAcquireRef(sIdx->refId); + sIdx->refId = idxAddRef(sIdx); + idxAcquireRef(sIdx->refId); *index = sIdx; return 0; @@ -136,7 +136,7 @@ void indexDestroy(void* handle) { SIndex* sIdx = handle; taosThreadMutexDestroy(&sIdx->mtx); tsem_destroy(&sIdx->sem); - indexTFileDestroy(sIdx->tindex); + idxTFileDestroy(sIdx->tindex); taosMemoryFree(sIdx->path); taosMemoryFree(sIdx); return; @@ -147,33 +147,33 @@ void indexClose(SIndex* sIdx) { void* iter = taosHashIterate(sIdx->colObj, NULL); while (iter) { IndexCache** pCache = iter; - indexCacheForceToMerge((void*)(*pCache)); + idxCacheForceToMerge((void*)(*pCache)); indexInfo("%s wait to merge", (*pCache)->colName); indexWait((void*)(sIdx)); indexInfo("%s finish to wait", (*pCache)->colName); iter = taosHashIterate(sIdx->colObj, iter); - indexCacheUnRef(*pCache); + idxCacheUnRef(*pCache); } taosHashCleanup(sIdx->colObj); sIdx->colObj = NULL; } - indexReleaseRef(sIdx->refId); - indexRemoveRef(sIdx->refId); + idxReleaseRef(sIdx->refId); + idxRemoveRef(sIdx->refId); } -int64_t indexAddRef(void* p) { +int64_t idxAddRef(void* p) { // impl return taosAddRef(indexRefMgt, p); } -int32_t indexRemoveRef(int64_t ref) { +int32_t idxRemoveRef(int64_t ref) { // impl later return taosRemoveRef(indexRefMgt, ref); } -void indexAcquireRef(int64_t ref) { +void idxAcquireRef(int64_t ref) { // impl taosAcquireRef(indexRefMgt, ref); } -void indexReleaseRef(int64_t ref) { +void idxReleaseRef(int64_t ref) { // impl taosReleaseRef(indexRefMgt, ref); } @@ -186,11 +186,11 @@ int indexPut(SIndex* index, SIndexMultiTerm* fVals, uint64_t uid) { char buf[128] = {0}; ICacheKey key = {.suid = p->suid, .colName = p->colName, .nColName = strlen(p->colName), .colType = p->colType}; - int32_t sz = 
indexSerialCacheKey(&key, buf); + int32_t sz = idxSerialCacheKey(&key, buf); IndexCache** cache = taosHashGet(index->colObj, buf, sz); if (cache == NULL) { - IndexCache* pCache = indexCacheCreate(index, p->suid, p->colName, p->colType); + IndexCache* pCache = idxCacheCreate(index, p->suid, p->colName, p->colType); taosHashPut(index->colObj, buf, sz, &pCache, sizeof(void*)); } } @@ -201,12 +201,12 @@ int indexPut(SIndex* index, SIndexMultiTerm* fVals, uint64_t uid) { char buf[128] = {0}; ICacheKey key = {.suid = p->suid, .colName = p->colName, .nColName = strlen(p->colName), .colType = p->colType}; - int32_t sz = indexSerialCacheKey(&key, buf); + int32_t sz = idxSerialCacheKey(&key, buf); indexDebug("w suid: %" PRIu64 ", colName: %s, colType: %d", key.suid, key.colName, key.colType); IndexCache** cache = taosHashGet(index->colObj, buf, sz); assert(*cache != NULL); - int ret = indexCachePut(*cache, p, uid); + int ret = idxCachePut(*cache, p, uid); if (ret != 0) { return ret; } @@ -289,7 +289,7 @@ SIndexTerm* indexTermCreate(int64_t suid, SIndexOperOnColumn oper, uint8_t colTy tm->nColName = nColName; char* buf = NULL; - int32_t len = idxConvertDataToStr((void*)colVal, INDEX_TYPE_GET_TYPE(colType), (void**)&buf); + int32_t len = idxConvertDataToStr((void*)colVal, IDX_TYPE_GET_TYPE(colType), (void**)&buf); assert(len != -1); tm->colVal = buf; @@ -331,7 +331,7 @@ static int idxTermSearch(SIndex* sIdx, SIndexTermQuery* query, SArray** result) ICacheKey key = { .suid = term->suid, .colName = term->colName, .nColName = strlen(term->colName), .colType = term->colType}; indexDebug("r suid: %" PRIu64 ", colName: %s, colType: %d", key.suid, key.colName, key.colType); - int32_t sz = indexSerialCacheKey(&key, buf); + int32_t sz = idxSerialCacheKey(&key, buf); taosThreadMutexLock(&sIdx->mtx); IndexCache** pCache = taosHashGet(sIdx->colObj, buf, sz); @@ -345,14 +345,14 @@ static int idxTermSearch(SIndex* sIdx, SIndexTermQuery* query, SArray** result) int64_t st = 
taosGetTimestampUs(); SIdxTRslt* tr = idxTRsltCreate(); - if (0 == indexCacheSearch(cache, query, tr, &s)) { + if (0 == idxCacheSearch(cache, query, tr, &s)) { if (s == kTypeDeletion) { indexInfo("col: %s already drop by", term->colName); // coloum already drop by other oper, no need to query tindex return 0; } else { st = taosGetTimestampUs(); - if (0 != indexTFileSearch(sIdx->tindex, query, tr)) { + if (0 != idxTFileSearch(sIdx->tindex, query, tr)) { indexError("corrupt at index(TFile) col:%s val: %s", term->colName, term->colVal); goto END; } @@ -465,23 +465,23 @@ int idxFlushCacheToTFile(SIndex* sIdx, void* cache, bool quit) { IndexCache* pCache = (IndexCache*)cache; - while (quit && atomic_load_32(&pCache->merging) == 1) { - } + while (quit && atomic_load_32(&pCache->merging) == 1) + ; TFileReader* pReader = tfileGetReaderByCol(sIdx->tindex, pCache->suid, pCache->colName); if (pReader == NULL) { indexWarn("empty tfile reader found"); } // handle flush - Iterate* cacheIter = indexCacheIteratorCreate(pCache); + Iterate* cacheIter = idxCacheIteratorCreate(pCache); if (cacheIter == NULL) { indexError("%p immtable is empty, ignore merge opera", pCache); idxCacheDestroyImm(pCache); tfileReaderUnRef(pReader); atomic_store_32(&pCache->merging, 0); if (quit) { - indexPost(sIdx); + idxPost(sIdx); } - indexReleaseRef(sIdx->refId); + idxReleaseRef(sIdx->refId); return 0; } @@ -532,7 +532,7 @@ int idxFlushCacheToTFile(SIndex* sIdx, void* cache, bool quit) { tfileIteratorDestroy(tfileIter); tfileReaderUnRef(pReader); - indexCacheUnRef(pCache); + idxCacheUnRef(pCache); int64_t cost = taosGetTimestampUs() - st; if (ret != 0) { @@ -542,9 +542,9 @@ int idxFlushCacheToTFile(SIndex* sIdx, void* cache, bool quit) { } atomic_store_32(&pCache->merging, 0); if (quit) { - indexPost(sIdx); + idxPost(sIdx); } - indexReleaseRef(sIdx->refId); + idxReleaseRef(sIdx->refId); return ret; } @@ -561,7 +561,7 @@ void iterateValueDestroy(IterateValue* value, bool destroy) { value->colVal = NULL; 
} -static int64_t indexGetAvaialbleVer(SIndex* sIdx, IndexCache* cache) { +static int64_t idxGetAvailableVer(SIndex* sIdx, IndexCache* cache) { ICacheKey key = {.suid = cache->suid, .colName = cache->colName, .nColName = strlen(cache->colName)}; int64_t ver = CACHE_VERSION(cache); @@ -579,7 +579,7 @@ static int64_t indexGetAvaialbleVer(SIndex* sIdx, IndexCache* cache) { return ver; } static int idxGenTFile(SIndex* sIdx, IndexCache* cache, SArray* batch) { - int64_t version = indexGetAvaialbleVer(sIdx, cache); + int64_t version = idxGetAvailableVer(sIdx, cache); indexInfo("file name version: %" PRId64 "", version); uint8_t colType = cache->type; @@ -620,8 +620,8 @@ END: return -1; } -int32_t indexSerialCacheKey(ICacheKey* key, char* buf) { - bool hasJson = INDEX_TYPE_CONTAIN_EXTERN_TYPE(key->colType, TSDB_DATA_TYPE_JSON); +int32_t idxSerialCacheKey(ICacheKey* key, char* buf) { + bool hasJson = IDX_TYPE_CONTAIN_EXTERN_TYPE(key->colType, TSDB_DATA_TYPE_JSON); char* p = buf; char tbuf[65] = {0}; diff --git a/source/libs/index/src/indexCache.c b/source/libs/index/src/indexCache.c index 20cd9c8b4c..040e8ed830 100644 --- a/source/libs/index/src/indexCache.c +++ b/source/libs/index/src/indexCache.c @@ -68,7 +68,7 @@ static int32_t (*cacheSearch[][QUERY_MAX])(void* cache, SIndexTerm* ct, SIdxTRsl cacheSearchLessThan_JSON, cacheSearchLessEqual_JSON, cacheSearchGreaterThan_JSON, cacheSearchGreaterEqual_JSON, cacheSearchRange_JSON}}; -static void doMergeWork(SSchedMsg* msg); +static void idxDoMergeWork(SSchedMsg* msg); static bool idxCacheIteratorNext(Iterate* itera); static int32_t cacheSearchTerm(void* cache, SIndexTerm* term, SIdxTRslt* tr, STermValueType* s) { @@ -127,7 +127,7 @@ static int32_t cacheSearchCompareFunc(void* cache, SIndexTerm* term, SIdxTRslt* MemTable* mem = cache; IndexCache* pCache = mem->pCache; - _cache_range_compare cmpFn = indexGetCompare(type); + _cache_range_compare cmpFn = idxGetCompare(type); CacheTerm* pCt = taosMemoryCalloc(1, 
sizeof(CacheTerm)); pCt->colVal = term->colVal; @@ -187,7 +187,7 @@ static int32_t cacheSearchTerm_JSON(void* cache, SIndexTerm* term, SIdxTRslt* tr pCt->version = atomic_load_64(&pCache->version); char* exBuf = NULL; - if (INDEX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON)) { + if (IDX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON)) { exBuf = idxPackJsonData(term); pCt->colVal = exBuf; } @@ -257,7 +257,7 @@ static int32_t cacheSearchCompareFunc_JSON(void* cache, SIndexTerm* term, SIdxTR if (cache == NULL) { return 0; } - _cache_range_compare cmpFn = indexGetCompare(type); + _cache_range_compare cmpFn = idxGetCompare(type); MemTable* mem = cache; IndexCache* pCache = mem->pCache; @@ -266,7 +266,7 @@ static int32_t cacheSearchCompareFunc_JSON(void* cache, SIndexTerm* term, SIdxTR pCt->colVal = term->colVal; pCt->version = atomic_load_64(&pCache->version); - int8_t dType = INDEX_TYPE_GET_TYPE(term->colType); + int8_t dType = IDX_TYPE_GET_TYPE(term->colType); int skip = 0; char* exBuf = NULL; if (type == CONTAINS) { @@ -331,9 +331,9 @@ static int32_t cacheSearchRange(void* cache, SIndexTerm* term, SIdxTRslt* tr, ST // impl later return 0; } -static IterateValue* indexCacheIteratorGetValue(Iterate* iter); +static IterateValue* idxCacheIteratorGetValue(Iterate* iter); -IndexCache* indexCacheCreate(SIndex* idx, uint64_t suid, const char* colName, int8_t type) { +IndexCache* idxCacheCreate(SIndex* idx, uint64_t suid, const char* colName, int8_t type) { IndexCache* cache = taosMemoryCalloc(1, sizeof(IndexCache)); if (cache == NULL) { indexError("failed to create index cache"); @@ -342,7 +342,7 @@ IndexCache* indexCacheCreate(SIndex* idx, uint64_t suid, const char* colName, in cache->mem = idxInternalCacheCreate(type); cache->mem->pCache = cache; - cache->colName = INDEX_TYPE_CONTAIN_EXTERN_TYPE(type, TSDB_DATA_TYPE_JSON) ? tstrdup(JSON_COLUMN) : tstrdup(colName); + cache->colName = IDX_TYPE_CONTAIN_EXTERN_TYPE(type, TSDB_DATA_TYPE_JSON) ? 
tstrdup(JSON_COLUMN) : tstrdup(colName); cache->type = type; cache->index = idx; cache->version = 0; @@ -352,13 +352,13 @@ IndexCache* indexCacheCreate(SIndex* idx, uint64_t suid, const char* colName, in taosThreadMutexInit(&cache->mtx, NULL); taosThreadCondInit(&cache->finished, NULL); - indexCacheRef(cache); + idxCacheRef(cache); if (idx != NULL) { - indexAcquireRef(idx->refId); + idxAcquireRef(idx->refId); } return cache; } -void indexCacheDebug(IndexCache* cache) { +void idxCacheDebug(IndexCache* cache) { MemTable* tbl = NULL; taosThreadMutexLock(&cache->mtx); @@ -405,7 +405,7 @@ void indexCacheDebug(IndexCache* cache) { } } -void indexCacheDestroySkiplist(SSkipList* slt) { +void idxCacheDestroySkiplist(SSkipList* slt) { SSkipListIterator* iter = tSkipListCreateIter(slt); while (iter != NULL && tSkipListIterNext(iter)) { SSkipListNode* node = tSkipListIterGet(iter); @@ -418,11 +418,11 @@ void indexCacheDestroySkiplist(SSkipList* slt) { tSkipListDestroyIter(iter); tSkipListDestroy(slt); } -void indexCacheBroadcast(void* cache) { +void idxCacheBroadcast(void* cache) { IndexCache* pCache = cache; taosThreadCondBroadcast(&pCache->finished); } -void indexCacheWait(void* cache) { +void idxCacheWait(void* cache) { IndexCache* pCache = cache; taosThreadCondWait(&pCache->finished, &pCache->mtx); } @@ -435,14 +435,14 @@ void idxCacheDestroyImm(IndexCache* cache) { tbl = cache->imm; cache->imm = NULL; // or throw int bg thread - indexCacheBroadcast(cache); + idxCacheBroadcast(cache); taosThreadMutexUnlock(&cache->mtx); idxMemUnRef(tbl); idxMemUnRef(tbl); } -void indexCacheDestroy(void* cache) { +void idxCacheDestroy(void* cache) { IndexCache* pCache = cache; if (pCache == NULL) { return; @@ -455,12 +455,12 @@ void indexCacheDestroy(void* cache) { taosThreadMutexDestroy(&pCache->mtx); taosThreadCondDestroy(&pCache->finished); if (pCache->index != NULL) { - indexReleaseRef(((SIndex*)pCache->index)->refId); + idxReleaseRef(((SIndex*)pCache->index)->refId); } 
taosMemoryFree(pCache); } -Iterate* indexCacheIteratorCreate(IndexCache* cache) { +Iterate* idxCacheIteratorCreate(IndexCache* cache) { if (cache->imm == NULL) { return NULL; } @@ -477,7 +477,7 @@ Iterate* indexCacheIteratorCreate(IndexCache* cache) { iiter->val.colVal = NULL; iiter->iter = tbl != NULL ? tSkipListCreateIter(tbl->mem) : NULL; iiter->next = idxCacheIteratorNext; - iiter->getValue = indexCacheIteratorGetValue; + iiter->getValue = idxCacheIteratorGetValue; taosThreadMutexUnlock(&cache->mtx); @@ -492,30 +492,30 @@ void idxCacheIteratorDestroy(Iterate* iter) { taosMemoryFree(iter); } -int indexCacheSchedToMerge(IndexCache* pCache, bool notify) { +int idxCacheSchedToMerge(IndexCache* pCache, bool notify) { SSchedMsg schedMsg = {0}; - schedMsg.fp = doMergeWork; + schedMsg.fp = idxDoMergeWork; schedMsg.ahandle = pCache; if (notify) { schedMsg.thandle = taosMemoryMalloc(1); } schedMsg.msg = NULL; - indexAcquireRef(pCache->index->refId); + idxAcquireRef(pCache->index->refId); taosScheduleTask(indexQhandle, &schedMsg); return 0; } -static void indexCacheMakeRoomForWrite(IndexCache* cache) { +static void idxCacheMakeRoomForWrite(IndexCache* cache) { while (true) { if (cache->occupiedMem * MEM_ESTIMATE_RADIO < MEM_THRESHOLD) { break; } else if (cache->imm != NULL) { // TODO: wake up by condition variable - indexCacheWait(cache); + idxCacheWait(cache); } else { bool quit = cache->occupiedMem >= MEM_SIGNAL_QUIT ? 
true : false; - indexCacheRef(cache); + idxCacheRef(cache); cache->imm = cache->mem; cache->mem = idxInternalCacheCreate(cache->type); cache->mem->pCache = cache; @@ -525,18 +525,18 @@ static void indexCacheMakeRoomForWrite(IndexCache* cache) { } // sched to merge // unref cache in bgwork - indexCacheSchedToMerge(cache, quit); + idxCacheSchedToMerge(cache, quit); } } } -int indexCachePut(void* cache, SIndexTerm* term, uint64_t uid) { +int idxCachePut(void* cache, SIndexTerm* term, uint64_t uid) { if (cache == NULL) { return -1; } - bool hasJson = INDEX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON); + bool hasJson = IDX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON); IndexCache* pCache = cache; - indexCacheRef(pCache); + idxCacheRef(pCache); // encode data CacheTerm* ct = taosMemoryCalloc(1, sizeof(CacheTerm)); if (cache == NULL) { @@ -559,7 +559,7 @@ int indexCachePut(void* cache, SIndexTerm* term, uint64_t uid) { taosThreadMutexLock(&pCache->mtx); pCache->occupiedMem += estimate; - indexCacheMakeRoomForWrite(pCache); + idxCacheMakeRoomForWrite(pCache); MemTable* tbl = pCache->mem; idxMemRef(tbl); tSkipListPut(tbl->mem, (char*)ct); @@ -567,29 +567,29 @@ int indexCachePut(void* cache, SIndexTerm* term, uint64_t uid) { taosThreadMutexUnlock(&pCache->mtx); - indexCacheUnRef(pCache); + idxCacheUnRef(pCache); return 0; // encode end } -void indexCacheForceToMerge(void* cache) { +void idxCacheForceToMerge(void* cache) { IndexCache* pCache = cache; - indexCacheRef(pCache); + idxCacheRef(pCache); taosThreadMutexLock(&pCache->mtx); indexInfo("%p is forced to merge into tfile", pCache); pCache->occupiedMem += MEM_SIGNAL_QUIT; - indexCacheMakeRoomForWrite(pCache); + idxCacheMakeRoomForWrite(pCache); taosThreadMutexUnlock(&pCache->mtx); - indexCacheUnRef(pCache); + idxCacheUnRef(pCache); return; } -int indexCacheDel(void* cache, const char* fieldValue, int32_t fvlen, uint64_t uid, int8_t operType) { +int idxCacheDel(void* cache, const char* 
fieldValue, int32_t fvlen, uint64_t uid, int8_t operType) { IndexCache* pCache = cache; return 0; } -static int32_t indexQueryMem(MemTable* mem, SIndexTermQuery* query, SIdxTRslt* tr, STermValueType* s) { +static int32_t idxQueryMem(MemTable* mem, SIndexTermQuery* query, SIdxTRslt* tr, STermValueType* s) { if (mem == NULL) { return 0; } @@ -597,13 +597,13 @@ static int32_t indexQueryMem(MemTable* mem, SIndexTermQuery* query, SIdxTRslt* t SIndexTerm* term = query->term; EIndexQueryType qtype = query->qType; - if (INDEX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON)) { + if (IDX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON)) { return cacheSearch[1][qtype](mem, term, tr, s); } else { return cacheSearch[0][qtype](mem, term, tr, s); } } -int indexCacheSearch(void* cache, SIndexTermQuery* query, SIdxTRslt* result, STermValueType* s) { +int idxCacheSearch(void* cache, SIndexTermQuery* query, SIdxTRslt* result, STermValueType* s) { int64_t st = taosGetTimestampUs(); if (cache == NULL) { return 0; @@ -618,10 +618,10 @@ int indexCacheSearch(void* cache, SIndexTermQuery* query, SIdxTRslt* result, STe idxMemRef(imm); taosThreadMutexUnlock(&pCache->mtx); - int ret = (mem && mem->mem) ? indexQueryMem(mem, query, result, s) : 0; + int ret = (mem && mem->mem) ? idxQueryMem(mem, query, result, s) : 0; if (ret == 0 && *s != kTypeDeletion) { // continue search in imm - ret = (imm && imm->mem) ? indexQueryMem(imm, query, result, s) : 0; + ret = (imm && imm->mem) ? 
idxQueryMem(imm, query, result, s) : 0; } idxMemUnRef(mem); @@ -631,20 +631,20 @@ int indexCacheSearch(void* cache, SIndexTermQuery* query, SIdxTRslt* result, STe return ret; } -void indexCacheRef(IndexCache* cache) { +void idxCacheRef(IndexCache* cache) { if (cache == NULL) { return; } int ref = T_REF_INC(cache); UNUSED(ref); } -void indexCacheUnRef(IndexCache* cache) { +void idxCacheUnRef(IndexCache* cache) { if (cache == NULL) { return; } int ref = T_REF_DEC(cache); if (ref == 0) { - indexCacheDestroy(cache); + idxCacheDestroy(cache); } } @@ -662,7 +662,7 @@ void idxMemUnRef(MemTable* tbl) { int ref = T_REF_DEC(tbl); if (ref == 0) { SSkipList* slt = tbl->mem; - indexCacheDestroySkiplist(slt); + idxCacheDestroySkiplist(slt); taosMemoryFree(tbl); } } @@ -693,15 +693,15 @@ static int32_t idxCacheTermCompare(const void* l, const void* r) { return cmp; } -static int indexFindCh(char* a, char c) { +static int idxFindCh(char* a, char c) { char* p = a; while (*p != 0 && *p++ != c) { } return p - a; } static int idxCacheJsonTermCompareImpl(char* a, char* b) { - // int alen = indexFindCh(a, '&'); - // int blen = indexFindCh(b, '&'); + // int alen = idxFindCh(a, '&'); + // int blen = idxFindCh(b, '&'); // int cmp = strncmp(a, b, MIN(alen, blen)); // if (cmp == 0) { @@ -730,9 +730,9 @@ static int32_t idxCacheJsonTermCompare(const void* l, const void* r) { return cmp; } static MemTable* idxInternalCacheCreate(int8_t type) { - int ttype = INDEX_TYPE_CONTAIN_EXTERN_TYPE(type, TSDB_DATA_TYPE_JSON) ? TSDB_DATA_TYPE_BINARY : TSDB_DATA_TYPE_BINARY; + int ttype = IDX_TYPE_CONTAIN_EXTERN_TYPE(type, TSDB_DATA_TYPE_JSON) ? TSDB_DATA_TYPE_BINARY : TSDB_DATA_TYPE_BINARY; int32_t (*cmpFn)(const void* l, const void* r) = - INDEX_TYPE_CONTAIN_EXTERN_TYPE(type, TSDB_DATA_TYPE_JSON) ? idxCacheJsonTermCompare : idxCacheTermCompare; + IDX_TYPE_CONTAIN_EXTERN_TYPE(type, TSDB_DATA_TYPE_JSON) ? 
idxCacheJsonTermCompare : idxCacheTermCompare; MemTable* tbl = taosMemoryCalloc(1, sizeof(MemTable)); idxMemRef(tbl); @@ -742,7 +742,7 @@ static MemTable* idxInternalCacheCreate(int8_t type) { return tbl; } -static void doMergeWork(SSchedMsg* msg) { +static void idxDoMergeWork(SSchedMsg* msg) { IndexCache* pCache = msg->ahandle; SIndex* sidx = (SIndex*)pCache->index; @@ -771,7 +771,7 @@ static bool idxCacheIteratorNext(Iterate* itera) { return next; } -static IterateValue* indexCacheIteratorGetValue(Iterate* iter) { +static IterateValue* idxCacheIteratorGetValue(Iterate* iter) { // opt later return &iter->val; } diff --git a/source/libs/index/src/indexComm.c b/source/libs/index/src/indexComm.c index be9243b8dd..99b49f97bd 100644 --- a/source/libs/index/src/indexComm.c +++ b/source/libs/index/src/indexComm.c @@ -75,35 +75,35 @@ char* idxInt2str(int64_t val, char* dst, int radix) { ; return dst - 1; } -__compar_fn_t indexGetCompar(int8_t type) { +__compar_fn_t idxGetCompar(int8_t type) { if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) { return (__compar_fn_t)strcmp; } return getComparFunc(type, 0); } static TExeCond tCompareLessThan(void* a, void* b, int8_t type) { - __compar_fn_t func = indexGetCompar(type); + __compar_fn_t func = idxGetCompar(type); return tCompare(func, QUERY_LESS_THAN, a, b, type); } static TExeCond tCompareLessEqual(void* a, void* b, int8_t type) { - __compar_fn_t func = indexGetCompar(type); + __compar_fn_t func = idxGetCompar(type); return tCompare(func, QUERY_LESS_EQUAL, a, b, type); } static TExeCond tCompareGreaterThan(void* a, void* b, int8_t type) { - __compar_fn_t func = indexGetCompar(type); + __compar_fn_t func = idxGetCompar(type); return tCompare(func, QUERY_GREATER_THAN, a, b, type); } static TExeCond tCompareGreaterEqual(void* a, void* b, int8_t type) { - __compar_fn_t func = indexGetCompar(type); + __compar_fn_t func = idxGetCompar(type); return tCompare(func, QUERY_GREATER_EQUAL, a, b, type); } static TExeCond 
tCompareContains(void* a, void* b, int8_t type) { - __compar_fn_t func = indexGetCompar(type); + __compar_fn_t func = idxGetCompar(type); return tCompare(func, QUERY_TERM, a, b, type); } static TExeCond tCompareEqual(void* a, void* b, int8_t type) { - __compar_fn_t func = indexGetCompar(type); + __compar_fn_t func = idxGetCompar(type); return tCompare(func, QUERY_TERM, a, b, type); } TExeCond tCompare(__compar_fn_t func, int8_t cmptype, void* a, void* b, int8_t dtype) { @@ -205,14 +205,14 @@ TExeCond tDoCompare(__compar_fn_t func, int8_t comparType, void* a, void* b) { static TExeCond (*rangeCompare[])(void* a, void* b, int8_t type) = { tCompareLessThan, tCompareLessEqual, tCompareGreaterThan, tCompareGreaterEqual, tCompareContains, tCompareEqual}; -_cache_range_compare indexGetCompare(RangeType ty) { return rangeCompare[ty]; } +_cache_range_compare idxGetCompare(RangeType ty) { return rangeCompare[ty]; } char* idxPackJsonData(SIndexTerm* itm) { /* * |<-----colname---->|<-----dataType---->|<--------colVal---------->| * |<-----string----->|<-----uint8_t----->|<----depend on dataType-->| */ - uint8_t ty = INDEX_TYPE_GET_TYPE(itm->colType); + uint8_t ty = IDX_TYPE_GET_TYPE(itm->colType); int32_t sz = itm->nColName + itm->nColVal + sizeof(uint8_t) + sizeof(JSON_VALUE_DELIM) * 2 + 1; char* buf = (char*)taosMemoryCalloc(1, sz); @@ -240,7 +240,7 @@ char* idxPackJsonDataPrefix(SIndexTerm* itm, int32_t* skip) { * |<-----colname---->|<-----dataType---->|<--------colVal---------->| * |<-----string----->|<-----uint8_t----->|<----depend on dataType-->| */ - uint8_t ty = INDEX_TYPE_GET_TYPE(itm->colType); + uint8_t ty = IDX_TYPE_GET_TYPE(itm->colType); int32_t sz = itm->nColName + itm->nColVal + sizeof(uint8_t) + sizeof(JSON_VALUE_DELIM) * 2 + 1; char* buf = (char*)taosMemoryCalloc(1, sz); @@ -267,7 +267,7 @@ char* idxPackJsonDataPrefixNoType(SIndexTerm* itm, int32_t* skip) { * |<-----colname---->|<-----dataType---->|<--------colVal---------->| * 
|<-----string----->|<-----uint8_t----->|<----depend on dataType-->| */ - uint8_t ty = INDEX_TYPE_GET_TYPE(itm->colType); + uint8_t ty = IDX_TYPE_GET_TYPE(itm->colType); int32_t sz = itm->nColName + itm->nColVal + sizeof(uint8_t) + sizeof(JSON_VALUE_DELIM) * 2 + 1; char* buf = (char*)taosMemoryCalloc(1, sz); diff --git a/source/libs/index/src/indexFilter.c b/source/libs/index/src/indexFilter.c index e4af4a7a3f..bd78ec574a 100644 --- a/source/libs/index/src/indexFilter.c +++ b/source/libs/index/src/indexFilter.c @@ -318,7 +318,7 @@ int sifLessThan(void *a, void *b, int16_t dtype) { } int sifEqual(void *a, void *b, int16_t dtype) { __compar_fn_t func = getComparFunc(dtype, 0); - //__compar_fn_t func = indexGetCompar(dtype); + //__compar_fn_t func = idxGetCompar(dtype); return (int)tDoCompare(func, QUERY_TERM, a, b); } static Filter sifGetFilterFunc(EIndexQueryType type, bool *reverse) { diff --git a/source/libs/index/src/indexJson.c b/source/libs/index/src/indexJson.c index 88b3d907bb..8ce625dfb9 100644 --- a/source/libs/index/src/indexJson.c +++ b/source/libs/index/src/indexJson.c @@ -30,7 +30,7 @@ int indexJsonPut(SIndexJson *index, SIndexJsonMultiTerm *terms, uint64_t uid) { } else { p->colType = TSDB_DATA_TYPE_DOUBLE; } - INDEX_TYPE_ADD_EXTERN_TYPE(p->colType, TSDB_DATA_TYPE_JSON); + IDX_TYPE_ADD_EXTERN_TYPE(p->colType, TSDB_DATA_TYPE_JSON); } // handle put return indexPut(index, terms, uid); @@ -48,7 +48,7 @@ int indexJsonSearch(SIndexJson *index, SIndexJsonMultiTermQuery *tq, SArray *res } else { p->colType = TSDB_DATA_TYPE_DOUBLE; } - INDEX_TYPE_ADD_EXTERN_TYPE(p->colType, TSDB_DATA_TYPE_JSON); + IDX_TYPE_ADD_EXTERN_TYPE(p->colType, TSDB_DATA_TYPE_JSON); } // handle search return indexSearch(index, tq, result); diff --git a/source/libs/index/src/indexTfile.c b/source/libs/index/src/indexTfile.c index b64db1dde4..d632540ee1 100644 --- a/source/libs/index/src/indexTfile.c +++ b/source/libs/index/src/indexTfile.c @@ -118,7 +118,7 @@ TFileCache* 
tfileCacheCreate(const char* path) { ICacheKey key = {.suid = header->suid, .colName = header->colName, .nColName = (int32_t)strlen(header->colName)}; char buf[128] = {0}; - int32_t sz = indexSerialCacheKey(&key, buf); + int32_t sz = idxSerialCacheKey(&key, buf); assert(sz < sizeof(buf)); taosHashPut(tcache->tableCache, buf, sz, &reader, sizeof(void*)); tfileReaderRef(reader); @@ -149,7 +149,7 @@ void tfileCacheDestroy(TFileCache* tcache) { TFileReader* tfileCacheGet(TFileCache* tcache, ICacheKey* key) { char buf[128] = {0}; - int32_t sz = indexSerialCacheKey(key, buf); + int32_t sz = idxSerialCacheKey(key, buf); assert(sz < sizeof(buf)); TFileReader** reader = taosHashGet(tcache->tableCache, buf, sz); if (reader == NULL || *reader == NULL) { @@ -161,7 +161,7 @@ TFileReader* tfileCacheGet(TFileCache* tcache, ICacheKey* key) { } void tfileCachePut(TFileCache* tcache, ICacheKey* key, TFileReader* reader) { char buf[128] = {0}; - int32_t sz = indexSerialCacheKey(key, buf); + int32_t sz = idxSerialCacheKey(key, buf); // remove last version index reader TFileReader** p = taosHashGet(tcache->tableCache, buf, sz); if (p != NULL && *p != NULL) { @@ -281,7 +281,7 @@ static int32_t tfSearchSuffix(void* reader, SIndexTerm* tem, SIdxTRslt* tr) { return 0; } static int32_t tfSearchRegex(void* reader, SIndexTerm* tem, SIdxTRslt* tr) { - bool hasJson = INDEX_TYPE_CONTAIN_EXTERN_TYPE(tem->colType, TSDB_DATA_TYPE_JSON); + bool hasJson = IDX_TYPE_CONTAIN_EXTERN_TYPE(tem->colType, TSDB_DATA_TYPE_JSON); int ret = 0; char* p = tem->colVal; @@ -305,7 +305,7 @@ static int32_t tfSearchCompareFunc(void* reader, SIndexTerm* tem, SIdxTRslt* tr, int ret = 0; char* p = tem->colVal; int skip = 0; - _cache_range_compare cmpFn = indexGetCompare(type); + _cache_range_compare cmpFn = idxGetCompare(type); SArray* offsets = taosArrayInit(16, sizeof(uint64_t)); @@ -431,7 +431,7 @@ static int32_t tfSearchCompareFunc_JSON(void* reader, SIndexTerm* tem, SIdxTRslt p = idxPackJsonDataPrefix(tem, &skip); } 
- _cache_range_compare cmpFn = indexGetCompare(ctype); + _cache_range_compare cmpFn = idxGetCompare(ctype); SArray* offsets = taosArrayInit(16, sizeof(uint64_t)); @@ -457,7 +457,7 @@ static int32_t tfSearchCompareFunc_JSON(void* reader, SIndexTerm* tem, SIdxTRslt } else if (0 != strncmp(ch, p, skip)) { continue; } - cond = cmpFn(ch + skip, tem->colVal, INDEX_TYPE_GET_TYPE(tem->colType)); + cond = cmpFn(ch + skip, tem->colVal, IDX_TYPE_GET_TYPE(tem->colType)); } if (MATCH == cond) { tfileReaderLoadTableIds((TFileReader*)reader, rt->out.out, tr->total); @@ -476,7 +476,7 @@ int tfileReaderSearch(TFileReader* reader, SIndexTermQuery* query, SIdxTRslt* tr SIndexTerm* term = query->term; EIndexQueryType qtype = query->qType; int ret = 0; - if (INDEX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON)) { + if (IDX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON)) { ret = tfSearch[1][qtype](reader, term, tr); } else { ret = tfSearch[0][qtype](reader, term, tr); @@ -536,7 +536,7 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) { __compar_fn_t fn; int8_t colType = tw->header.colType; - colType = INDEX_TYPE_GET_TYPE(colType); + colType = IDX_TYPE_GET_TYPE(colType); if (colType == TSDB_DATA_TYPE_BINARY || colType == TSDB_DATA_TYPE_NCHAR) { fn = tfileStrCompare; } else { @@ -620,7 +620,7 @@ void tfileWriterDestroy(TFileWriter* tw) { taosMemoryFree(tw); } -IndexTFile* indexTFileCreate(const char* path) { +IndexTFile* idxTFileCreate(const char* path) { TFileCache* cache = tfileCacheCreate(path); if (cache == NULL) { return NULL; @@ -635,7 +635,7 @@ IndexTFile* indexTFileCreate(const char* path) { tfile->cache = cache; return tfile; } -void indexTFileDestroy(IndexTFile* tfile) { +void idxTFileDestroy(IndexTFile* tfile) { if (tfile == NULL) { return; } @@ -644,7 +644,7 @@ void indexTFileDestroy(IndexTFile* tfile) { taosMemoryFree(tfile); } -int indexTFileSearch(void* tfile, SIndexTermQuery* query, SIdxTRslt* result) { +int idxTFileSearch(void* 
tfile, SIndexTermQuery* query, SIdxTRslt* result) { int ret = -1; if (tfile == NULL) { return ret; @@ -667,7 +667,7 @@ int indexTFileSearch(void* tfile, SIndexTermQuery* query, SIdxTRslt* result) { return tfileReaderSearch(reader, query, result); } -int indexTFilePut(void* tfile, SIndexTerm* term, uint64_t uid) { +int idxTFilePut(void* tfile, SIndexTerm* term, uint64_t uid) { // TFileWriterOpt wOpt = {.suid = term->suid, .colType = term->colType, .colName = term->colName, .nColName = // term->nColName, .version = 1}; @@ -845,7 +845,7 @@ static int tfileWriteData(TFileWriter* write, TFileValue* tval) { TFileHeader* header = &write->header; uint8_t colType = header->colType; - colType = INDEX_TYPE_GET_TYPE(colType); + colType = IDX_TYPE_GET_TYPE(colType); FstSlice key = fstSliceCreate((uint8_t*)(tval->colVal), (size_t)strlen(tval->colVal)); if (fstBuilderInsert(write->fb, key, tval->offset)) { fstSliceDestroy(&key); diff --git a/source/libs/index/test/indexTests.cc b/source/libs/index/test/indexTests.cc index 90dea3a377..e18297cd25 100644 --- a/source/libs/index/test/indexTests.cc +++ b/source/libs/index/test/indexTests.cc @@ -521,10 +521,10 @@ class CacheObj { public: CacheObj() { // TODO - cache = indexCacheCreate(NULL, 0, "voltage", TSDB_DATA_TYPE_BINARY); + cache = idxCacheCreate(NULL, 0, "voltage", TSDB_DATA_TYPE_BINARY); } int Put(SIndexTerm* term, int16_t colId, int32_t version, uint64_t uid) { - int ret = indexCachePut(cache, term, uid); + int ret = idxCachePut(cache, term, uid); if (ret != 0) { // std::cout << "failed to put into cache: " << ret << std::endl; @@ -533,12 +533,12 @@ class CacheObj { } void Debug() { // - indexCacheDebug(cache); + idxCacheDebug(cache); } int Get(SIndexTermQuery* query, int16_t colId, int32_t version, SArray* result, STermValueType* s) { SIdxTRslt* tr = idxTRsltCreate(); - int ret = indexCacheSearch(cache, query, tr, s); + int ret = idxCacheSearch(cache, query, tr, s); idxTRsltMergeTo(tr, result); idxTRsltDestroy(tr); @@ -549,7 
+549,7 @@ class CacheObj { } ~CacheObj() { // TODO - indexCacheDestroy(cache); + idxCacheDestroy(cache); } private: diff --git a/source/libs/sync/inc/syncAppendEntries.h b/source/libs/sync/inc/syncAppendEntries.h index 5999ef8300..98df22d51b 100644 --- a/source/libs/sync/inc/syncAppendEntries.h +++ b/source/libs/sync/inc/syncAppendEntries.h @@ -93,6 +93,7 @@ extern "C" { // /\ UNCHANGED <> // int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg); +int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncAppendEntriesReply.h b/source/libs/sync/inc/syncAppendEntriesReply.h index c0c1f76707..e509a50dc4 100644 --- a/source/libs/sync/inc/syncAppendEntriesReply.h +++ b/source/libs/sync/inc/syncAppendEntriesReply.h @@ -41,6 +41,7 @@ extern "C" { // /\ UNCHANGED <> // int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); +int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncElection.h b/source/libs/sync/inc/syncElection.h index 85a82dcfb7..128dbf4050 100644 --- a/source/libs/sync/inc/syncElection.h +++ b/source/libs/sync/inc/syncElection.h @@ -39,6 +39,8 @@ extern "C" { // /\ UNCHANGED <> // int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode); +int32_t syncNodeRequestVotePeersSnapshot(SSyncNode* pSyncNode); + int32_t syncNodeElect(SSyncNode* pSyncNode); int32_t syncNodeRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg); diff --git a/source/libs/sync/inc/syncIO.h b/source/libs/sync/inc/syncIO.h index b69c087b5f..88d3065375 100644 --- a/source/libs/sync/inc/syncIO.h +++ b/source/libs/sync/inc/syncIO.h @@ -57,6 +57,9 @@ typedef struct SSyncIO { int32_t (*FpOnSyncAppendEntriesReply)(SSyncNode *pSyncNode, SyncAppendEntriesReply *pMsg); int32_t (*FpOnSyncTimeout)(SSyncNode *pSyncNode, SyncTimeout 
*pMsg); + int32_t (*FpOnSyncSnapshotSend)(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg); + int32_t (*FpOnSyncSnapshotRsp)(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg); + int8_t isStart; } SSyncIO; diff --git a/source/libs/sync/inc/syncIndexMgr.h b/source/libs/sync/inc/syncIndexMgr.h index 0a6e2428fe..1f60a9d57e 100644 --- a/source/libs/sync/inc/syncIndexMgr.h +++ b/source/libs/sync/inc/syncIndexMgr.h @@ -30,6 +30,7 @@ extern "C" { typedef struct SSyncIndexMgr { SRaftId (*replicas)[TSDB_MAX_REPLICA]; SyncIndex index[TSDB_MAX_REPLICA]; + SyncTerm privateTerm[TSDB_MAX_REPLICA]; // for advanced function int32_t replicaNum; SSyncNode *pSyncNode; } SSyncIndexMgr; @@ -43,6 +44,9 @@ SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId cJSON * syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr); char * syncIndexMgr2Str(SSyncIndexMgr *pSyncIndexMgr); +// void syncIndexMgrSetTerm(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId, SyncTerm term); +// SyncTerm syncIndexMgrGetTerm(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId); + // for debug ------------------- void syncIndexMgrPrint(SSyncIndexMgr *pObj); void syncIndexMgrPrint2(char *s, SSyncIndexMgr *pObj); diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 4100aa0216..10218f69e6 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -58,6 +58,8 @@ typedef struct SSyncRespMgr SSyncRespMgr; typedef struct SSyncSnapshotSender SSyncSnapshotSender; typedef struct SSyncSnapshotReceiver SSyncSnapshotReceiver; +extern bool gRaftDetailLog; + typedef struct SSyncNode { // init by SSyncInfo SyncGroupId vgId; @@ -137,24 +139,27 @@ typedef struct SSyncNode { uint64_t heartbeatTimerCounter; // callback - int32_t (*FpOnPing)(SSyncNode* ths, SyncPing* pMsg); - int32_t (*FpOnPingReply)(SSyncNode* ths, SyncPingReply* pMsg); - int32_t (*FpOnClientRequest)(SSyncNode* ths, SyncClientRequest* pMsg); - int32_t (*FpOnRequestVote)(SSyncNode* ths, 
SyncRequestVote* pMsg); - int32_t (*FpOnRequestVoteReply)(SSyncNode* ths, SyncRequestVoteReply* pMsg); - int32_t (*FpOnAppendEntries)(SSyncNode* ths, SyncAppendEntries* pMsg); - int32_t (*FpOnAppendEntriesReply)(SSyncNode* ths, SyncAppendEntriesReply* pMsg); - int32_t (*FpOnTimeout)(SSyncNode* pSyncNode, SyncTimeout* pMsg); + FpOnPingCb FpOnPing; + FpOnPingReplyCb FpOnPingReply; + FpOnClientRequestCb FpOnClientRequest; + FpOnTimeoutCb FpOnTimeout; + FpOnRequestVoteCb FpOnRequestVote; + FpOnRequestVoteReplyCb FpOnRequestVoteReply; + FpOnAppendEntriesCb FpOnAppendEntries; + FpOnAppendEntriesReplyCb FpOnAppendEntriesReply; + FpOnSnapshotSendCb FpOnSnapshotSend; + FpOnSnapshotRspCb FpOnSnapshotRsp; // tools SSyncRespMgr* pSyncRespMgr; // restore state - // sem_t restoreSem; - bool restoreFinish; - SSnapshot* pSnapshot; - SSyncSnapshotSender* pSender; - SSyncSnapshotReceiver* pReceiver; + bool restoreFinish; + // SSnapshot* pSnapshot; + SSyncSnapshotSender* senders[TSDB_MAX_REPLICA]; + SSyncSnapshotReceiver* pNewNodeReceiver; + + SSnapshotMeta sMeta; } SSyncNode; @@ -164,6 +169,9 @@ void syncNodeStart(SSyncNode* pSyncNode); void syncNodeStartStandBy(SSyncNode* pSyncNode); void syncNodeClose(SSyncNode* pSyncNode); +// option +bool syncNodeSnapshotEnable(SSyncNode* pSyncNode); + // ping -------------- int32_t syncNodePing(SSyncNode* pSyncNode, const SRaftId* destRaftId, SyncPing* pMsg); int32_t syncNodePingSelf(SSyncNode* pSyncNode); @@ -205,6 +213,25 @@ void syncNodeCandidate2Follower(SSyncNode* pSyncNode); void syncNodeVoteForTerm(SSyncNode* pSyncNode, SyncTerm term, SRaftId* pRaftId); void syncNodeVoteForSelf(SSyncNode* pSyncNode); +// snapshot -------------- +bool syncNodeHasSnapshot(SSyncNode* pSyncNode); +bool syncNodeIsIndexInSnapshot(SSyncNode* pSyncNode, SyncIndex index); + +SyncIndex syncNodeGetLastIndex(SSyncNode* pSyncNode); +SyncTerm syncNodeGetLastTerm(SSyncNode* pSyncNode); +int32_t syncNodeGetLastIndexTerm(SSyncNode* pSyncNode, SyncIndex* pLastIndex, 
SyncTerm* pLastTerm); + +SyncIndex syncNodeSyncStartIndex(SSyncNode* pSyncNode); + +SyncIndex syncNodeGetPreIndex(SSyncNode* pSyncNode, SyncIndex index); +SyncTerm syncNodeGetPreTerm(SSyncNode* pSyncNode, SyncIndex index); +int32_t syncNodeGetPreIndexTerm(SSyncNode* pSyncNode, SyncIndex index, SyncIndex* pPreIndex, SyncTerm* pPreTerm); + +int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag); + +bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId); +SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId); + // for debug -------------- void syncNodePrint(SSyncNode* pObj); void syncNodePrint2(char* s, SSyncNode* pObj); diff --git a/source/libs/sync/inc/syncRaftCfg.h b/source/libs/sync/inc/syncRaftCfg.h index 1061e8bdc4..86c5fab87c 100644 --- a/source/libs/sync/inc/syncRaftCfg.h +++ b/source/libs/sync/inc/syncRaftCfg.h @@ -34,6 +34,7 @@ typedef struct SRaftCfg { TdFilePtr pFile; char path[TSDB_FILENAME_LEN * 2]; int8_t isStandBy; + int8_t snapshotEnable; } SRaftCfg; SRaftCfg *raftCfgOpen(const char *path); @@ -50,7 +51,12 @@ char * raftCfg2Str(SRaftCfg *pRaftCfg); int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg); int32_t raftCfgFromStr(const char *s, SRaftCfg *pRaftCfg); -int32_t raftCfgCreateFile(SSyncCfg *pCfg, int8_t isStandBy, const char *path); +typedef struct SRaftCfgMeta { + int8_t isStandBy; + int8_t snapshotEnable; +} SRaftCfgMeta; + +int32_t raftCfgCreateFile(SSyncCfg *pCfg, SRaftCfgMeta meta, const char *path); // for debug ---------------------- void syncCfgPrint(SSyncCfg *pCfg); diff --git a/source/libs/sync/inc/syncRaftLog.h b/source/libs/sync/inc/syncRaftLog.h index df5cd3f36c..aec1f77b42 100644 --- a/source/libs/sync/inc/syncRaftLog.h +++ b/source/libs/sync/inc/syncRaftLog.h @@ -30,6 +30,7 @@ extern "C" { typedef struct SSyncLogStoreData { SSyncNode* pSyncNode; SWal* pWal; + SyncIndex beginIndex; // valid begin index, default 0, may be set beginIndex > 0 } 
SSyncLogStoreData; SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode); @@ -39,14 +40,7 @@ char* logStore2Str(SSyncLogStore* pLogStore); cJSON* logStoreSimple2Json(SSyncLogStore* pLogStore); char* logStoreSimple2Str(SSyncLogStore* pLogStore); -// SSyncRaftEntry* logStoreGetLastEntry(SSyncLogStore* pLogStore); -// SyncIndex logStoreLastIndex(SSyncLogStore* pLogStore); -// SyncTerm logStoreLastTerm(SSyncLogStore* pLogStore); -// SSyncRaftEntry* logStoreGetEntry(SSyncLogStore* pLogStore, SyncIndex index); -// int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry); -// int32_t logStoreTruncate(SSyncLogStore* pLogStore, SyncIndex fromIndex); -// int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index); -// SyncIndex logStoreGetCommitIndex(SSyncLogStore* pLogStore); +SyncIndex logStoreFirstIndex(SSyncLogStore* pLogStore); // for debug void logStorePrint(SSyncLogStore* pLogStore); diff --git a/source/libs/sync/inc/syncRaftStore.h b/source/libs/sync/inc/syncRaftStore.h index e0cbcf0744..9f03ac3e55 100644 --- a/source/libs/sync/inc/syncRaftStore.h +++ b/source/libs/sync/inc/syncRaftStore.h @@ -49,8 +49,8 @@ void raftStoreClearVote(SRaftStore *pRaftStore); void raftStoreNextTerm(SRaftStore *pRaftStore); void raftStoreSetTerm(SRaftStore *pRaftStore, SyncTerm term); int32_t raftStoreFromJson(SRaftStore *pRaftStore, cJSON *pJson); -cJSON * raftStore2Json(SRaftStore *pRaftStore); -char * raftStore2Str(SRaftStore *pRaftStore); +cJSON *raftStore2Json(SRaftStore *pRaftStore); +char *raftStore2Str(SRaftStore *pRaftStore); // for debug ------------------- void raftStorePrint(SRaftStore *pObj); diff --git a/source/libs/sync/inc/syncReplication.h b/source/libs/sync/inc/syncReplication.h index 6fe18dae38..4b1f5b4638 100644 --- a/source/libs/sync/inc/syncReplication.h +++ b/source/libs/sync/inc/syncReplication.h @@ -52,6 +52,7 @@ extern "C" { // /\ UNCHANGED <> // int32_t syncNodeAppendEntriesPeers(SSyncNode* pSyncNode); +int32_t 
syncNodeAppendEntriesPeersSnapshot(SSyncNode* pSyncNode); int32_t syncNodeReplicate(SSyncNode* pSyncNode); int32_t syncNodeAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntries* pMsg); diff --git a/source/libs/sync/inc/syncRequestVote.h b/source/libs/sync/inc/syncRequestVote.h index fd4ccd5371..3fe8dc0237 100644 --- a/source/libs/sync/inc/syncRequestVote.h +++ b/source/libs/sync/inc/syncRequestVote.h @@ -50,6 +50,7 @@ extern "C" { // /\ UNCHANGED <> // int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg); +int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncRequestVoteReply.h b/source/libs/sync/inc/syncRequestVoteReply.h index bcaf71a541..ac47a8d026 100644 --- a/source/libs/sync/inc/syncRequestVoteReply.h +++ b/source/libs/sync/inc/syncRequestVoteReply.h @@ -45,6 +45,7 @@ extern "C" { // /\ UNCHANGED <> // int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg); +int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncSnapshot.h b/source/libs/sync/inc/syncSnapshot.h index 43d1c0c0c3..b16e47b51e 100644 --- a/source/libs/sync/inc/syncSnapshot.h +++ b/source/libs/sync/inc/syncSnapshot.h @@ -25,40 +25,64 @@ extern "C" { #include #include "cJSON.h" #include "syncInt.h" +#include "syncMessage.h" #include "taosdef.h" +#define SYNC_SNAPSHOT_SEQ_INVALID -1 +#define SYNC_SNAPSHOT_SEQ_FORCE_CLOSE -2 +#define SYNC_SNAPSHOT_SEQ_BEGIN 0 +#define SYNC_SNAPSHOT_SEQ_END 0x7FFFFFFF + +#define SYNC_SNAPSHOT_RETRY_MS 5000 + typedef struct SSyncSnapshotSender { - int32_t sending; - int32_t received; - bool finish; - void * pCurrentBlock; + bool start; + int32_t seq; + int32_t ack; + void *pReader; + void *pCurrentBlock; int32_t blockLen; + SSnapshot snapshot; int64_t sendingMS; - SSnapshot *pSnapshot; SSyncNode *pSyncNode; + int32_t 
replicaIndex; + SyncTerm term; + SyncTerm privateTerm; + bool finish; } SSyncSnapshotSender; -SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode); +SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaIndex); void snapshotSenderDestroy(SSyncSnapshotSender *pSender); +bool snapshotSenderIsStart(SSyncSnapshotSender *pSender); +void snapshotSenderStart(SSyncSnapshotSender *pSender); +void snapshotSenderStop(SSyncSnapshotSender *pSender); int32_t snapshotSend(SSyncSnapshotSender *pSender); -cJSON * snapshotSender2Json(SSyncSnapshotSender *pSender); -char * snapshotSender2Str(SSyncSnapshotSender *pSender); +int32_t snapshotReSend(SSyncSnapshotSender *pSender); +cJSON *snapshotSender2Json(SSyncSnapshotSender *pSender); +char *snapshotSender2Str(SSyncSnapshotSender *pSender); typedef struct SSyncSnapshotReceiver { - bool start; - int32_t received; - int32_t progressIndex; - void * pCurrentBlock; - int32_t len; - SSnapshot *pSnapshot; + bool start; + + int32_t ack; + void *pWriter; + SyncTerm term; + SyncTerm privateTerm; + SSyncNode *pSyncNode; + int32_t replicaIndex; } SSyncSnapshotReceiver; -SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode); +SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, int32_t replicaIndex); void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver); -int32_t snapshotReceive(SSyncSnapshotReceiver *pReceiver); -cJSON * snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver); -char * snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver); +void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm); +bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver); +void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver, bool apply); +cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver); +char *snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver); + +int32_t syncNodeOnSnapshotSendCb(SSyncNode *ths, SyncSnapshotSend *pMsg); +int32_t 
syncNodeOnSnapshotRspCb(SSyncNode *ths, SyncSnapshotRsp *pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncUtil.h b/source/libs/sync/inc/syncUtil.h index 1b08d3f7a1..7ecff7ae97 100644 --- a/source/libs/sync/inc/syncUtil.h +++ b/source/libs/sync/inc/syncUtil.h @@ -61,6 +61,7 @@ bool syncUtilIsData(tmsg_t msgType); bool syncUtilUserPreCommit(tmsg_t msgType); bool syncUtilUserCommit(tmsg_t msgType); bool syncUtilUserRollback(tmsg_t msgType); +void syncUtilJson2Line(char* jsonStr); #ifdef __cplusplus } diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 89dcd8a476..3c558b60c8 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -18,8 +18,10 @@ #include "syncRaftCfg.h" #include "syncRaftLog.h" #include "syncRaftStore.h" +#include "syncSnapshot.h" #include "syncUtil.h" #include "syncVoteMgr.h" +#include "wal.h" // TLA+ Spec // HandleAppendEntriesRequest(i, j, m) == @@ -335,8 +337,12 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { cbMeta.currentTerm = ths->pRaftStore->currentTerm; cbMeta.flag = 0x11; + SSnapshot snapshot; + ASSERT(ths->pFsm->FpGetSnapshot != NULL); + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + bool needExecute = true; - if (ths->pSnapshot != NULL && cbMeta.index <= ths->pSnapshot->lastApplyIndex) { + if (cbMeta.index <= snapshot.lastApplyIndex) { needExecute = false; } @@ -427,3 +433,332 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { return ret; } + +static int32_t syncNodeMakeLogSame(SSyncNode* ths, SyncAppendEntries* pMsg) { + int32_t code; + + SyncIndex delBegin = pMsg->prevLogIndex + 1; + SyncIndex delEnd = ths->pLogStore->syncLogLastIndex(ths->pLogStore); + + // invert roll back! 
+ for (SyncIndex index = delEnd; index >= delBegin; --index) { + if (ths->pFsm->FpRollBackCb != NULL) { + SSyncRaftEntry* pRollBackEntry; + code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, index, &pRollBackEntry); + ASSERT(code == 0); + ASSERT(pRollBackEntry != NULL); + + if (syncUtilUserRollback(pRollBackEntry->msgType)) { + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pRollBackEntry, &rpcMsg); + + SFsmCbMeta cbMeta; + cbMeta.index = pRollBackEntry->index; + cbMeta.isWeak = pRollBackEntry->isWeak; + cbMeta.code = 0; + cbMeta.state = ths->state; + cbMeta.seqNum = pRollBackEntry->seqNum; + ths->pFsm->FpRollBackCb(ths->pFsm, &rpcMsg, cbMeta); + rpcFreeCont(rpcMsg.pCont); + } + + syncEntryDestory(pRollBackEntry); + } + } + + // delete confict entries + code = ths->pLogStore->syncLogTruncate(ths->pLogStore, delBegin); + ASSERT(code == 0); + sInfo("sync event log truncate, from %ld to %ld", delBegin, delEnd); + logStoreSimpleLog2("after syncNodeMakeLogSame", ths->pLogStore); + + return code; +} + +static int32_t syncNodePreCommit(SSyncNode* ths, SSyncRaftEntry* pEntry) { + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pEntry, &rpcMsg); + if (ths->pFsm != NULL) { + if (ths->pFsm->FpPreCommitCb != NULL && syncUtilUserPreCommit(pEntry->originalRpcType)) { + SFsmCbMeta cbMeta; + cbMeta.index = pEntry->index; + cbMeta.isWeak = pEntry->isWeak; + cbMeta.code = 2; + cbMeta.state = ths->state; + cbMeta.seqNum = pEntry->seqNum; + ths->pFsm->FpPreCommitCb(ths->pFsm, &rpcMsg, cbMeta); + } + } + rpcFreeCont(rpcMsg.pCont); + return 0; +} + +// really pre log match +// prevLogIndex == -1 +static bool syncNodeOnAppendEntriesLogOK(SSyncNode* pSyncNode, SyncAppendEntries* pMsg) { + if (pMsg->prevLogIndex == SYNC_INDEX_INVALID) { + if (gRaftDetailLog) { + sTrace("syncNodeOnAppendEntriesLogOK true, pMsg->prevLogIndex:%ld", pMsg->prevLogIndex); + } + return true; + } + + SyncIndex myLastIndex = syncNodeGetLastIndex(pSyncNode); + if (pMsg->prevLogIndex > myLastIndex) { + if (gRaftDetailLog) { + 
sTrace("syncNodeOnAppendEntriesLogOK false, pMsg->prevLogIndex:%ld, myLastIndex:%ld", pMsg->prevLogIndex, + myLastIndex); + } + return false; + } + + SyncTerm myPreLogTerm = syncNodeGetPreTerm(pSyncNode, pMsg->prevLogIndex + 1); + if (pMsg->prevLogIndex <= myLastIndex && pMsg->prevLogTerm == myPreLogTerm) { + if (gRaftDetailLog) { + sTrace( + "syncNodeOnAppendEntriesLogOK true, pMsg->prevLogIndex:%ld, myLastIndex:%ld, pMsg->prevLogTerm:%lu, " + "myPreLogTerm:%lu", + pMsg->prevLogIndex, myLastIndex, pMsg->prevLogTerm, myPreLogTerm); + } + return true; + } + + if (gRaftDetailLog) { + sTrace( + "syncNodeOnAppendEntriesLogOK false, pMsg->prevLogIndex:%ld, myLastIndex:%ld, pMsg->prevLogTerm:%lu, " + "myPreLogTerm:%lu", + pMsg->prevLogIndex, myLastIndex, pMsg->prevLogTerm, myPreLogTerm); + } + + return false; +} + +int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg) { + int32_t ret = 0; + int32_t code = 0; + + // print log + char logBuf[128] = {0}; + snprintf(logBuf, sizeof(logBuf), "recv SyncAppendEntries, vgId:%d, term:%lu", ths->vgId, + ths->pRaftStore->currentTerm); + syncAppendEntriesLog2(logBuf, pMsg); + + // if already drop replica, do not process + if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { + sInfo("recv SyncAppendEntries maybe replica already dropped"); + return ret; + } + + // maybe update term + if (pMsg->term > ths->pRaftStore->currentTerm) { + syncNodeUpdateTerm(ths, pMsg->term); + } + ASSERT(pMsg->term <= ths->pRaftStore->currentTerm); + + // reset elect timer + if (pMsg->term == ths->pRaftStore->currentTerm) { + ths->leaderCache = pMsg->srcId; + syncNodeResetElectTimer(ths); + } + ASSERT(pMsg->dataLen >= 0); + + // candidate to follower + // + // operation: + // to follower + do { + bool condition = pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE; + if (condition) { + sTrace("recv SyncAppendEntries, candidate to follower"); + + 
syncNodeBecomeFollower(ths); + // do not reply? + return ret; + } + } while (0); + + // fake match + // + // condition1: + // I have snapshot, no log, preIndex > myLastIndex + // + // condition2: + // I have snapshot, have log, log <= snapshot, preIndex > myLastIndex + // + // condition3: + // I have snapshot, preIndex < snapshot.lastApplyIndex + // + // condition4: + // I have snapshot, preIndex == snapshot.lastApplyIndex, no data + // + // operation: + // match snapshot.lastApplyIndex - 1; + // no operation on log + do { + SyncIndex myLastIndex = syncNodeGetLastIndex(ths); + SSnapshot snapshot; + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + + bool condition0 = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && + syncNodeHasSnapshot(ths); + bool condition1 = + condition0 && (ths->pLogStore->syncLogEntryCount(ths->pLogStore) == 0) && (pMsg->prevLogIndex > myLastIndex); + bool condition2 = condition0 && (ths->pLogStore->syncLogLastIndex(ths->pLogStore) <= snapshot.lastApplyIndex) && + (pMsg->prevLogIndex > myLastIndex); + bool condition3 = condition0 && (pMsg->prevLogIndex < snapshot.lastApplyIndex); + bool condition4 = condition0 && (pMsg->prevLogIndex == snapshot.lastApplyIndex) && (pMsg->dataLen == 0); + bool condition = condition1 || condition2 || condition3 || condition4; + + if (condition) { + sTrace( + "recv SyncAppendEntries, fake match, myLastIndex:%ld, syncLogBeginIndex:%ld, syncLogEndIndex:%ld, " + "condition1:%d, condition2:%d, condition3:%d, condition4:%d", + myLastIndex, ths->pLogStore->syncLogBeginIndex(ths->pLogStore), + ths->pLogStore->syncLogEndIndex(ths->pLogStore), condition1, condition2, condition3, condition4); + + // prepare response msg + SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); + pReply->srcId = ths->myRaftId; + pReply->destId = pMsg->srcId; + pReply->term = ths->pRaftStore->currentTerm; + pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; + pReply->success 
= true; + pReply->matchIndex = snapshot.lastApplyIndex; + + // send response + SRpcMsg rpcMsg; + syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); + syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); + syncAppendEntriesReplyDestroy(pReply); + + return ret; + } + } while (0); + + // calculate logOK here, before will coredump, due to fake match + bool logOK = syncNodeOnAppendEntriesLogOK(ths, pMsg); + + // not match + // + // condition1: + // term < myTerm + // + // condition2: + // !logOK + // + // operation: + // not match + // no operation on log + do { + bool condition1 = pMsg->term < ths->pRaftStore->currentTerm; + bool condition2 = + (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && !logOK; + bool condition = condition1 || condition2; + + if (condition) { + sTrace( + "recv SyncAppendEntries, not match, syncLogBeginIndex:%ld, syncLogEndIndex:%ld, condition1:%d, " + "condition2:%d, logOK:%d", + ths->pLogStore->syncLogBeginIndex(ths->pLogStore), ths->pLogStore->syncLogEndIndex(ths->pLogStore), + condition1, condition2, logOK); + + // prepare response msg + SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); + pReply->srcId = ths->myRaftId; + pReply->destId = pMsg->srcId; + pReply->term = ths->pRaftStore->currentTerm; + pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; + pReply->success = false; + pReply->matchIndex = SYNC_INDEX_INVALID; + + // send response + SRpcMsg rpcMsg; + syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); + syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); + syncAppendEntriesReplyDestroy(pReply); + + return ret; + } + } while (0); + + // really match + // + // condition: + // logOK + // + // operation: + // match + // make log same + do { + bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && logOK; + if (condition) { + // has extra entries (> preIndex) in local log + SyncIndex myLastIndex = syncNodeGetLastIndex(ths); + 
bool hasExtraEntries = myLastIndex > pMsg->prevLogIndex; + + // has entries in SyncAppendEntries msg + bool hasAppendEntries = pMsg->dataLen > 0; + + sTrace("recv SyncAppendEntries, match, myLastIndex:%ld, hasExtraEntries:%d, hasAppendEntries:%d", myLastIndex, + hasExtraEntries, hasAppendEntries); + + if (hasExtraEntries) { + // make log same, rollback deleted entries + code = syncNodeMakeLogSame(ths, pMsg); + ASSERT(code == 0); + } + + if (hasAppendEntries) { + // append entry + SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen); + ASSERT(pAppendEntry != NULL); + + code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); + ASSERT(code == 0); + + // pre commit + code = syncNodePreCommit(ths, pAppendEntry); + ASSERT(code == 0); + + syncEntryDestory(pAppendEntry); + } + + // prepare response msg + SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); + pReply->srcId = ths->myRaftId; + pReply->destId = pMsg->srcId; + pReply->term = ths->pRaftStore->currentTerm; + pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; + pReply->success = true; + pReply->matchIndex = hasAppendEntries ? 
pMsg->prevLogIndex + 1 : pMsg->prevLogIndex; + + // send response + SRpcMsg rpcMsg; + syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); + syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); + syncAppendEntriesReplyDestroy(pReply); + + // maybe update commit index, leader notice me + if (pMsg->commitIndex > ths->commitIndex) { + // has commit entry in local + if (pMsg->commitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) { + SyncIndex beginIndex = ths->commitIndex + 1; + SyncIndex endIndex = pMsg->commitIndex; + + // update commit index + ths->commitIndex = pMsg->commitIndex; + + // call back Wal + code = ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex); + ASSERT(code == 0); + + code = syncNodeCommit(ths, beginIndex, endIndex, ths->state); + ASSERT(code == 0); + } + } + return ret; + } + } while (0); + + return ret; +} diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index 4e6d870e19..5a543e1605 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -17,8 +17,10 @@ #include "syncCommit.h" #include "syncIndexMgr.h" #include "syncInt.h" +#include "syncRaftCfg.h" #include "syncRaftLog.h" #include "syncRaftStore.h" +#include "syncSnapshot.h" #include "syncUtil.h" #include "syncVoteMgr.h" @@ -94,3 +96,116 @@ int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* p return ret; } + +int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg) { + int32_t ret = 0; + + // print log + char logBuf[128] = {0}; + snprintf(logBuf, sizeof(logBuf), "recv SyncAppendEntriesReply, vgId:%d, term:%lu", ths->vgId, + ths->pRaftStore->currentTerm); + syncAppendEntriesReplyLog2(logBuf, pMsg); + + // if already drop replica, do not process + if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { + sInfo("recv SyncAppendEntriesReply, maybe replica already dropped"); + 
return ret; + } + + // drop stale response + if (pMsg->term < ths->pRaftStore->currentTerm) { + sTrace("recv SyncAppendEntriesReply, drop stale response, receive_term:%lu current_term:%lu", pMsg->term, + ths->pRaftStore->currentTerm); + return ret; + } + + syncIndexMgrLog2("recv SyncAppendEntriesReply, before pNextIndex:", ths->pNextIndex); + syncIndexMgrLog2("recv SyncAppendEntriesReply, before pMatchIndex:", ths->pMatchIndex); + { + SSnapshot snapshot; + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + sTrace("recv SyncAppendEntriesReply, before snapshot.lastApplyIndex:%ld, snapshot.lastApplyTerm:%lu", + snapshot.lastApplyIndex, snapshot.lastApplyTerm); + } + + // no need this code, because if I receive reply.term, then I must have sent for that term. + // if (pMsg->term > ths->pRaftStore->currentTerm) { + // syncNodeUpdateTerm(ths, pMsg->term); + // } + + if (pMsg->term > ths->pRaftStore->currentTerm) { + char logBuf[128] = {0}; + snprintf(logBuf, sizeof(logBuf), "recv SyncAppendEntriesReply, error term, receive_term:%lu current_term:%lu", + pMsg->term, ths->pRaftStore->currentTerm); + syncNodeLog2(logBuf, ths); + sError("%s", logBuf); + return ret; + } + + ASSERT(pMsg->term == ths->pRaftStore->currentTerm); + + if (pMsg->success) { + // nextIndex' = [nextIndex EXCEPT ![i][j] = m.mmatchIndex + 1] + syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1); + sTrace("update next match, index:%ld, success:%d", pMsg->matchIndex + 1, pMsg->success); + + // matchIndex' = [matchIndex EXCEPT ![i][j] = m.mmatchIndex] + syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex); + + // maybe commit + if (ths->state == TAOS_SYNC_STATE_LEADER) { + syncMaybeAdvanceCommitIndex(ths); + } + + } else { + SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); + sTrace("update next not match, begin, index:%ld, success:%d", nextIndex, pMsg->success); + + // notice! 
int64, uint64 + if (nextIndex > SYNC_INDEX_BEGIN) { + --nextIndex; + + // get sender + SSyncSnapshotSender* pSender = syncNodeGetSnapshotSender(ths, &(pMsg->srcId)); + ASSERT(pSender != NULL); + bool hasSnapshot = syncNodeHasSnapshot(ths); + SSnapshot snapshot; + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + + // start sending snapshot first time + // start here, stop by receiver + if (hasSnapshot && nextIndex <= snapshot.lastApplyIndex + 1 && !snapshotSenderIsStart(pSender) && + pMsg->privateTerm < pSender->privateTerm) { + snapshotSenderStart(pSender); + + char* s = snapshotSender2Str(pSender); + sInfo("sync event snapshot send start sender first time, sender:%s", s); + taosMemoryFree(s); + } + + SyncIndex sentryIndex = pSender->snapshot.lastApplyIndex + 1; + + // update nextIndex to sentryIndex + if (nextIndex <= sentryIndex) { + nextIndex = sentryIndex; + } + + } else { + nextIndex = SYNC_INDEX_BEGIN; + } + + syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex); + sTrace("update next not match, end, index:%ld, success:%d", nextIndex, pMsg->success); + } + + syncIndexMgrLog2("recv SyncAppendEntriesReply, after pNextIndex:", ths->pNextIndex); + syncIndexMgrLog2("recv SyncAppendEntriesReply, after pMatchIndex:", ths->pMatchIndex); + { + SSnapshot snapshot; + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + sTrace("recv SyncAppendEntriesReply, after snapshot.lastApplyIndex:%ld, snapshot.lastApplyTerm:%lu", + snapshot.lastApplyIndex, snapshot.lastApplyTerm); + } + + return ret; +} \ No newline at end of file diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index c6376495a4..c092b31adf 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -94,109 +94,8 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { // execute fsm if (pSyncNode->pFsm != NULL) { - for (SyncIndex i = beginIndex; i <= endIndex; ++i) { - if (i != SYNC_INDEX_INVALID) { - SSyncRaftEntry* pEntry = 
pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, i); - assert(pEntry != NULL); - - SRpcMsg rpcMsg; - syncEntry2OriginalRpc(pEntry, &rpcMsg); - - if (pSyncNode->pFsm->FpCommitCb != NULL && syncUtilUserCommit(pEntry->originalRpcType)) { - SFsmCbMeta cbMeta; - cbMeta.index = pEntry->index; - cbMeta.isWeak = pEntry->isWeak; - cbMeta.code = 0; - cbMeta.state = pSyncNode->state; - cbMeta.seqNum = pEntry->seqNum; - cbMeta.term = pEntry->term; - cbMeta.currentTerm = pSyncNode->pRaftStore->currentTerm; - cbMeta.flag = 0x1; - - bool needExecute = true; - if (pSyncNode->pSnapshot != NULL && cbMeta.index <= pSyncNode->pSnapshot->lastApplyIndex) { - needExecute = false; - } - - if (needExecute) { - pSyncNode->pFsm->FpCommitCb(pSyncNode->pFsm, &rpcMsg, cbMeta); - } - } - - // config change - if (pEntry->originalRpcType == TDMT_SYNC_CONFIG_CHANGE) { - SSyncCfg oldSyncCfg = pSyncNode->pRaftCfg->cfg; - - SSyncCfg newSyncCfg; - int32_t ret = syncCfgFromStr(rpcMsg.pCont, &newSyncCfg); - ASSERT(ret == 0); - - // update new config myIndex - bool hit = false; - for (int i = 0; i < newSyncCfg.replicaNum; ++i) { - if (strcmp(pSyncNode->myNodeInfo.nodeFqdn, (newSyncCfg.nodeInfo)[i].nodeFqdn) == 0 && - pSyncNode->myNodeInfo.nodePort == (newSyncCfg.nodeInfo)[i].nodePort) { - newSyncCfg.myIndex = i; - hit = true; - break; - } - } - - if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { - ASSERT(hit == true); - } - - bool isDrop; - syncNodeUpdateConfig(pSyncNode, &newSyncCfg, &isDrop); - - // change isStandBy to normal - if (!isDrop) { - if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { - syncNodeBecomeLeader(pSyncNode); - } else { - syncNodeBecomeFollower(pSyncNode); - } - } - - char* sOld = syncCfg2Str(&oldSyncCfg); - char* sNew = syncCfg2Str(&newSyncCfg); - sInfo("==config change== 0x1 old:%s new:%s isDrop:%d \n", sOld, sNew, isDrop); - taosMemoryFree(sOld); - taosMemoryFree(sNew); - - if (pSyncNode->pFsm->FpReConfigCb != NULL) { - SReConfigCbMeta cbMeta = {0}; - cbMeta.code = 0; - 
cbMeta.currentTerm = pSyncNode->pRaftStore->currentTerm; - cbMeta.index = pEntry->index; - cbMeta.term = pEntry->term; - cbMeta.oldCfg = oldSyncCfg; - cbMeta.flag = 0x1; - cbMeta.isDrop = isDrop; - pSyncNode->pFsm->FpReConfigCb(pSyncNode->pFsm, newSyncCfg, cbMeta); - } - } - - // restore finish - if (pEntry->index == pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore)) { - if (pSyncNode->restoreFinish == false) { - if (pSyncNode->pFsm->FpRestoreFinishCb != NULL) { - pSyncNode->pFsm->FpRestoreFinishCb(pSyncNode->pFsm); - } - pSyncNode->restoreFinish = true; - sInfo("==syncMaybeAdvanceCommitIndex== restoreFinish set true %p vgId:%d", pSyncNode, pSyncNode->vgId); - - /* - tsem_post(&pSyncNode->restoreSem); - sInfo("==syncMaybeAdvanceCommitIndex== RestoreFinish tsem_post %p", pSyncNode); - */ - } - } - - rpcFreeCont(rpcMsg.pCont); - syncEntryDestory(pEntry); - } - } + int32_t code = syncNodeCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state); + ASSERT(code == 0); } } } diff --git a/source/libs/sync/src/syncElection.c b/source/libs/sync/src/syncElection.c index 5101344b84..fdebbe3990 100644 --- a/source/libs/sync/src/syncElection.c +++ b/source/libs/sync/src/syncElection.c @@ -15,6 +15,7 @@ #include "syncElection.h" #include "syncMessage.h" +#include "syncRaftCfg.h" #include "syncRaftStore.h" #include "syncVoteMgr.h" @@ -49,6 +50,26 @@ int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode) { return ret; } +int32_t syncNodeRequestVotePeersSnapshot(SSyncNode* pSyncNode) { + ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE); + + int32_t ret = 0; + for (int i = 0; i < pSyncNode->peersNum; ++i) { + SyncRequestVote* pMsg = syncRequestVoteBuild(pSyncNode->vgId); + pMsg->srcId = pSyncNode->myRaftId; + pMsg->destId = pSyncNode->peersId[i]; + pMsg->term = pSyncNode->pRaftStore->currentTerm; + + ret = syncNodeGetLastIndexTerm(pSyncNode, &(pMsg->lastLogIndex), &(pMsg->lastLogTerm)); + ASSERT(ret == 0); + + ret = syncNodeRequestVote(pSyncNode, &pSyncNode->peersId[i], 
pMsg); + ASSERT(ret == 0); + syncRequestVoteDestroy(pMsg); + } + return ret; +} + int32_t syncNodeElect(SSyncNode* pSyncNode) { int32_t ret = 0; if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) { @@ -71,7 +92,12 @@ int32_t syncNodeElect(SSyncNode* pSyncNode) { return ret; } - ret = syncNodeRequestVotePeers(pSyncNode); + if (pSyncNode->pRaftCfg->snapshotEnable) { + ret = syncNodeRequestVotePeersSnapshot(pSyncNode); + } else { + ret = syncNodeRequestVotePeers(pSyncNode); + } + assert(ret == 0); syncNodeResetElectTimer(pSyncNode); diff --git a/source/libs/sync/src/syncIO.c b/source/libs/sync/src/syncIO.c index aa8484de99..0b5a9685c0 100644 --- a/source/libs/sync/src/syncIO.c +++ b/source/libs/sync/src/syncIO.c @@ -75,7 +75,8 @@ int32_t syncIOSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { syncUtilMsgNtoH(pMsg->pCont); char logBuf[256] = {0}; - snprintf(logBuf, sizeof(logBuf), "==syncIOSendMsg== %s:%d", pEpSet->eps[0].fqdn, pEpSet->eps[0].port); + snprintf(logBuf, sizeof(logBuf), "==syncIOSendMsg== %s:%d msgType:%d", pEpSet->eps[0].fqdn, pEpSet->eps[0].port, + pMsg->msgType); syncRpcMsgLog2(logBuf, pMsg); syncUtilMsgHtoN(pMsg->pCont); @@ -89,8 +90,10 @@ int32_t syncIOSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { int32_t syncIOEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { int32_t ret = 0; - char logBuf[128] = {0}; - syncRpcMsgLog2((char *)"==syncIOEqMsg==", pMsg); + + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==syncIOEqMsg== msgType:%d", pMsg->msgType); + syncRpcMsgLog2(logBuf, pMsg); SRpcMsg *pTemp; pTemp = taosAllocateQitem(sizeof(SRpcMsg), DEF_QITEM); @@ -253,7 +256,9 @@ static void *syncIOConsumerFunc(void *param) { for (int i = 0; i < numOfMsgs; ++i) { taosGetQitem(qall, (void **)&pRpcMsg); - syncRpcMsgLog2((char *)"==syncIOConsumerFunc==", pRpcMsg); + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "==syncIOConsumMsg== msgType:%d", pRpcMsg->msgType); + syncRpcMsgLog2(logBuf, pRpcMsg); // use switch case instead of if else if (pRpcMsg->msgType 
== TDMT_SYNC_PING) { @@ -319,6 +324,23 @@ static void *syncIOConsumerFunc(void *param) { io->FpOnSyncTimeout(io->pSyncNode, pSyncMsg); syncTimeoutDestroy(pSyncMsg); } + + } else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) { + if (io->FpOnSyncSnapshotSend != NULL) { + SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + io->FpOnSyncSnapshotSend(io->pSyncNode, pSyncMsg); + syncSnapshotSendDestroy(pSyncMsg); + } + + } else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) { + if (io->FpOnSyncSnapshotRsp != NULL) { + SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + io->FpOnSyncSnapshotRsp(io->pSyncNode, pSyncMsg); + syncSnapshotRspDestroy(pSyncMsg); + } + } else { sTrace("unknown msgType:%d, no operator", pRpcMsg->msgType); } diff --git a/source/libs/sync/src/syncIndexMgr.c b/source/libs/sync/src/syncIndexMgr.c index 4d556d21dd..ecc1c8f1e2 100644 --- a/source/libs/sync/src/syncIndexMgr.c +++ b/source/libs/sync/src/syncIndexMgr.c @@ -46,6 +46,7 @@ void syncIndexMgrDestroy(SSyncIndexMgr *pSyncIndexMgr) { void syncIndexMgrClear(SSyncIndexMgr *pSyncIndexMgr) { memset(pSyncIndexMgr->index, 0, sizeof(pSyncIndexMgr->index)); + memset(pSyncIndexMgr->privateTerm, 0, sizeof(pSyncIndexMgr->privateTerm)); /* for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { pSyncIndexMgr->index[i] = 0; @@ -62,7 +63,7 @@ void syncIndexMgrSetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId, } // maybe config change - // assert(0); + assert(0); } SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId) { @@ -86,14 +87,27 @@ cJSON *syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr) { for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { cJSON_AddItemToArray(pReplicas, syncUtilRaftId2Json(&(*(pSyncIndexMgr->replicas))[i])); } - int respondNum = 0; - int *arr = (int *)taosMemoryMalloc(sizeof(int) * pSyncIndexMgr->replicaNum); - for (int i = 0; i < pSyncIndexMgr->replicaNum; 
++i) { - arr[i] = pSyncIndexMgr->index[i]; + + { + int *arr = (int *)taosMemoryMalloc(sizeof(int) * pSyncIndexMgr->replicaNum); + for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { + arr[i] = pSyncIndexMgr->index[i]; + } + cJSON *pIndex = cJSON_CreateIntArray(arr, pSyncIndexMgr->replicaNum); + taosMemoryFree(arr); + cJSON_AddItemToObject(pRoot, "index", pIndex); } - cJSON *pIndex = cJSON_CreateIntArray(arr, pSyncIndexMgr->replicaNum); - taosMemoryFree(arr); - cJSON_AddItemToObject(pRoot, "index", pIndex); + + { + int *arr = (int *)taosMemoryMalloc(sizeof(int) * pSyncIndexMgr->replicaNum); + for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { + arr[i] = pSyncIndexMgr->privateTerm[i]; + } + cJSON *pIndex = cJSON_CreateIntArray(arr, pSyncIndexMgr->replicaNum); + taosMemoryFree(arr); + cJSON_AddItemToObject(pRoot, "privateTerm", pIndex); + } + snprintf(u64buf, sizeof(u64buf), "%p", pSyncIndexMgr->pSyncNode); cJSON_AddStringToObject(pRoot, "pSyncNode", u64buf); } @@ -105,7 +119,7 @@ cJSON *syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr) { char *syncIndexMgr2Str(SSyncIndexMgr *pSyncIndexMgr) { cJSON *pJson = syncIndexMgr2Json(pSyncIndexMgr); - char * serialized = cJSON_Print(pJson); + char *serialized = cJSON_Print(pJson); cJSON_Delete(pJson); return serialized; } @@ -132,7 +146,31 @@ void syncIndexMgrLog(SSyncIndexMgr *pObj) { } void syncIndexMgrLog2(char *s, SSyncIndexMgr *pObj) { - char *serialized = syncIndexMgr2Str(pObj); - sTrace("syncIndexMgrLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char *serialized = syncIndexMgr2Str(pObj); + sTrace("syncIndexMgrLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } +} + +void syncIndexMgrSetTerm(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId, SyncTerm term) { + for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { + if (syncUtilSameId(&((*(pSyncIndexMgr->replicas))[i]), pRaftId)) { + 
(pSyncIndexMgr->privateTerm)[i] = term; + return; + } + } + + // maybe config change + assert(0); +} + +SyncTerm syncIndexMgrGetTerm(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId) { + for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { + if (syncUtilSameId(&((*(pSyncIndexMgr->replicas))[i]), pRaftId)) { + SyncTerm term = (pSyncIndexMgr->privateTerm)[i]; + return term; + } + } + assert(0); } \ No newline at end of file diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 795d3e3c27..9516df64da 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -29,11 +29,14 @@ #include "syncRequestVote.h" #include "syncRequestVoteReply.h" #include "syncRespMgr.h" +#include "syncSnapshot.h" #include "syncTimeout.h" #include "syncUtil.h" #include "syncVoteMgr.h" #include "tref.h" +bool gRaftDetailLog = false; + static int32_t tsNodeRefId = -1; // ------ local funciton --------- @@ -213,6 +216,18 @@ bool syncIsRestoreFinish(int64_t rid) { return b; } +int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta) { + SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid); + if (pSyncNode == NULL) { + return -1; + } + assert(rid == pSyncNode->rid); + *sMeta = pSyncNode->sMeta; + + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return 0; +} + const char* syncGetMyRoleStr(int64_t rid) { const char* s = syncUtilState2String(syncGetMyRole(rid)); return s; @@ -411,8 +426,11 @@ SSyncNode* syncNodeOpen(const SSyncInfo* pOldSyncInfo) { snprintf(pSyncNode->configPath, sizeof(pSyncNode->configPath), "%s/raft_config.json", pSyncInfo->path); if (!taosCheckExistFile(pSyncNode->configPath)) { - // create raft config file - ret = raftCfgCreateFile((SSyncCfg*)&(pSyncInfo->syncCfg), pSyncInfo->isStandBy, pSyncNode->configPath); + // create a new raft config file + SRaftCfgMeta meta; + meta.isStandBy = pSyncInfo->isStandBy; + meta.snapshotEnable = pSyncInfo->snapshotEnable; + ret = 
raftCfgCreateFile((SSyncCfg*)&(pSyncInfo->syncCfg), meta, pSyncNode->configPath); assert(ret == 0); } else { @@ -552,35 +570,64 @@ SSyncNode* syncNodeOpen(const SSyncInfo* pOldSyncInfo) { pSyncNode->FpOnPing = syncNodeOnPingCb; pSyncNode->FpOnPingReply = syncNodeOnPingReplyCb; pSyncNode->FpOnClientRequest = syncNodeOnClientRequestCb; - pSyncNode->FpOnRequestVote = syncNodeOnRequestVoteCb; - pSyncNode->FpOnRequestVoteReply = syncNodeOnRequestVoteReplyCb; - pSyncNode->FpOnAppendEntries = syncNodeOnAppendEntriesCb; - pSyncNode->FpOnAppendEntriesReply = syncNodeOnAppendEntriesReplyCb; pSyncNode->FpOnTimeout = syncNodeOnTimeoutCb; + pSyncNode->FpOnSnapshotSend = syncNodeOnSnapshotSendCb; + pSyncNode->FpOnSnapshotRsp = syncNodeOnSnapshotRspCb; + + if (pSyncNode->pRaftCfg->snapshotEnable) { + sInfo("sync node use snapshot"); + pSyncNode->FpOnRequestVote = syncNodeOnRequestVoteSnapshotCb; + pSyncNode->FpOnRequestVoteReply = syncNodeOnRequestVoteReplySnapshotCb; + pSyncNode->FpOnAppendEntries = syncNodeOnAppendEntriesSnapshotCb; + pSyncNode->FpOnAppendEntriesReply = syncNodeOnAppendEntriesReplySnapshotCb; + + } else { + sInfo("sync node do not use snapshot"); + pSyncNode->FpOnRequestVote = syncNodeOnRequestVoteCb; + pSyncNode->FpOnRequestVoteReply = syncNodeOnRequestVoteReplyCb; + pSyncNode->FpOnAppendEntries = syncNodeOnAppendEntriesCb; + pSyncNode->FpOnAppendEntriesReply = syncNodeOnAppendEntriesReplyCb; + } + // tools pSyncNode->pSyncRespMgr = syncRespMgrCreate(NULL, 0); assert(pSyncNode->pSyncRespMgr != NULL); // restore state pSyncNode->restoreFinish = false; - pSyncNode->pSnapshot = NULL; - if (pSyncNode->pFsm->FpGetSnapshot != NULL) { - pSyncNode->pSnapshot = taosMemoryMalloc(sizeof(SSnapshot)); - pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, pSyncNode->pSnapshot); - } + + // pSyncNode->pSnapshot = NULL; + // if (pSyncNode->pFsm->FpGetSnapshot != NULL) { + // pSyncNode->pSnapshot = taosMemoryMalloc(sizeof(SSnapshot)); + // 
pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, pSyncNode->pSnapshot); + // } // tsem_init(&(pSyncNode->restoreSem), 0, 0); + // snapshot senders + for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { + SSyncSnapshotSender* pSender = snapshotSenderCreate(pSyncNode, i); + // ASSERT(pSender != NULL); + (pSyncNode->senders)[i] = pSender; + } + + // snapshot receivers + pSyncNode->pNewNodeReceiver = snapshotReceiverCreate(pSyncNode, 100); + // start in syncNodeStart // start raft // syncNodeBecomeFollower(pSyncNode); + // snapshot meta + pSyncNode->sMeta.lastConfigIndex = -1; + return pSyncNode; } void syncNodeStart(SSyncNode* pSyncNode) { // start raft if (pSyncNode->replicaNum == 1) { + raftStoreNextTerm(pSyncNode->pRaftStore); syncNodeBecomeLeader(pSyncNode); syncNodeLog2("==state change become leader immediately==", pSyncNode); @@ -662,9 +709,23 @@ void syncNodeClose(SSyncNode* pSyncNode) { taosMemoryFree(pSyncNode->pFsm); } + for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { + if ((pSyncNode->senders)[i] != NULL) { + snapshotSenderDestroy((pSyncNode->senders)[i]); + (pSyncNode->senders)[i] = NULL; + } + } + + if (pSyncNode->pNewNodeReceiver != NULL) { + snapshotReceiverDestroy(pSyncNode->pNewNodeReceiver); + pSyncNode->pNewNodeReceiver = NULL; + } + + /* if (pSyncNode->pSnapshot != NULL) { taosMemoryFree(pSyncNode->pSnapshot); } + */ // tsem_destroy(&pSyncNode->restoreSem); @@ -672,6 +733,9 @@ void syncNodeClose(SSyncNode* pSyncNode) { // taosMemoryFree(pSyncNode); } +// option +bool syncNodeSnapshotEnable(SSyncNode* pSyncNode) { return pSyncNode->pRaftCfg->snapshotEnable; } + // ping -------------- int32_t syncNodePing(SSyncNode* pSyncNode, const SRaftId* destRaftId, SyncPing* pMsg) { syncPingLog2((char*)"==syncNodePing==", pMsg); @@ -762,7 +826,13 @@ int32_t syncNodeRestartElectTimer(SSyncNode* pSyncNode, int32_t ms) { int32_t syncNodeResetElectTimer(SSyncNode* pSyncNode) { int32_t ret = 0; - int32_t electMS = syncUtilElectRandomMS(pSyncNode->electBaseLine, 2 * 
pSyncNode->electBaseLine); + int32_t electMS; + + if (pSyncNode->pRaftCfg->isStandBy) { + electMS = TIMER_MAX_MS; + } else { + electMS = syncUtilElectRandomMS(pSyncNode->electBaseLine, 2 * pSyncNode->electBaseLine); + } ret = syncNodeRestartElectTimer(pSyncNode, electMS); return ret; } @@ -788,6 +858,13 @@ int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pSyncNode, SRp SEpSet epSet; syncUtilraftId2EpSet(destRaftId, &epSet); if (pSyncNode->FpSendMsg != NULL) { + if (gRaftDetailLog) { + char* JsonStr = syncRpcMsg2Str(pMsg); + syncUtilJson2Line(JsonStr); + sTrace("sync send msg, vgId:%d, type:%d, msg:%s", pSyncNode->vgId, pMsg->msgType, JsonStr); + taosMemoryFree(JsonStr); + } + // htonl syncUtilMsgHtoN(pMsg->pCont); @@ -952,6 +1029,20 @@ cJSON* syncNode2Json(const SSyncNode* pSyncNode) { cJSON_AddStringToObject(pRoot, "FpOnAppendEntriesReply", u64buf); snprintf(u64buf, sizeof(u64buf), "%p", pSyncNode->FpOnTimeout); cJSON_AddStringToObject(pRoot, "FpOnTimeout", u64buf); + + // restoreFinish + cJSON_AddNumberToObject(pRoot, "restoreFinish", pSyncNode->restoreFinish); + + // snapshot senders + cJSON* pSenders = cJSON_CreateArray(); + cJSON_AddItemToObject(pRoot, "senders", pSenders); + for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { + cJSON_AddItemToArray(pSenders, snapshotSender2Json((pSyncNode->senders)[i])); + } + + // snapshot receivers + cJSON* pReceivers = cJSON_CreateArray(); + cJSON_AddItemToObject(pRoot, "receiver", snapshotReceiver2Json(pSyncNode->pNewNodeReceiver)); } cJSON* pJson = cJSON_CreateObject(); @@ -973,10 +1064,10 @@ char* syncNode2SimpleStr(const SSyncNode* pSyncNode) { "syncNode2SimpleStr vgId:%d currentTerm:%lu, commitIndex:%ld, state:%d %s, isStandBy:%d, " "electTimerLogicClock:%lu, " "electTimerLogicClockUser:%lu, " - "electTimerMS:%d", + "electTimerMS:%d, replicaNum:%d", pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, pSyncNode->commitIndex, pSyncNode->state, syncUtilState2String(pSyncNode->state), 
pSyncNode->pRaftCfg->isStandBy, pSyncNode->electTimerLogicClock, - pSyncNode->electTimerLogicClockUser, pSyncNode->electTimerMS); + pSyncNode->electTimerLogicClockUser, pSyncNode->electTimerMS, pSyncNode->replicaNum); return s; } @@ -1013,6 +1104,8 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, bool* isDro voteGrantedUpdate(pSyncNode->pVotesGranted, pSyncNode); votesRespondUpdate(pSyncNode->pVotesRespond, pSyncNode); + pSyncNode->quorum = syncUtilQuorum(pSyncNode->pRaftCfg->cfg.replicaNum); + // isDrop *isDrop = true; bool IamInOld, IamInNew; @@ -1103,7 +1196,15 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode) { for (int i = 0; i < pSyncNode->pNextIndex->replicaNum; ++i) { // maybe overwrite myself, no harm // just do it! - pSyncNode->pNextIndex->index[i] = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore) + 1; + + // pSyncNode->pNextIndex->index[i] = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore) + 1; + + // maybe wal is deleted + SyncIndex lastIndex; + SyncTerm lastTerm; + int32_t code = syncNodeGetLastIndexTerm(pSyncNode, &lastIndex, &lastTerm); + ASSERT(code == 0); + pSyncNode->pNextIndex->index[i] = lastIndex + 1; } for (int i = 0; i < pSyncNode->pMatchIndex->replicaNum; ++i) { @@ -1112,6 +1213,17 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode) { pSyncNode->pMatchIndex->index[i] = SYNC_INDEX_INVALID; } + // update sender private term + SSyncSnapshotSender* pMySender = syncNodeGetSnapshotSender(pSyncNode, &(pSyncNode->myRaftId)); + if (pMySender != NULL) { + for (int i = 0; i < pSyncNode->pMatchIndex->replicaNum; ++i) { + if ((pSyncNode->senders)[i]->privateTerm > pMySender->privateTerm) { + pMySender->privateTerm = (pSyncNode->senders)[i]->privateTerm; + } + } + (pMySender->privateTerm) += 100; + } + // stop elect timer syncNodeStopElectTimer(pSyncNode); @@ -1186,6 +1298,153 @@ void syncNodeVoteForSelf(SSyncNode* pSyncNode) { syncRequestVoteReplyDestroy(pMsg); } +// snapshot -------------- +bool 
syncNodeHasSnapshot(SSyncNode* pSyncNode) { + bool ret = false; + SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; + if (pSyncNode->pFsm->FpGetSnapshot != NULL) { + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + if (snapshot.lastApplyIndex >= SYNC_INDEX_BEGIN) { + ret = true; + } + } + return ret; +} + +bool syncNodeIsIndexInSnapshot(SSyncNode* pSyncNode, SyncIndex index) { + ASSERT(syncNodeHasSnapshot(pSyncNode)); + ASSERT(pSyncNode->pFsm->FpGetSnapshot != NULL); + ASSERT(index >= SYNC_INDEX_BEGIN); + + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + bool b = (index <= snapshot.lastApplyIndex); + return b; +} + +SyncIndex syncNodeGetLastIndex(SSyncNode* pSyncNode) { + SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; + if (pSyncNode->pFsm->FpGetSnapshot != NULL) { + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + } + SyncIndex logLastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore); + + SyncIndex lastIndex = logLastIndex > snapshot.lastApplyIndex ? 
logLastIndex : snapshot.lastApplyIndex; + return lastIndex; +} + +SyncTerm syncNodeGetLastTerm(SSyncNode* pSyncNode) { + SyncTerm lastTerm = 0; + if (syncNodeHasSnapshot(pSyncNode)) { + // has snapshot + SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; + if (pSyncNode->pFsm->FpGetSnapshot != NULL) { + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + } + + SyncIndex logLastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore); + if (logLastIndex > snapshot.lastApplyIndex) { + lastTerm = pSyncNode->pLogStore->syncLogLastTerm(pSyncNode->pLogStore); + } else { + lastTerm = snapshot.lastApplyTerm; + } + + } else { + // no snapshot + lastTerm = pSyncNode->pLogStore->syncLogLastTerm(pSyncNode->pLogStore); + } + + return lastTerm; +} + +// get last index and term along with snapshot +int32_t syncNodeGetLastIndexTerm(SSyncNode* pSyncNode, SyncIndex* pLastIndex, SyncTerm* pLastTerm) { + *pLastIndex = syncNodeGetLastIndex(pSyncNode); + *pLastTerm = syncNodeGetLastTerm(pSyncNode); + return 0; +} + +SyncIndex syncNodeSyncStartIndex(SSyncNode* pSyncNode) { + SyncIndex syncStartIndex = syncNodeGetLastIndex(pSyncNode) + 1; + return syncStartIndex; +} + +SyncIndex syncNodeGetPreIndex(SSyncNode* pSyncNode, SyncIndex index) { + ASSERT(index >= SYNC_INDEX_BEGIN); + SyncIndex syncStartIndex = syncNodeSyncStartIndex(pSyncNode); + ASSERT(index <= syncStartIndex); + + SyncIndex preIndex = index - 1; + return preIndex; +} + +SyncTerm syncNodeGetPreTerm(SSyncNode* pSyncNode, SyncIndex index) { + ASSERT(index >= SYNC_INDEX_BEGIN); + SyncIndex syncStartIndex = syncNodeSyncStartIndex(pSyncNode); + ASSERT(index <= syncStartIndex); + + if (index == SYNC_INDEX_BEGIN) { + return 0; + } + + SyncTerm preTerm = 0; + if (syncNodeHasSnapshot(pSyncNode)) { + // has snapshot + SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; + if (pSyncNode->pFsm->FpGetSnapshot != NULL) { + 
pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + } + + if (index > snapshot.lastApplyIndex + 1) { + // should be log preTerm + SSyncRaftEntry* pPreEntry = NULL; + int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index - 1, &pPreEntry); + ASSERT(code == 0); + ASSERT(pPreEntry != NULL); + + preTerm = pPreEntry->term; + taosMemoryFree(pPreEntry); + + } else if (index == snapshot.lastApplyIndex + 1) { + preTerm = snapshot.lastApplyTerm; + + } else { + // maybe snapshot change + sError("sync get pre term, bad scene. index:%ld", index); + logStoreLog2("sync get pre term, bad scene", pSyncNode->pLogStore); + + SSyncRaftEntry* pPreEntry = NULL; + int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index - 1, &pPreEntry); + ASSERT(code == 0); + ASSERT(pPreEntry != NULL); + + preTerm = pPreEntry->term; + taosMemoryFree(pPreEntry); + } + + } else { + // no snapshot + ASSERT(index > SYNC_INDEX_BEGIN); + + SSyncRaftEntry* pPreEntry = NULL; + int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index - 1, &pPreEntry); + ASSERT(code == 0); + ASSERT(pPreEntry != NULL); + + preTerm = pPreEntry->term; + taosMemoryFree(pPreEntry); + } + + return preTerm; +} + +// get pre index and term of "index" +int32_t syncNodeGetPreIndexTerm(SSyncNode* pSyncNode, SyncIndex index, SyncIndex* pPreIndex, SyncTerm* pPreTerm) { + *pPreIndex = syncNodeGetPreIndex(pSyncNode, index); + *pPreTerm = syncNodeGetPreTerm(pSyncNode, index); + return 0; +} + // for debug -------------- void syncNodePrint(SSyncNode* pObj) { char* serialized = syncNode2Str(pObj); @@ -1327,7 +1586,8 @@ static int32_t syncNodeAppendNoop(SSyncNode* ths) { assert(pEntry != NULL); if (ths->state == TAOS_SYNC_STATE_LEADER) { - ths->pLogStore->appendEntry(ths->pLogStore, pEntry); + // ths->pLogStore->appendEntry(ths->pLogStore, pEntry); + ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry); syncNodeReplicate(ths); } @@ -1383,13 +1643,14 @@ int32_t 
syncNodeOnClientRequestCb(SSyncNode* ths, SyncClientRequest* pMsg) { int32_t ret = 0; syncClientRequestLog2("==syncNodeOnClientRequestCb==", pMsg); - SyncIndex index = ths->pLogStore->getLastIndex(ths->pLogStore) + 1; + SyncIndex index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore); SyncTerm term = ths->pRaftStore->currentTerm; SSyncRaftEntry* pEntry = syncEntryBuild2((SyncClientRequest*)pMsg, term, index); assert(pEntry != NULL); if (ths->state == TAOS_SYNC_STATE_LEADER) { - ths->pLogStore->appendEntry(ths->pLogStore, pEntry); + // ths->pLogStore->appendEntry(ths->pLogStore, pEntry); + ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry); // start replicate right now! syncNodeReplicate(ths); @@ -1459,3 +1720,137 @@ const char* syncStr(ESyncState state) { return "error"; } } + +int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) { + int32_t code = 0; + ESyncState state = flag; + sInfo("sync event commit from index:%" PRId64 " to index:%" PRId64 ", %s", beginIndex, endIndex, + syncUtilState2String(state)); + + // maybe execute by leader, skip snapshot + SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; + if (ths->pFsm->FpGetSnapshot != NULL) { + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + } + if (beginIndex <= snapshot.lastApplyIndex) { + beginIndex = snapshot.lastApplyIndex + 1; + } + + // execute fsm + if (ths->pFsm != NULL) { + for (SyncIndex i = beginIndex; i <= endIndex; ++i) { + if (i != SYNC_INDEX_INVALID) { + SSyncRaftEntry* pEntry; + code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, i, &pEntry); + ASSERT(code == 0); + ASSERT(pEntry != NULL); + + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pEntry, &rpcMsg); + + if (ths->pFsm->FpCommitCb != NULL && syncUtilUserCommit(pEntry->originalRpcType)) { + SFsmCbMeta cbMeta; + cbMeta.index = pEntry->index; + cbMeta.isWeak = pEntry->isWeak; + cbMeta.code = 0; + cbMeta.state = ths->state; + cbMeta.seqNum = pEntry->seqNum; + 
cbMeta.term = pEntry->term; + cbMeta.currentTerm = ths->pRaftStore->currentTerm; + cbMeta.flag = flag; + + ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, cbMeta); + } + + // config change + if (pEntry->originalRpcType == TDMT_SYNC_CONFIG_CHANGE) { + SSyncCfg oldSyncCfg = ths->pRaftCfg->cfg; + + SSyncCfg newSyncCfg; + int32_t ret = syncCfgFromStr(rpcMsg.pCont, &newSyncCfg); + ASSERT(ret == 0); + + // update new config myIndex + bool hit = false; + for (int i = 0; i < newSyncCfg.replicaNum; ++i) { + if (strcmp(ths->myNodeInfo.nodeFqdn, (newSyncCfg.nodeInfo)[i].nodeFqdn) == 0 && + ths->myNodeInfo.nodePort == (newSyncCfg.nodeInfo)[i].nodePort) { + newSyncCfg.myIndex = i; + hit = true; + break; + } + } + + SReConfigCbMeta cbMeta = {0}; + bool isDrop; + + // I am in newConfig + if (hit) { + syncNodeUpdateConfig(ths, &newSyncCfg, &isDrop); + + // change isStandBy to normal + if (!isDrop) { + if (ths->state == TAOS_SYNC_STATE_LEADER) { + syncNodeBecomeLeader(ths); + } else { + syncNodeBecomeFollower(ths); + } + } + + char* sOld = syncCfg2Str(&oldSyncCfg); + char* sNew = syncCfg2Str(&newSyncCfg); + sInfo("==config change== 0x11 old:%s new:%s isDrop:%d \n", sOld, sNew, isDrop); + taosMemoryFree(sOld); + taosMemoryFree(sNew); + } + + // always call FpReConfigCb + if (ths->pFsm->FpReConfigCb != NULL) { + cbMeta.code = 0; + cbMeta.currentTerm = ths->pRaftStore->currentTerm; + cbMeta.index = pEntry->index; + cbMeta.term = pEntry->term; + cbMeta.oldCfg = oldSyncCfg; + cbMeta.flag = 0x11; + cbMeta.isDrop = isDrop; + ths->pFsm->FpReConfigCb(ths->pFsm, newSyncCfg, cbMeta); + } + } + + // restore finish + if (pEntry->index == ths->pLogStore->syncLogLastIndex(ths->pLogStore)) { + if (ths->restoreFinish == false) { + if (ths->pFsm->FpRestoreFinishCb != NULL) { + ths->pFsm->FpRestoreFinishCb(ths->pFsm); + } + ths->restoreFinish = true; + sInfo("restore finish %p vgId:%d", ths, ths->vgId); + } + } + + rpcFreeCont(rpcMsg.pCont); + syncEntryDestory(pEntry); + } + } + } + return 0; +} + +bool 
syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId) { + for (int i = 0; i < ths->replicaNum; ++i) { + if (syncUtilSameId(&((ths->replicasId)[i]), pRaftId)) { + return true; + } + } + return false; +} + +SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId) { + SSyncSnapshotSender* pSender = NULL; + for (int i = 0; i < ths->replicaNum; ++i) { + if (syncUtilSameId(pDestId, &((ths->replicasId)[i]))) { + pSender = (ths->senders)[i]; + } + } + return pSender; +} \ No newline at end of file diff --git a/source/libs/sync/src/syncMessage.c b/source/libs/sync/src/syncMessage.c index 6871e6b3ed..af04a0f649 100644 --- a/source/libs/sync/src/syncMessage.c +++ b/source/libs/sync/src/syncMessage.c @@ -65,6 +65,16 @@ cJSON* syncRpcMsg2Json(SRpcMsg* pRpcMsg) { pRoot = syncAppendEntriesReply2Json(pSyncMsg); syncAppendEntriesReplyDestroy(pSyncMsg); + } else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) { + SyncSnapshotSend* pSyncMsg = syncSnapshotSendDeserialize2(pRpcMsg->pCont, pRpcMsg->contLen); + pRoot = syncSnapshotSend2Json(pSyncMsg); + syncSnapshotSendDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) { + SyncSnapshotRsp* pSyncMsg = syncSnapshotRspDeserialize2(pRpcMsg->pCont, pRpcMsg->contLen); + pRoot = syncSnapshotRsp2Json(pSyncMsg); + syncSnapshotRspDestroy(pSyncMsg); + } else if (pRpcMsg->msgType == TDMT_SYNC_COMMON_RESPONSE) { pRoot = cJSON_CreateObject(); char* s; @@ -135,9 +145,11 @@ void syncRpcMsgLog(SRpcMsg* pMsg) { } void syncRpcMsgLog2(char* s, SRpcMsg* pMsg) { - char* serialized = syncRpcMsg2Str(pMsg); - sTrace("syncRpcMsgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncRpcMsg2Str(pMsg); + sTrace("syncRpcMsgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncTimeout---- @@ -264,9 +276,11 @@ void syncTimeoutLog(const SyncTimeout* pMsg) { } 
void syncTimeoutLog2(char* s, const SyncTimeout* pMsg) { - char* serialized = syncTimeout2Str(pMsg); - sTrace("syncTimeoutLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncTimeout2Str(pMsg); + sTrace("syncTimeoutLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncPing---- @@ -524,9 +538,11 @@ void syncPingLog(const SyncPing* pMsg) { } void syncPingLog2(char* s, const SyncPing* pMsg) { - char* serialized = syncPing2Str(pMsg); - sTrace("syncPingLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncPing2Str(pMsg); + sTrace("syncPingLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncPingReply---- @@ -784,9 +800,11 @@ void syncPingReplyLog(const SyncPingReply* pMsg) { } void syncPingReplyLog2(char* s, const SyncPingReply* pMsg) { - char* serialized = syncPingReply2Str(pMsg); - sTrace("syncPingReplyLog2 | len:%zu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncPingReply2Str(pMsg); + sTrace("syncPingReplyLog2 | len:%zu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncClientRequest---- @@ -925,9 +943,11 @@ void syncClientRequestLog(const SyncClientRequest* pMsg) { } void syncClientRequestLog2(char* s, const SyncClientRequest* pMsg) { - char* serialized = syncClientRequest2Str(pMsg); - sTrace("syncClientRequestLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncClientRequest2Str(pMsg); + sTrace("syncClientRequestLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- 
message process SyncRequestVote---- @@ -1074,9 +1094,11 @@ void syncRequestVoteLog(const SyncRequestVote* pMsg) { } void syncRequestVoteLog2(char* s, const SyncRequestVote* pMsg) { - char* serialized = syncRequestVote2Str(pMsg); - sTrace("syncRequestVoteLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncRequestVote2Str(pMsg); + sTrace("syncRequestVoteLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncRequestVoteReply---- @@ -1220,9 +1242,11 @@ void syncRequestVoteReplyLog(const SyncRequestVoteReply* pMsg) { } void syncRequestVoteReplyLog2(char* s, const SyncRequestVoteReply* pMsg) { - char* serialized = syncRequestVoteReply2Str(pMsg); - sTrace("syncRequestVoteReplyLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncRequestVoteReply2Str(pMsg); + sTrace("syncRequestVoteReplyLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncAppendEntries---- @@ -1333,6 +1357,9 @@ cJSON* syncAppendEntries2Json(const SyncAppendEntries* pMsg) { snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->term); cJSON_AddStringToObject(pRoot, "term", u64buf); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->privateTerm); + cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + snprintf(u64buf, sizeof(u64buf), "%ld", pMsg->prevLogIndex); cJSON_AddStringToObject(pRoot, "prevLogIndex", u64buf); @@ -1386,9 +1413,11 @@ void syncAppendEntriesLog(const SyncAppendEntries* pMsg) { } void syncAppendEntriesLog2(char* s, const SyncAppendEntries* pMsg) { - char* serialized = syncAppendEntries2Str(pMsg); - sTrace("syncAppendEntriesLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = 
syncAppendEntries2Str(pMsg); + sTrace("syncAppendEntriesLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncAppendEntriesReply---- @@ -1494,6 +1523,9 @@ cJSON* syncAppendEntriesReply2Json(const SyncAppendEntriesReply* pMsg) { cJSON_AddNumberToObject(pDestId, "vgId", pMsg->destId.vgId); cJSON_AddItemToObject(pRoot, "destId", pDestId); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->privateTerm); + cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->term); cJSON_AddStringToObject(pRoot, "term", u64buf); cJSON_AddNumberToObject(pRoot, "success", pMsg->success); @@ -1535,9 +1567,11 @@ void syncAppendEntriesReplyLog(const SyncAppendEntriesReply* pMsg) { } void syncAppendEntriesReplyLog2(char* s, const SyncAppendEntriesReply* pMsg) { - char* serialized = syncAppendEntriesReply2Str(pMsg); - sTrace("syncAppendEntriesReplyLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncAppendEntriesReply2Str(pMsg); + sTrace("syncAppendEntriesReplyLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncApplyMsg---- @@ -1686,7 +1720,339 @@ void syncApplyMsgLog(const SyncApplyMsg* pMsg) { } void syncApplyMsgLog2(char* s, const SyncApplyMsg* pMsg) { - char* serialized = syncApplyMsg2Str(pMsg); - sTrace("syncApplyMsgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + if (gRaftDetailLog) { + char* serialized = syncApplyMsg2Str(pMsg); + sTrace("syncApplyMsgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } +} + +// --------------------------------------------- +SyncSnapshotSend* syncSnapshotSendBuild(uint32_t dataLen, int32_t vgId) { + uint32_t bytes = sizeof(SyncSnapshotSend) + dataLen; + SyncSnapshotSend* pMsg = taosMemoryMalloc(bytes); + memset(pMsg, 
0, bytes); + pMsg->bytes = bytes; + pMsg->vgId = vgId; + pMsg->msgType = TDMT_SYNC_SNAPSHOT_SEND; + pMsg->dataLen = dataLen; + return pMsg; +} + +void syncSnapshotSendDestroy(SyncSnapshotSend* pMsg) { + if (pMsg != NULL) { + taosMemoryFree(pMsg); + } +} + +void syncSnapshotSendSerialize(const SyncSnapshotSend* pMsg, char* buf, uint32_t bufLen) { + assert(pMsg->bytes <= bufLen); + memcpy(buf, pMsg, pMsg->bytes); +} + +void syncSnapshotSendDeserialize(const char* buf, uint32_t len, SyncSnapshotSend* pMsg) { + memcpy(pMsg, buf, len); + assert(len == pMsg->bytes); + assert(pMsg->bytes == sizeof(SyncSnapshotSend) + pMsg->dataLen); +} + +char* syncSnapshotSendSerialize2(const SyncSnapshotSend* pMsg, uint32_t* len) { + char* buf = taosMemoryMalloc(pMsg->bytes); + assert(buf != NULL); + syncSnapshotSendSerialize(pMsg, buf, pMsg->bytes); + if (len != NULL) { + *len = pMsg->bytes; + } + return buf; +} + +SyncSnapshotSend* syncSnapshotSendDeserialize2(const char* buf, uint32_t len) { + uint32_t bytes = *((uint32_t*)buf); + SyncSnapshotSend* pMsg = taosMemoryMalloc(bytes); + assert(pMsg != NULL); + syncSnapshotSendDeserialize(buf, len, pMsg); + assert(len == pMsg->bytes); + return pMsg; +} + +void syncSnapshotSend2RpcMsg(const SyncSnapshotSend* pMsg, SRpcMsg* pRpcMsg) { + memset(pRpcMsg, 0, sizeof(*pRpcMsg)); + pRpcMsg->msgType = pMsg->msgType; + pRpcMsg->contLen = pMsg->bytes; + pRpcMsg->pCont = rpcMallocCont(pRpcMsg->contLen); + syncSnapshotSendSerialize(pMsg, pRpcMsg->pCont, pRpcMsg->contLen); +} + +void syncSnapshotSendFromRpcMsg(const SRpcMsg* pRpcMsg, SyncSnapshotSend* pMsg) { + syncSnapshotSendDeserialize(pRpcMsg->pCont, pRpcMsg->contLen, pMsg); +} + +SyncSnapshotSend* syncSnapshotSendFromRpcMsg2(const SRpcMsg* pRpcMsg) { + SyncSnapshotSend* pMsg = syncSnapshotSendDeserialize2(pRpcMsg->pCont, pRpcMsg->contLen); + assert(pMsg != NULL); + return pMsg; +} + +cJSON* syncSnapshotSend2Json(const SyncSnapshotSend* pMsg) { + char u64buf[128]; + cJSON* pRoot = 
cJSON_CreateObject(); + + if (pMsg != NULL) { + cJSON_AddNumberToObject(pRoot, "bytes", pMsg->bytes); + cJSON_AddNumberToObject(pRoot, "vgId", pMsg->vgId); + cJSON_AddNumberToObject(pRoot, "msgType", pMsg->msgType); + + cJSON* pSrcId = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->srcId.addr); + cJSON_AddStringToObject(pSrcId, "addr", u64buf); + { + uint64_t u64 = pMsg->srcId.addr; + cJSON* pTmp = pSrcId; + char host[128]; + uint16_t port; + syncUtilU642Addr(u64, host, sizeof(host), &port); + cJSON_AddStringToObject(pTmp, "addr_host", host); + cJSON_AddNumberToObject(pTmp, "addr_port", port); + } + cJSON_AddNumberToObject(pSrcId, "vgId", pMsg->srcId.vgId); + cJSON_AddItemToObject(pRoot, "srcId", pSrcId); + + cJSON* pDestId = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->destId.addr); + cJSON_AddStringToObject(pDestId, "addr", u64buf); + { + uint64_t u64 = pMsg->destId.addr; + cJSON* pTmp = pDestId; + char host[128]; + uint16_t port; + syncUtilU642Addr(u64, host, sizeof(host), &port); + cJSON_AddStringToObject(pTmp, "addr_host", host); + cJSON_AddNumberToObject(pTmp, "addr_port", port); + } + cJSON_AddNumberToObject(pDestId, "vgId", pMsg->destId.vgId); + cJSON_AddItemToObject(pRoot, "destId", pDestId); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->term); + cJSON_AddStringToObject(pRoot, "term", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->privateTerm); + cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%ld", pMsg->lastIndex); + cJSON_AddStringToObject(pRoot, "lastIndex", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->lastTerm); + cJSON_AddStringToObject(pRoot, "lastTerm", u64buf); + + cJSON_AddNumberToObject(pRoot, "seq", pMsg->seq); + + cJSON_AddNumberToObject(pRoot, "dataLen", pMsg->dataLen); + char* s; + s = syncUtilprintBin((char*)(pMsg->data), pMsg->dataLen); + cJSON_AddStringToObject(pRoot, "data", s); + taosMemoryFree(s); + s = 
syncUtilprintBin2((char*)(pMsg->data), pMsg->dataLen); + cJSON_AddStringToObject(pRoot, "data2", s); + taosMemoryFree(s); + } + + cJSON* pJson = cJSON_CreateObject(); + cJSON_AddItemToObject(pJson, "SyncSnapshotSend", pRoot); + return pJson; +} + +char* syncSnapshotSend2Str(const SyncSnapshotSend* pMsg) { + cJSON* pJson = syncSnapshotSend2Json(pMsg); + char* serialized = cJSON_Print(pJson); + cJSON_Delete(pJson); + return serialized; +} + +// for debug ---------------------- +void syncSnapshotSendPrint(const SyncSnapshotSend* pMsg) { + char* serialized = syncSnapshotSend2Str(pMsg); + printf("syncSnapshotSendPrint | len:%lu | %s \n", strlen(serialized), serialized); + fflush(NULL); taosMemoryFree(serialized); } + +void syncSnapshotSendPrint2(char* s, const SyncSnapshotSend* pMsg) { + char* serialized = syncSnapshotSend2Str(pMsg); + printf("syncSnapshotSendPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + fflush(NULL); + taosMemoryFree(serialized); +} + +void syncSnapshotSendLog(const SyncSnapshotSend* pMsg) { + char* serialized = syncSnapshotSend2Str(pMsg); + sTrace("syncSnapshotSendLog | len:%lu | %s", strlen(serialized), serialized); + taosMemoryFree(serialized); +} + +void syncSnapshotSendLog2(char* s, const SyncSnapshotSend* pMsg) { + if (gRaftDetailLog) { + char* serialized = syncSnapshotSend2Str(pMsg); + sTrace("syncSnapshotSendLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } +} + +// --------------------------------------------- +SyncSnapshotRsp* syncSnapshotRspBuild(int32_t vgId) { + uint32_t bytes = sizeof(SyncSnapshotRsp); + SyncSnapshotRsp* pMsg = taosMemoryMalloc(bytes); + memset(pMsg, 0, bytes); + pMsg->bytes = bytes; + pMsg->vgId = vgId; + pMsg->msgType = TDMT_SYNC_SNAPSHOT_RSP; + return pMsg; +} + +void syncSnapshotRspDestroy(SyncSnapshotRsp* pMsg) { + if (pMsg != NULL) { + taosMemoryFree(pMsg); + } +} + +void syncSnapshotRspSerialize(const SyncSnapshotRsp* pMsg, char* buf, uint32_t 
bufLen) { + assert(pMsg->bytes <= bufLen); + memcpy(buf, pMsg, pMsg->bytes); +} + +void syncSnapshotRspDeserialize(const char* buf, uint32_t len, SyncSnapshotRsp* pMsg) { + memcpy(pMsg, buf, len); + assert(len == pMsg->bytes); +} + +char* syncSnapshotRspSerialize2(const SyncSnapshotRsp* pMsg, uint32_t* len) { + char* buf = taosMemoryMalloc(pMsg->bytes); + assert(buf != NULL); + syncSnapshotRspSerialize(pMsg, buf, pMsg->bytes); + if (len != NULL) { + *len = pMsg->bytes; + } + return buf; +} + +SyncSnapshotRsp* syncSnapshotRspDeserialize2(const char* buf, uint32_t len) { + uint32_t bytes = *((uint32_t*)buf); + SyncSnapshotRsp* pMsg = taosMemoryMalloc(bytes); + assert(pMsg != NULL); + syncSnapshotRspDeserialize(buf, len, pMsg); + assert(len == pMsg->bytes); + return pMsg; +} + +void syncSnapshotRsp2RpcMsg(const SyncSnapshotRsp* pMsg, SRpcMsg* pRpcMsg) { + memset(pRpcMsg, 0, sizeof(*pRpcMsg)); + pRpcMsg->msgType = pMsg->msgType; + pRpcMsg->contLen = pMsg->bytes; + pRpcMsg->pCont = rpcMallocCont(pRpcMsg->contLen); + syncSnapshotRspSerialize(pMsg, pRpcMsg->pCont, pRpcMsg->contLen); +} + +void syncSnapshotRspFromRpcMsg(const SRpcMsg* pRpcMsg, SyncSnapshotRsp* pMsg) { + syncSnapshotRspDeserialize(pRpcMsg->pCont, pRpcMsg->contLen, pMsg); +} + +SyncSnapshotRsp* syncSnapshotRspFromRpcMsg2(const SRpcMsg* pRpcMsg) { + SyncSnapshotRsp* pMsg = syncSnapshotRspDeserialize2(pRpcMsg->pCont, pRpcMsg->contLen); + assert(pMsg != NULL); + return pMsg; +} + +cJSON* syncSnapshotRsp2Json(const SyncSnapshotRsp* pMsg) { + char u64buf[128]; + cJSON* pRoot = cJSON_CreateObject(); + + if (pMsg != NULL) { + cJSON_AddNumberToObject(pRoot, "bytes", pMsg->bytes); + cJSON_AddNumberToObject(pRoot, "vgId", pMsg->vgId); + cJSON_AddNumberToObject(pRoot, "msgType", pMsg->msgType); + + cJSON* pSrcId = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->srcId.addr); + cJSON_AddStringToObject(pSrcId, "addr", u64buf); + { + uint64_t u64 = pMsg->srcId.addr; + cJSON* pTmp = pSrcId; + char 
host[128]; + uint16_t port; + syncUtilU642Addr(u64, host, sizeof(host), &port); + cJSON_AddStringToObject(pTmp, "addr_host", host); + cJSON_AddNumberToObject(pTmp, "addr_port", port); + } + cJSON_AddNumberToObject(pSrcId, "vgId", pMsg->srcId.vgId); + cJSON_AddItemToObject(pRoot, "srcId", pSrcId); + + cJSON* pDestId = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->destId.addr); + cJSON_AddStringToObject(pDestId, "addr", u64buf); + { + uint64_t u64 = pMsg->destId.addr; + cJSON* pTmp = pDestId; + char host[128]; + uint16_t port; + syncUtilU642Addr(u64, host, sizeof(host), &port); + cJSON_AddStringToObject(pTmp, "addr_host", host); + cJSON_AddNumberToObject(pTmp, "addr_port", port); + } + cJSON_AddNumberToObject(pDestId, "vgId", pMsg->destId.vgId); + cJSON_AddItemToObject(pRoot, "destId", pDestId); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->term); + cJSON_AddStringToObject(pRoot, "term", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->privateTerm); + cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%ld", pMsg->lastIndex); + cJSON_AddStringToObject(pRoot, "lastIndex", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->lastTerm); + cJSON_AddStringToObject(pRoot, "lastTerm", u64buf); + + cJSON_AddNumberToObject(pRoot, "ack", pMsg->ack); + cJSON_AddNumberToObject(pRoot, "code", pMsg->code); + } + + cJSON* pJson = cJSON_CreateObject(); + cJSON_AddItemToObject(pJson, "SyncSnapshotRsp", pRoot); + return pJson; +} + +char* syncSnapshotRsp2Str(const SyncSnapshotRsp* pMsg) { + cJSON* pJson = syncSnapshotRsp2Json(pMsg); + char* serialized = cJSON_Print(pJson); + cJSON_Delete(pJson); + return serialized; +} + +// for debug ---------------------- +void syncSnapshotRspPrint(const SyncSnapshotRsp* pMsg) { + char* serialized = syncSnapshotRsp2Str(pMsg); + printf("syncSnapshotRspPrint | len:%lu | %s \n", strlen(serialized), serialized); + fflush(NULL); + taosMemoryFree(serialized); +} + +void 
syncSnapshotRspPrint2(char* s, const SyncSnapshotRsp* pMsg) { + char* serialized = syncSnapshotRsp2Str(pMsg); + printf("syncSnapshotRspPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + fflush(NULL); + taosMemoryFree(serialized); +} + +void syncSnapshotRspLog(const SyncSnapshotRsp* pMsg) { + char* serialized = syncSnapshotRsp2Str(pMsg); + sTrace("syncSnapshotRspLog | len:%lu | %s", strlen(serialized), serialized); + taosMemoryFree(serialized); +} + +void syncSnapshotRspLog2(char* s, const SyncSnapshotRsp* pMsg) { + if (gRaftDetailLog) { + char* serialized = syncSnapshotRsp2Str(pMsg); + sTrace("syncSnapshotRspLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } +} \ No newline at end of file diff --git a/source/libs/sync/src/syncRaftCfg.c b/source/libs/sync/src/syncRaftCfg.c index 3e1931e2c3..95eec5d98f 100644 --- a/source/libs/sync/src/syncRaftCfg.c +++ b/source/libs/sync/src/syncRaftCfg.c @@ -148,6 +148,7 @@ cJSON *raftCfg2Json(SRaftCfg *pRaftCfg) { cJSON *pRoot = cJSON_CreateObject(); cJSON_AddItemToObject(pRoot, "SSyncCfg", syncCfg2Json(&(pRaftCfg->cfg))); cJSON_AddNumberToObject(pRoot, "isStandBy", pRaftCfg->isStandBy); + cJSON_AddNumberToObject(pRoot, "snapshotEnable", pRaftCfg->snapshotEnable); cJSON *pJson = cJSON_CreateObject(); cJSON_AddItemToObject(pJson, "RaftCfg", pRoot); @@ -161,7 +162,7 @@ char *raftCfg2Str(SRaftCfg *pRaftCfg) { return serialized; } -int32_t raftCfgCreateFile(SSyncCfg *pCfg, int8_t isStandBy, const char *path) { +int32_t raftCfgCreateFile(SSyncCfg *pCfg, SRaftCfgMeta meta, const char *path) { assert(pCfg != NULL); TdFilePtr pFile = taosOpenFile(path, TD_FILE_CREATE | TD_FILE_WRITE); @@ -169,7 +170,8 @@ int32_t raftCfgCreateFile(SSyncCfg *pCfg, int8_t isStandBy, const char *path) { SRaftCfg raftCfg; raftCfg.cfg = *pCfg; - raftCfg.isStandBy = isStandBy; + raftCfg.isStandBy = meta.isStandBy; + raftCfg.snapshotEnable = meta.snapshotEnable; char *s = raftCfg2Str(&raftCfg); char 
buf[CONFIG_FILE_LEN] = {0}; @@ -194,6 +196,9 @@ int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg) { cJSON *pJsonIsStandBy = cJSON_GetObjectItem(pJson, "isStandBy"); pRaftCfg->isStandBy = cJSON_GetNumberValue(pJsonIsStandBy); + cJSON *pJsonSnapshotEnable = cJSON_GetObjectItem(pJson, "snapshotEnable"); + pRaftCfg->snapshotEnable = cJSON_GetNumberValue(pJsonSnapshotEnable); + cJSON * pJsonSyncCfg = cJSON_GetObjectItem(pJson, "SSyncCfg"); int32_t code = syncCfgFromJson(pJsonSyncCfg, &(pRaftCfg->cfg)); ASSERT(code == 0); diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index b353ed85db..49509ae979 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -16,6 +16,23 @@ #include "syncRaftLog.h" #include "wal.h" +// refactor, log[0 .. n] ==> log[m .. n] +static int32_t raftLogSetBeginIndex(struct SSyncLogStore* pLogStore, SyncIndex beginIndex); +static SyncIndex raftLogBeginIndex(struct SSyncLogStore* pLogStore); +static SyncIndex raftLogEndIndex(struct SSyncLogStore* pLogStore); +static SyncIndex raftLogWriteIndex(struct SSyncLogStore* pLogStore); +static bool raftLogIsEmpty(struct SSyncLogStore* pLogStore); +static int32_t raftLogEntryCount(struct SSyncLogStore* pLogStore); +static bool raftLogInRange(struct SSyncLogStore* pLogStore, SyncIndex index); +static SyncIndex raftLogLastIndex(struct SSyncLogStore* pLogStore); +static SyncTerm raftLogLastTerm(struct SSyncLogStore* pLogStore); +static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry); +static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncRaftEntry** ppEntry); +static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIndex); + +static int32_t raftLogGetLastEntry(SSyncLogStore* pLogStore, SSyncRaftEntry** ppLastEntry); + +//------------------------------- static SSyncRaftEntry* logStoreGetLastEntry(SSyncLogStore* pLogStore); static SyncIndex 
logStoreLastIndex(SSyncLogStore* pLogStore); static SyncTerm logStoreLastTerm(SSyncLogStore* pLogStore); @@ -25,6 +42,202 @@ static int32_t logStoreTruncate(SSyncLogStore* pLogStore, SyncIndex from static int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index); static SyncIndex logStoreGetCommitIndex(SSyncLogStore* pLogStore); +// refactor, log[0 .. n] ==> log[m .. n] +static int32_t raftLogSetBeginIndex(struct SSyncLogStore* pLogStore, SyncIndex beginIndex) { + sTrace("raftLogSetBeginIndex beginIndex:%ld", beginIndex); + + // if beginIndex == 0, donot need call this funciton + ASSERT(beginIndex > 0); + + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + pData->beginIndex = beginIndex; + walRestoreFromSnapshot(pWal, beginIndex - 1); + return 0; +} + +int32_t raftLogResetBeginIndex(struct SSyncLogStore* pLogStore) { return 0; } + +static SyncIndex raftLogBeginIndex(struct SSyncLogStore* pLogStore) { + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + return pData->beginIndex; +} + +static SyncIndex raftLogEndIndex(struct SSyncLogStore* pLogStore) { return raftLogLastIndex(pLogStore); } + +static bool raftLogIsEmpty(struct SSyncLogStore* pLogStore) { + SyncIndex beginIndex = raftLogBeginIndex(pLogStore); + SyncIndex endIndex = raftLogEndIndex(pLogStore); + return (endIndex < beginIndex); +} + +static int32_t raftLogEntryCount(struct SSyncLogStore* pLogStore) { + SyncIndex beginIndex = raftLogBeginIndex(pLogStore); + SyncIndex endIndex = raftLogEndIndex(pLogStore); + int32_t count = endIndex - beginIndex + 1; + return count > 0 ? 
count : 0; +} + +static bool raftLogInRange(struct SSyncLogStore* pLogStore, SyncIndex index) { + SyncIndex beginIndex = raftLogBeginIndex(pLogStore); + SyncIndex endIndex = raftLogEndIndex(pLogStore); + if (index >= beginIndex && index <= endIndex) { + return true; + } else { + return false; + } +} + +static SyncIndex raftLogLastIndex(struct SSyncLogStore* pLogStore) { + SyncIndex lastIndex; + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + SyncIndex lastVer = walGetLastVer(pWal); + SyncIndex firstVer = walGetFirstVer(pWal); + + if (lastVer < firstVer) { + // no record + lastIndex = -1; + + } else { + if (firstVer >= 0) { + lastIndex = lastVer; + } else if (firstVer == -1) { + lastIndex = -1; + } else { + ASSERT(0); + } + } + + return lastIndex; +} + +static SyncIndex raftLogWriteIndex(struct SSyncLogStore* pLogStore) { + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + SyncIndex lastVer = walGetLastVer(pWal); + return lastVer + 1; +} + +static SyncTerm raftLogLastTerm(struct SSyncLogStore* pLogStore) { + SyncTerm lastTerm = 0; + if (raftLogEntryCount(pLogStore) == 0) { + lastTerm = 0; + } else { + SSyncRaftEntry* pLastEntry; + int32_t code = raftLogGetLastEntry(pLogStore, &pLastEntry); + ASSERT(code == 0); + if (pLastEntry != NULL) { + lastTerm = pLastEntry->term; + taosMemoryFree(pLastEntry); + } + } + return lastTerm; +} + +static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + + SyncIndex writeIndex = raftLogWriteIndex(pLogStore); + ASSERT(pEntry->index == writeIndex); + + int code = 0; + SSyncLogMeta syncMeta; + syncMeta.isWeek = pEntry->isWeak; + syncMeta.seqNum = pEntry->seqNum; + syncMeta.term = pEntry->term; + code = walWriteWithSyncInfo(pWal, pEntry->index, pEntry->originalRpcType, syncMeta, pEntry->data, pEntry->dataLen); + if (code != 0) { + int32_t err = terrno; + const char* errStr = 
tstrerror(err); + int32_t linuxErr = errno; + const char* linuxErrMsg = strerror(errno); + sError("raftLogAppendEntry error, err:%d %X, msg:%s, linuxErr:%d, linuxErrMsg:%s", err, err, errStr, linuxErr, + linuxErrMsg); + ASSERT(0); + } + + walFsync(pWal, true); + + sTrace("sync event write index:%" PRId64, pEntry->index); + + return code; +} + +static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncRaftEntry** ppEntry) { + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + int32_t code; + + *ppEntry = NULL; + if (raftLogInRange(pLogStore, index)) { + SWalReadHandle* pWalHandle = walOpenReadHandle(pWal); + ASSERT(pWalHandle != NULL); + + code = walReadWithHandle(pWalHandle, index); + if (code != 0) { + int32_t err = terrno; + const char* errStr = tstrerror(err); + int32_t linuxErr = errno; + const char* linuxErrMsg = strerror(errno); + sError("raftLogGetEntry error, err:%d %X, msg:%s, linuxErr:%d, linuxErrMsg:%s", err, err, errStr, linuxErr, + linuxErrMsg); + ASSERT(0); + walCloseReadHandle(pWalHandle); + return code; + } + + *ppEntry = syncEntryBuild(pWalHandle->pHead->head.bodyLen); + ASSERT(*ppEntry != NULL); + (*ppEntry)->msgType = TDMT_SYNC_CLIENT_REQUEST; + (*ppEntry)->originalRpcType = pWalHandle->pHead->head.msgType; + (*ppEntry)->seqNum = pWalHandle->pHead->head.syncMeta.seqNum; + (*ppEntry)->isWeak = pWalHandle->pHead->head.syncMeta.isWeek; + (*ppEntry)->term = pWalHandle->pHead->head.syncMeta.term; + (*ppEntry)->index = index; + ASSERT((*ppEntry)->dataLen == pWalHandle->pHead->head.bodyLen); + memcpy((*ppEntry)->data, pWalHandle->pHead->head.body, pWalHandle->pHead->head.bodyLen); + + // need to hold, do not new every time!! 
+ walCloseReadHandle(pWalHandle); + + } else { + // index not in range + code = 0; + } + + return code; +} + +static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIndex) { + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + int32_t code = walRollback(pWal, fromIndex); + if (code != 0) { + int32_t err = terrno; + const char* errStr = tstrerror(err); + int32_t linuxErr = errno; + const char* linuxErrMsg = strerror(errno); + sError("raftLogTruncate error, err:%d %X, msg:%s, linuxErr:%d, linuxErrMsg:%s", err, err, errStr, linuxErr, + linuxErrMsg); + ASSERT(0); + } + return code; +} + +static int32_t raftLogGetLastEntry(SSyncLogStore* pLogStore, SSyncRaftEntry** ppLastEntry) { + *ppLastEntry = NULL; + if (raftLogEntryCount(pLogStore) == 0) { + return 0; + } + SyncIndex lastIndex = raftLogLastIndex(pLogStore); + int32_t code = raftLogGetEntry(pLogStore, lastIndex, ppLastEntry); + return code; +} + +//------------------------------- SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) { SSyncLogStore* pLogStore = taosMemoryMalloc(sizeof(SSyncLogStore)); assert(pLogStore != NULL); @@ -36,6 +249,16 @@ SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) { pData->pSyncNode = pSyncNode; pData->pWal = pSyncNode->pWal; + SyncIndex firstVer = walGetFirstVer(pData->pWal); + SyncIndex lastVer = walGetLastVer(pData->pWal); + if (firstVer >= 0) { + pData->beginIndex = firstVer; + } else if (firstVer == -1) { + pData->beginIndex = lastVer + 1; + } else { + ASSERT(0); + } + pLogStore->appendEntry = logStoreAppendEntry; pLogStore->getEntry = logStoreGetEntry; pLogStore->truncate = logStoreTruncate; @@ -43,6 +266,20 @@ SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) { pLogStore->getLastTerm = logStoreLastTerm; pLogStore->updateCommitIndex = logStoreUpdateCommitIndex; pLogStore->getCommitIndex = logStoreGetCommitIndex; + + pLogStore->syncLogSetBeginIndex = raftLogSetBeginIndex; + pLogStore->syncLogBeginIndex = raftLogBeginIndex; + 
pLogStore->syncLogEndIndex = raftLogEndIndex; + pLogStore->syncLogIsEmpty = raftLogIsEmpty; + pLogStore->syncLogEntryCount = raftLogEntryCount; + pLogStore->syncLogInRange = raftLogInRange; + pLogStore->syncLogLastIndex = raftLogLastIndex; + pLogStore->syncLogLastTerm = raftLogLastTerm; + pLogStore->syncLogAppendEntry = raftLogAppendEntry; + pLogStore->syncLogGetEntry = raftLogGetEntry; + pLogStore->syncLogTruncate = raftLogTruncate; + pLogStore->syncLogWriteIndex = raftLogWriteIndex; + return pLogStore; } @@ -53,6 +290,7 @@ void logStoreDestory(SSyncLogStore* pLogStore) { } } +//------------------------------- int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { SSyncLogStoreData* pData = pLogStore->data; SWal* pWal = pData->pWal; @@ -78,6 +316,8 @@ int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { // assert(code == 0); walFsync(pWal, true); + + sTrace("sync event old write wal: %ld", pEntry->index); return code; } @@ -136,7 +376,7 @@ int32_t logStoreTruncate(SSyncLogStore* pLogStore, SyncIndex fromIndex) { linuxErrMsg); ASSERT(0); } - return 0; // to avoid compiler error + return 0; } SyncIndex logStoreLastIndex(SSyncLogStore* pLogStore) { @@ -169,7 +409,7 @@ int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) { sError("walCommit error, err:%d %X, msg:%s, linuxErr:%d, linuxErrMsg:%s", err, err, errStr, linuxErr, linuxErrMsg); ASSERT(0); } - return 0; // to avoid compiler error + return 0; } SyncIndex logStoreGetCommitIndex(SSyncLogStore* pLogStore) { @@ -199,15 +439,32 @@ cJSON* logStore2Json(SSyncLogStore* pLogStore) { cJSON_AddStringToObject(pRoot, "pSyncNode", u64buf); snprintf(u64buf, sizeof(u64buf), "%p", pData->pWal); cJSON_AddStringToObject(pRoot, "pWal", u64buf); - snprintf(u64buf, sizeof(u64buf), "%ld", logStoreLastIndex(pLogStore)); + + snprintf(u64buf, sizeof(u64buf), "%ld", pData->beginIndex); + cJSON_AddStringToObject(pRoot, "beginIndex", u64buf); + + SyncIndex endIndex 
= raftLogEndIndex(pLogStore); + snprintf(u64buf, sizeof(u64buf), "%ld", endIndex); + cJSON_AddStringToObject(pRoot, "endIndex", u64buf); + + int32_t count = raftLogEntryCount(pLogStore); + cJSON_AddNumberToObject(pRoot, "entryCount", count); + + snprintf(u64buf, sizeof(u64buf), "%ld", raftLogWriteIndex(pLogStore)); + cJSON_AddStringToObject(pRoot, "WriteIndex", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%d", raftLogIsEmpty(pLogStore)); + cJSON_AddStringToObject(pRoot, "IsEmpty", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%ld", raftLogLastIndex(pLogStore)); cJSON_AddStringToObject(pRoot, "LastIndex", u64buf); - snprintf(u64buf, sizeof(u64buf), "%lu", logStoreLastTerm(pLogStore)); + snprintf(u64buf, sizeof(u64buf), "%lu", raftLogLastTerm(pLogStore)); cJSON_AddStringToObject(pRoot, "LastTerm", u64buf); cJSON* pEntries = cJSON_CreateArray(); cJSON_AddItemToObject(pRoot, "pEntries", pEntries); - SyncIndex lastIndex = logStoreLastIndex(pLogStore); - for (SyncIndex i = 0; i <= lastIndex; ++i) { + + for (SyncIndex i = pData->beginIndex; i <= endIndex; ++i) { SSyncRaftEntry* pEntry = logStoreGetEntry(pLogStore, i); cJSON_AddItemToArray(pEntries, syncEntry2Json(pEntry)); syncEntryDestory(pEntry); @@ -236,9 +493,26 @@ cJSON* logStoreSimple2Json(SSyncLogStore* pLogStore) { cJSON_AddStringToObject(pRoot, "pSyncNode", u64buf); snprintf(u64buf, sizeof(u64buf), "%p", pData->pWal); cJSON_AddStringToObject(pRoot, "pWal", u64buf); - snprintf(u64buf, sizeof(u64buf), "%ld", logStoreLastIndex(pLogStore)); + + snprintf(u64buf, sizeof(u64buf), "%ld", pData->beginIndex); + cJSON_AddStringToObject(pRoot, "beginIndex", u64buf); + + SyncIndex endIndex = raftLogEndIndex(pLogStore); + snprintf(u64buf, sizeof(u64buf), "%ld", endIndex); + cJSON_AddStringToObject(pRoot, "endIndex", u64buf); + + int32_t count = raftLogEntryCount(pLogStore); + cJSON_AddNumberToObject(pRoot, "entryCount", count); + + snprintf(u64buf, sizeof(u64buf), "%ld", raftLogWriteIndex(pLogStore)); + 
cJSON_AddStringToObject(pRoot, "WriteIndex", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%d", raftLogIsEmpty(pLogStore)); + cJSON_AddStringToObject(pRoot, "IsEmpty", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%ld", raftLogLastIndex(pLogStore)); cJSON_AddStringToObject(pRoot, "LastIndex", u64buf); - snprintf(u64buf, sizeof(u64buf), "%lu", logStoreLastTerm(pLogStore)); + snprintf(u64buf, sizeof(u64buf), "%lu", raftLogLastTerm(pLogStore)); cJSON_AddStringToObject(pRoot, "LastTerm", u64buf); } @@ -254,6 +528,12 @@ char* logStoreSimple2Str(SSyncLogStore* pLogStore) { return serialized; } +SyncIndex logStoreFirstIndex(SSyncLogStore* pLogStore) { + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + return walGetFirstVer(pWal); +} + // for debug ----------------- void logStorePrint(SSyncLogStore* pLogStore) { char* serialized = logStore2Str(pLogStore); @@ -303,7 +583,9 @@ void logStoreSimpleLog(SSyncLogStore* pLogStore) { } void logStoreSimpleLog2(char* s, SSyncLogStore* pLogStore) { - char* serialized = logStoreSimple2Str(pLogStore); - sTrace("logStoreSimpleLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = logStoreSimple2Str(pLogStore); + sTrace("logStoreSimpleLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } diff --git a/source/libs/sync/src/syncReplication.c b/source/libs/sync/src/syncReplication.c index d17e64d936..ff39b0b13d 100644 --- a/source/libs/sync/src/syncReplication.c +++ b/source/libs/sync/src/syncReplication.c @@ -16,9 +16,11 @@ #include "syncReplication.h" #include "syncIndexMgr.h" #include "syncMessage.h" +#include "syncRaftCfg.h" #include "syncRaftEntry.h" #include "syncRaftLog.h" #include "syncRaftStore.h" +#include "syncSnapshot.h" #include "syncUtil.h" // TLA+ Spec @@ -59,6 +61,7 @@ int32_t syncNodeAppendEntriesPeers(SSyncNode* pSyncNode) { // set prevLogIndex SyncIndex nextIndex = 
syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId); + SyncIndex preLogIndex = nextIndex - 1; // set preLogTerm @@ -113,9 +116,87 @@ int32_t syncNodeAppendEntriesPeers(SSyncNode* pSyncNode) { return ret; } +int32_t syncNodeAppendEntriesPeersSnapshot(SSyncNode* pSyncNode) { + ASSERT(pSyncNode->state == TAOS_SYNC_STATE_LEADER); + + syncIndexMgrLog2("begin append entries peers pNextIndex:", pSyncNode->pNextIndex); + syncIndexMgrLog2("begin append entries peers pMatchIndex:", pSyncNode->pMatchIndex); + logStoreSimpleLog2("begin append entries peers LogStore:", pSyncNode->pLogStore); + { + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + sTrace("begin append entries peers, snapshot.lastApplyIndex:%ld, snapshot.lastApplyTerm:%lu", + snapshot.lastApplyIndex, snapshot.lastApplyTerm); + } + + int32_t ret = 0; + for (int i = 0; i < pSyncNode->peersNum; ++i) { + SRaftId* pDestId = &(pSyncNode->peersId[i]); + + // next index + SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId); + + // pre index, pre term + SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex); + SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex); + + // batch optimized + // SyncIndex lastIndex = syncUtilMinIndex(pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore), nextIndex); + + // prepare entry + SyncAppendEntries* pMsg = NULL; + + SSyncRaftEntry* pEntry; + int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry); + ASSERT(code == 0); + + if (pEntry != NULL) { + pMsg = syncAppendEntriesBuild(pEntry->bytes, pSyncNode->vgId); + ASSERT(pMsg != NULL); + + // add pEntry into msg + uint32_t len; + char* serialized = syncEntrySerialize(pEntry, &len); + assert(len == pEntry->bytes); + memcpy(pMsg->data, serialized, len); + + taosMemoryFree(serialized); + syncEntryDestory(pEntry); + + } else { + // no entry in log + pMsg = syncAppendEntriesBuild(0, pSyncNode->vgId); + ASSERT(pMsg != NULL); + 
} + + // prepare msg + ASSERT(pMsg != NULL); + pMsg->srcId = pSyncNode->myRaftId; + pMsg->destId = *pDestId; + pMsg->term = pSyncNode->pRaftStore->currentTerm; + pMsg->prevLogIndex = preLogIndex; + pMsg->prevLogTerm = preLogTerm; + pMsg->commitIndex = pSyncNode->commitIndex; + pMsg->privateTerm = 0; + // pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId); + + // send msg + syncNodeAppendEntries(pSyncNode, pDestId, pMsg); + syncAppendEntriesDestroy(pMsg); + } + + return ret; +} + int32_t syncNodeReplicate(SSyncNode* pSyncNode) { // start replicate - int32_t ret = syncNodeAppendEntriesPeers(pSyncNode); + int32_t ret = 0; + + if (pSyncNode->pRaftCfg->snapshotEnable) { + ret = syncNodeAppendEntriesPeersSnapshot(pSyncNode); + } else { + ret = syncNodeAppendEntriesPeers(pSyncNode); + } return ret; } diff --git a/source/libs/sync/src/syncRequestVote.c b/source/libs/sync/src/syncRequestVote.c index 2656771292..9ed7f00982 100644 --- a/source/libs/sync/src/syncRequestVote.c +++ b/source/libs/sync/src/syncRequestVote.c @@ -15,6 +15,7 @@ #include "syncRequestVote.h" #include "syncInt.h" +#include "syncRaftCfg.h" #include "syncRaftStore.h" #include "syncUtil.h" #include "syncVoteMgr.h" @@ -62,6 +63,9 @@ int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg) { // maybe has already voted for pMsg->srcId // vote again, no harm raftStoreVote(ths->pRaftStore, &(pMsg->srcId)); + + // forbid elect for this round + syncNodeResetElectTimer(ths); } SyncRequestVoteReply* pReply = syncRequestVoteReplyBuild(ths->vgId); @@ -77,3 +81,64 @@ int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg) { return ret; } + +static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pMsg) { + SyncTerm myLastTerm = syncNodeGetLastTerm(pSyncNode); + SyncIndex myLastIndex = syncNodeGetLastIndex(pSyncNode); + + if (pMsg->lastLogTerm > myLastTerm) { + return true; + } + if (pMsg->lastLogTerm == myLastTerm && pMsg->lastLogIndex >= 
myLastIndex) { + return true; + } + + return false; +} + +int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg) { + int32_t ret = 0; + + // print log + char logBuf[128] = {0}; + snprintf(logBuf, sizeof(logBuf), "recv SyncRequestVote, currentTerm:%lu", ths->pRaftStore->currentTerm); + syncRequestVoteLog2(logBuf, pMsg); + + // if already drop replica, do not process + if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { + sInfo("recv SyncRequestVote maybe replica already dropped"); + return ret; + } + + // maybe update term + if (pMsg->term > ths->pRaftStore->currentTerm) { + syncNodeUpdateTerm(ths, pMsg->term); + } + ASSERT(pMsg->term <= ths->pRaftStore->currentTerm); + + bool logOK = syncNodeOnRequestVoteLogOK(ths, pMsg); + bool grant = (pMsg->term == ths->pRaftStore->currentTerm) && logOK && + ((!raftStoreHasVoted(ths->pRaftStore)) || (syncUtilSameId(&(ths->pRaftStore->voteFor), &(pMsg->srcId)))); + if (grant) { + // maybe has already voted for pMsg->srcId + // vote again, no harm + raftStoreVote(ths->pRaftStore, &(pMsg->srcId)); + + // forbid elect for this round + syncNodeResetElectTimer(ths); + } + + // send msg + SyncRequestVoteReply* pReply = syncRequestVoteReplyBuild(ths->vgId); + pReply->srcId = ths->myRaftId; + pReply->destId = pMsg->srcId; + pReply->term = ths->pRaftStore->currentTerm; + pReply->voteGranted = grant; + + SRpcMsg rpcMsg; + syncRequestVoteReply2RpcMsg(pReply, &rpcMsg); + syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); + syncRequestVoteReplyDestroy(pReply); + + return ret; +} \ No newline at end of file diff --git a/source/libs/sync/src/syncRequestVoteReply.c b/source/libs/sync/src/syncRequestVoteReply.c index 75236aee2b..5d041cefcd 100644 --- a/source/libs/sync/src/syncRequestVoteReply.c +++ b/source/libs/sync/src/syncRequestVoteReply.c @@ -15,6 +15,7 @@ #include "syncRequestVoteReply.h" #include "syncInt.h" +#include "syncRaftCfg.h" #include "syncRaftStore.h" #include "syncUtil.h" 
#include "syncVoteMgr.h" @@ -92,3 +93,68 @@ int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg) return ret; } + +int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg) { + int32_t ret = 0; + + // print log + char logBuf[128] = {0}; + snprintf(logBuf, sizeof(logBuf), "recv SyncRequestVoteReply, term:%lu", ths->pRaftStore->currentTerm); + syncRequestVoteReplyLog2(logBuf, pMsg); + + // if already drop replica, do not process + if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { + sInfo("recv SyncRequestVoteReply, maybe replica already dropped"); + return ret; + } + + // drop stale response + if (pMsg->term < ths->pRaftStore->currentTerm) { + sTrace("recv SyncRequestVoteReply, drop stale response, receive_term:%lu current_term:%lu", pMsg->term, + ths->pRaftStore->currentTerm); + return ret; + } + + // assert(!(pMsg->term > ths->pRaftStore->currentTerm)); + // no need this code, because if I receive reply.term, then I must have sent for that term. + // if (pMsg->term > ths->pRaftStore->currentTerm) { + // syncNodeUpdateTerm(ths, pMsg->term); + // } + + if (pMsg->term > ths->pRaftStore->currentTerm) { + char logBuf[128] = {0}; + snprintf(logBuf, sizeof(logBuf), "recv SyncRequestVoteReply, error term, receive_term:%lu current_term:%lu", + pMsg->term, ths->pRaftStore->currentTerm); + syncNodePrint2(logBuf, ths); + sError("%s", logBuf); + return ret; + } + + ASSERT(pMsg->term == ths->pRaftStore->currentTerm); + + // This tallies votes even when the current state is not Candidate, + // but they won't be looked at, so it doesn't matter. + if (ths->state == TAOS_SYNC_STATE_CANDIDATE) { + votesRespondAdd(ths->pVotesRespond, pMsg); + if (pMsg->voteGranted) { + // add vote + voteGrantedVote(ths->pVotesGranted, pMsg); + + // maybe to leader + if (voteGrantedMajority(ths->pVotesGranted)) { + if (!ths->pVotesGranted->toLeader) { + syncNodeCandidate2Leader(ths); + + // prevent to leader again! 
+ ths->pVotesGranted->toLeader = true; + } + } + } else { + ; + // do nothing + // UNCHANGED <> + } + } + + return ret; +} \ No newline at end of file diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index ccb0e6071b..a68312d07f 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -14,23 +14,598 @@ */ #include "syncSnapshot.h" +#include "syncIndexMgr.h" +#include "syncRaftLog.h" +#include "syncRaftStore.h" +#include "syncUtil.h" +#include "wal.h" -SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode) { return NULL; } +static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm); -void snapshotSenderDestroy(SSyncSnapshotSender *pSender) {} +//---------------------------------- +SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaIndex) { + bool condition = (pSyncNode->pFsm->FpSnapshotStartRead != NULL) && (pSyncNode->pFsm->FpSnapshotStopRead != NULL) && + (pSyncNode->pFsm->FpSnapshotDoRead != NULL); -int32_t snapshotSend(SSyncSnapshotSender *pSender) { return 0; } + SSyncSnapshotSender *pSender = NULL; + if (condition) { + pSender = taosMemoryMalloc(sizeof(SSyncSnapshotSender)); + ASSERT(pSender != NULL); + memset(pSender, 0, sizeof(*pSender)); -cJSON *snapshotSender2Json(SSyncSnapshotSender *pSender) { return NULL; } + pSender->start = false; + pSender->seq = SYNC_SNAPSHOT_SEQ_INVALID; + pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID; + pSender->pReader = NULL; + pSender->pCurrentBlock = NULL; + pSender->blockLen = 0; + pSender->sendingMS = SYNC_SNAPSHOT_RETRY_MS; + pSender->pSyncNode = pSyncNode; + pSender->replicaIndex = replicaIndex; + pSender->term = pSyncNode->pRaftStore->currentTerm; + pSender->privateTerm = taosGetTimestampMs() + 100; + pSender->pSyncNode->pFsm->FpGetSnapshot(pSender->pSyncNode->pFsm, &(pSender->snapshot)); + pSender->finish = false; + } else { + sError("snapshotSenderCreate cannot create sender"); + } 
+ return pSender; +} -char *snapshotSender2Str(SSyncSnapshotSender *pSender) { return NULL; } +void snapshotSenderDestroy(SSyncSnapshotSender *pSender) { + if (pSender != NULL) { + if (pSender->pCurrentBlock != NULL) { + taosMemoryFree(pSender->pCurrentBlock); + } + taosMemoryFree(pSender); + } +} -SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode) { return NULL; } +bool snapshotSenderIsStart(SSyncSnapshotSender *pSender) { return pSender->start; } -void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver) {} +// begin send snapshot (current term, seq begin) +void snapshotSenderStart(SSyncSnapshotSender *pSender) { + ASSERT(!snapshotSenderIsStart(pSender)); -int32_t snapshotReceive(SSyncSnapshotReceiver *pReceiver) { return 0; } + pSender->seq = SYNC_SNAPSHOT_SEQ_BEGIN; + pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID; -cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver) { return NULL; } + // open snapshot reader + ASSERT(pSender->pReader == NULL); + int32_t ret = pSender->pSyncNode->pFsm->FpSnapshotStartRead(pSender->pSyncNode->pFsm, &(pSender->pReader)); + ASSERT(ret == 0); -char *snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver) { return NULL; } + if (pSender->pCurrentBlock != NULL) { + taosMemoryFree(pSender->pCurrentBlock); + } + + pSender->blockLen = 0; + + // get current snapshot info + pSender->pSyncNode->pFsm->FpGetSnapshot(pSender->pSyncNode->pFsm, &(pSender->snapshot)); + + pSender->sendingMS = SYNC_SNAPSHOT_RETRY_MS; + pSender->term = pSender->pSyncNode->pRaftStore->currentTerm; + ++(pSender->privateTerm); + pSender->finish = false; + pSender->start = true; + + // build begin msg + SyncSnapshotSend *pMsg = syncSnapshotSendBuild(0, pSender->pSyncNode->vgId); + pMsg->srcId = pSender->pSyncNode->myRaftId; + pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; + pMsg->lastIndex = pSender->snapshot.lastApplyIndex; + pMsg->lastTerm = 
pSender->snapshot.lastApplyTerm; + pMsg->seq = pSender->seq; // SYNC_SNAPSHOT_SEQ_BEGIN + pMsg->privateTerm = pSender->privateTerm; + + // send msg + SRpcMsg rpcMsg; + syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); + syncNodeSendMsgById(&(pMsg->destId), pSender->pSyncNode, &rpcMsg); + + char *msgStr = syncSnapshotSend2Str(pMsg); + char host[128]; + uint16_t port; + syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); + sTrace("sync event snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send msg:%s", host, + port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, msgStr); + taosMemoryFree(msgStr); + + syncSnapshotSendDestroy(pMsg); +} + +#if 0 +// when entry in snapshot, start sender +void snapshotSenderStart(SSyncSnapshotSender *pSender) { + if (!(pSender->start)) { + // start + snapshotSenderDoStart(pSender); + pSender->start = true; + } else { + // already start + ASSERT(pSender->pSyncNode->pRaftStore->currentTerm >= pSender->term); + + // if current term is higher, need start again + if (pSender->pSyncNode->pRaftStore->currentTerm > pSender->term) { + // force peer rollback + SyncSnapshotSend *pMsg = syncSnapshotSendBuild(0, pSender->pSyncNode->vgId); + pMsg->srcId = pSender->pSyncNode->myRaftId; + pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; + pMsg->lastIndex = pSender->snapshot.lastApplyIndex; + pMsg->lastTerm = pSender->snapshot.lastApplyTerm; + pMsg->seq = SYNC_SNAPSHOT_SEQ_FORCE_CLOSE; + + SRpcMsg rpcMsg; + syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); + syncNodeSendMsgById(&(pMsg->destId), pSender->pSyncNode, &rpcMsg); + + char *msgStr = syncSnapshotSend2Str(pMsg); + sTrace("snapshot send force close seq:%d ack:%d send msg:%s", pSender->seq, pSender->ack, msgStr); + taosMemoryFree(msgStr); + + syncSnapshotSendDestroy(pMsg); + + // close reader + int32_t ret 
= pSender->pSyncNode->pFsm->FpSnapshotStopRead(pSender->pSyncNode->pFsm, pSender->pReader); + ASSERT(ret == 0); + pSender->pReader = NULL; + + // start again + snapshotSenderDoStart(pSender); + pSender->start = true; + } else { + // current term, do nothing + ASSERT(pSender->pSyncNode->pRaftStore->currentTerm == pSender->term); + } + } + + char *s = snapshotSender2Str(pSender); + sInfo("snapshotSenderStart %s", s); + taosMemoryFree(s); +} +#endif + +void snapshotSenderStop(SSyncSnapshotSender *pSender) { + if (pSender->pReader != NULL) { + int32_t ret = pSender->pSyncNode->pFsm->FpSnapshotStopRead(pSender->pSyncNode->pFsm, pSender->pReader); + ASSERT(ret == 0); + pSender->pReader = NULL; + } + + if (pSender->pCurrentBlock != NULL) { + taosMemoryFree(pSender->pCurrentBlock); + pSender->pCurrentBlock = NULL; + pSender->blockLen = 0; + } + + pSender->start = false; + + char *s = snapshotSender2Str(pSender); + sInfo("snapshotSenderStop %s", s); + taosMemoryFree(s); +} + +// when sender receiver ack, call this function to send msg from seq +// seq = ack + 1, already updated +int32_t snapshotSend(SSyncSnapshotSender *pSender) { + // free memory last time (seq - 1) + if (pSender->pCurrentBlock != NULL) { + taosMemoryFree(pSender->pCurrentBlock); + pSender->pCurrentBlock = NULL; + pSender->blockLen = 0; + } + + // read data + int32_t ret = pSender->pSyncNode->pFsm->FpSnapshotDoRead(pSender->pSyncNode->pFsm, pSender->pReader, + &(pSender->pCurrentBlock), &(pSender->blockLen)); + ASSERT(ret == 0); + if (pSender->blockLen > 0) { + // has read data + } else { + // read finish + pSender->seq = SYNC_SNAPSHOT_SEQ_END; + } + + // build msg + SyncSnapshotSend *pMsg = syncSnapshotSendBuild(pSender->blockLen, pSender->pSyncNode->vgId); + pMsg->srcId = pSender->pSyncNode->myRaftId; + pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; + pMsg->lastIndex = pSender->snapshot.lastApplyIndex; + pMsg->lastTerm = 
pSender->snapshot.lastApplyTerm; + pMsg->seq = pSender->seq; + pMsg->privateTerm = pSender->privateTerm; + memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen); + + // send msg + SRpcMsg rpcMsg; + syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); + syncNodeSendMsgById(&(pMsg->destId), pSender->pSyncNode, &rpcMsg); + + char *msgStr = syncSnapshotSend2Str(pMsg); + char host[128]; + uint16_t port; + syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); + if (pSender->seq == SYNC_SNAPSHOT_SEQ_END) { + sTrace("sync event snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send msg:%s", + host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, + msgStr); + } else { + sTrace("sync event snapshot send to %s:%d sending seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send msg:%s", + host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, + msgStr); + } + taosMemoryFree(msgStr); + + syncSnapshotSendDestroy(pMsg); + return 0; +} + +// send snapshot data from cache +int32_t snapshotReSend(SSyncSnapshotSender *pSender) { + if (pSender->pCurrentBlock != NULL) { + SyncSnapshotSend *pMsg = syncSnapshotSendBuild(pSender->blockLen, pSender->pSyncNode->vgId); + pMsg->srcId = pSender->pSyncNode->myRaftId; + pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; + pMsg->lastIndex = pSender->snapshot.lastApplyIndex; + pMsg->lastTerm = pSender->snapshot.lastApplyTerm; + pMsg->seq = pSender->seq; + memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen); + + SRpcMsg rpcMsg; + syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); + syncNodeSendMsgById(&(pMsg->destId), pSender->pSyncNode, &rpcMsg); + + char *msgStr = syncSnapshotSend2Str(pMsg); + char host[128]; + uint16_t port; + 
syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); + sTrace("sync event snapshot send to %s:%d resend seq:%d ack:%d send msg:%s", host, port, pSender->seq, pSender->ack, + msgStr); + taosMemoryFree(msgStr); + + syncSnapshotSendDestroy(pMsg); + } + return 0; +} + +cJSON *snapshotSender2Json(SSyncSnapshotSender *pSender) { + char u64buf[128]; + cJSON *pRoot = cJSON_CreateObject(); + + if (pSender != NULL) { + cJSON_AddNumberToObject(pRoot, "start", pSender->start); + cJSON_AddNumberToObject(pRoot, "seq", pSender->seq); + cJSON_AddNumberToObject(pRoot, "ack", pSender->ack); + + snprintf(u64buf, sizeof(u64buf), "%p", pSender->pReader); + cJSON_AddStringToObject(pRoot, "pReader", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%p", pSender->pCurrentBlock); + cJSON_AddStringToObject(pRoot, "pCurrentBlock", u64buf); + cJSON_AddNumberToObject(pRoot, "blockLen", pSender->blockLen); + + if (pSender->pCurrentBlock != NULL) { + char *s; + s = syncUtilprintBin((char *)(pSender->pCurrentBlock), pSender->blockLen); + cJSON_AddStringToObject(pRoot, "pCurrentBlock", s); + taosMemoryFree(s); + s = syncUtilprintBin2((char *)(pSender->pCurrentBlock), pSender->blockLen); + cJSON_AddStringToObject(pRoot, "pCurrentBlock2", s); + taosMemoryFree(s); + } + + cJSON *pSnapshot = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pSender->snapshot.lastApplyIndex); + cJSON_AddStringToObject(pSnapshot, "lastApplyIndex", u64buf); + snprintf(u64buf, sizeof(u64buf), "%lu", pSender->snapshot.lastApplyTerm); + cJSON_AddStringToObject(pSnapshot, "lastApplyTerm", u64buf); + cJSON_AddItemToObject(pRoot, "snapshot", pSnapshot); + + snprintf(u64buf, sizeof(u64buf), "%lu", pSender->sendingMS); + cJSON_AddStringToObject(pRoot, "sendingMS", u64buf); + snprintf(u64buf, sizeof(u64buf), "%p", pSender->pSyncNode); + cJSON_AddStringToObject(pRoot, "pSyncNode", u64buf); + cJSON_AddNumberToObject(pRoot, "replicaIndex", pSender->replicaIndex); + 
snprintf(u64buf, sizeof(u64buf), "%lu", pSender->term); + cJSON_AddStringToObject(pRoot, "term", u64buf); + snprintf(u64buf, sizeof(u64buf), "%lu", pSender->privateTerm); + cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + cJSON_AddNumberToObject(pRoot, "finish", pSender->finish); + } + + cJSON *pJson = cJSON_CreateObject(); + cJSON_AddItemToObject(pJson, "SSyncSnapshotSender", pRoot); + return pJson; +} + +char *snapshotSender2Str(SSyncSnapshotSender *pSender) { + cJSON *pJson = snapshotSender2Json(pSender); + char *serialized = cJSON_Print(pJson); + cJSON_Delete(pJson); + return serialized; +} + +// ------------------------------------- +SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, int32_t replicaIndex) { + bool condition = (pSyncNode->pFsm->FpSnapshotStartWrite != NULL) && (pSyncNode->pFsm->FpSnapshotStopWrite != NULL) && + (pSyncNode->pFsm->FpSnapshotDoWrite != NULL); + + SSyncSnapshotReceiver *pReceiver = NULL; + if (condition) { + pReceiver = taosMemoryMalloc(sizeof(SSyncSnapshotReceiver)); + ASSERT(pReceiver != NULL); + memset(pReceiver, 0, sizeof(*pReceiver)); + + pReceiver->start = false; + pReceiver->ack = SYNC_SNAPSHOT_SEQ_BEGIN; + pReceiver->pWriter = NULL; + pReceiver->pSyncNode = pSyncNode; + pReceiver->replicaIndex = replicaIndex; + pReceiver->term = pSyncNode->pRaftStore->currentTerm; + pReceiver->privateTerm = 0; + + } else { + sInfo("snapshotReceiverCreate cannot create receiver"); + } + + return pReceiver; +} + +void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver) { + if (pReceiver != NULL) { + taosMemoryFree(pReceiver); + } +} + +bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver) { return pReceiver->start; } + +// begin receive snapshot msg (current term, seq begin) +static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm) { + pReceiver->term = pReceiver->pSyncNode->pRaftStore->currentTerm; + pReceiver->privateTerm = privateTerm; + pReceiver->ack = 
SYNC_SNAPSHOT_SEQ_BEGIN; + + ASSERT(pReceiver->pWriter == NULL); + int32_t ret = pReceiver->pSyncNode->pFsm->FpSnapshotStartWrite(pReceiver->pSyncNode->pFsm, &(pReceiver->pWriter)); + ASSERT(ret == 0); +} + +// if receiver receive msg from seq = SYNC_SNAPSHOT_SEQ_BEGIN, start receiver +// if already start, force close, start again +void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm) { + if (!snapshotReceiverIsStart(pReceiver)) { + // start + snapshotReceiverDoStart(pReceiver, privateTerm); + pReceiver->start = true; + + } else { + // already start + + // force close, abandon incomplete data + int32_t ret = + pReceiver->pSyncNode->pFsm->FpSnapshotStopWrite(pReceiver->pSyncNode->pFsm, pReceiver->pWriter, false); + ASSERT(ret == 0); + pReceiver->pWriter = NULL; + + // start again + snapshotReceiverDoStart(pReceiver, privateTerm); + pReceiver->start = true; + + ASSERT(0); + } + + char *s = snapshotReceiver2Str(pReceiver); + sInfo("snapshotReceiverStart %s", s); + taosMemoryFree(s); +} + +void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver, bool apply) { + if (pReceiver->pWriter != NULL) { + int32_t ret = + pReceiver->pSyncNode->pFsm->FpSnapshotStopWrite(pReceiver->pSyncNode->pFsm, pReceiver->pWriter, false); + ASSERT(ret == 0); + pReceiver->pWriter = NULL; + } + + pReceiver->start = false; + + if (apply) { + ++(pReceiver->privateTerm); + } + + char *s = snapshotReceiver2Str(pReceiver); + sInfo("snapshotReceiverStop %s", s); + taosMemoryFree(s); +} + +cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver) { + char u64buf[128]; + cJSON *pRoot = cJSON_CreateObject(); + + if (pReceiver != NULL) { + cJSON_AddNumberToObject(pRoot, "start", pReceiver->start); + cJSON_AddNumberToObject(pRoot, "ack", pReceiver->ack); + + snprintf(u64buf, sizeof(u64buf), "%p", pReceiver->pWriter); + cJSON_AddStringToObject(pRoot, "pWriter", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%p", pReceiver->pSyncNode); + cJSON_AddStringToObject(pRoot, 
"pSyncNode", u64buf); + cJSON_AddNumberToObject(pRoot, "replicaIndex", pReceiver->replicaIndex); + snprintf(u64buf, sizeof(u64buf), "%lu", pReceiver->term); + cJSON_AddStringToObject(pRoot, "term", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pReceiver->privateTerm); + cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + } + + cJSON *pJson = cJSON_CreateObject(); + cJSON_AddItemToObject(pJson, "SSyncSnapshotReceiver", pRoot); + return pJson; +} + +char *snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver) { + cJSON *pJson = snapshotReceiver2Json(pReceiver); + char *serialized = cJSON_Print(pJson); + cJSON_Delete(pJson); + return serialized; +} + +// receiver do something +int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { + // get receiver + SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; + bool needRsp = false; + int32_t writeCode = 0; + + // state, term, seq/ack + if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) { + if (pMsg->term == pSyncNode->pRaftStore->currentTerm) { + if (pMsg->seq == SYNC_SNAPSHOT_SEQ_BEGIN) { + // begin + snapshotReceiverStart(pReceiver, pMsg->privateTerm); + pReceiver->ack = pMsg->seq; + needRsp = true; + + char *msgStr = syncSnapshotSend2Str(pMsg); + char host[128]; + uint16_t port; + syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); + sTrace("sync event snapshot recv from %s:%d begin ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", host, port, + pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + taosMemoryFree(msgStr); + + } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_END) { + // end, finish FSM + writeCode = pSyncNode->pFsm->FpSnapshotDoWrite(pSyncNode->pFsm, pReceiver->pWriter, pMsg->data, pMsg->dataLen); + ASSERT(writeCode == 0); + + pSyncNode->pFsm->FpSnapshotStopWrite(pSyncNode->pFsm, pReceiver->pWriter, true); + + pSyncNode->pLogStore->syncLogSetBeginIndex(pSyncNode->pLogStore, pMsg->lastIndex + 1); + char *logSimpleStr = 
logStoreSimple2Str(pSyncNode->pLogStore); + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + char host[128]; + uint16_t port; + syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); + sInfo( + "sync event snapshot recv from %s:%d finish, update log begin index:%ld, snapshot.lastApplyIndex:%ld, " + "snapshot.lastApplyTerm:%lu, raft log:%s", + host, port, pMsg->lastIndex + 1, snapshot.lastApplyIndex, snapshot.lastApplyTerm, logSimpleStr); + taosMemoryFree(logSimpleStr); + + pReceiver->pWriter = NULL; + snapshotReceiverStop(pReceiver, true); + pReceiver->ack = pMsg->seq; + needRsp = true; + + char *msgStr = syncSnapshotSend2Str(pMsg); + sTrace("sync event snapshot recv from %s:%d end ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", host, port, + pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + taosMemoryFree(msgStr); + + } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_FORCE_CLOSE) { + pSyncNode->pFsm->FpSnapshotStopWrite(pSyncNode->pFsm, pReceiver->pWriter, false); + snapshotReceiverStop(pReceiver, false); + needRsp = false; + + char host[128]; + uint16_t port; + syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); + + char *msgStr = syncSnapshotSend2Str(pMsg); + sTrace("sync event snapshot recv from %s:%d force close ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", host, + port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + + taosMemoryFree(msgStr); + + } else if (pMsg->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->seq < SYNC_SNAPSHOT_SEQ_END) { + // transfering + if (pMsg->seq == pReceiver->ack + 1) { + writeCode = + pSyncNode->pFsm->FpSnapshotDoWrite(pSyncNode->pFsm, pReceiver->pWriter, pMsg->data, pMsg->dataLen); + ASSERT(writeCode == 0); + pReceiver->ack = pMsg->seq; + } + needRsp = true; + + char *msgStr = syncSnapshotSend2Str(pMsg); + char host[128]; + uint16_t port; + syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); + sTrace("sync event snapshot recv from %s:%d receiving 
ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", host, + port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + taosMemoryFree(msgStr); + + } else { + ASSERT(0); + } + + if (needRsp) { + SyncSnapshotRsp *pRspMsg = syncSnapshotRspBuild(pSyncNode->vgId); + pRspMsg->srcId = pSyncNode->myRaftId; + pRspMsg->destId = pMsg->srcId; + pRspMsg->term = pSyncNode->pRaftStore->currentTerm; + pRspMsg->lastIndex = pMsg->lastIndex; + pRspMsg->lastTerm = pMsg->lastTerm; + pRspMsg->ack = pReceiver->ack; + pRspMsg->code = writeCode; + pRspMsg->privateTerm = pReceiver->privateTerm; + + SRpcMsg rpcMsg; + syncSnapshotRsp2RpcMsg(pRspMsg, &rpcMsg); + syncNodeSendMsgById(&(pRspMsg->destId), pSyncNode, &rpcMsg); + + syncSnapshotRspDestroy(pRspMsg); + } + } + } else { + syncNodeLog2("syncNodeOnSnapshotSendCb not follower", pSyncNode); + } + + return 0; +} + +// sender receives ack, set seq = ack + 1, send msg from seq +// if ack == SYNC_SNAPSHOT_SEQ_END, stop sender +int32_t syncNodeOnSnapshotRspCb(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) { + // get sender + SSyncSnapshotSender *pSender = syncNodeGetSnapshotSender(pSyncNode, &(pMsg->srcId)); + ASSERT(pSender != NULL); + + // state, term, seq/ack + if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { + if (pMsg->term == pSyncNode->pRaftStore->currentTerm) { + // receiver ack is finish, close sender + if (pMsg->ack == SYNC_SNAPSHOT_SEQ_END) { + pSender->finish = true; + snapshotSenderStop(pSender); + return 0; + } + + // send next msg + if (pMsg->ack == pSender->seq) { + // update sender ack + pSender->ack = pMsg->ack; + (pSender->seq)++; + snapshotSend(pSender); + + } else if (pMsg->ack == pSender->seq - 1) { + snapshotReSend(pSender); + + } else { + ASSERT(0); + } + } + } else { + syncNodeLog2("syncNodeOnSnapshotRspCb not leader", pSyncNode); + } + + return 0; +} \ No newline at end of file diff --git a/source/libs/sync/src/syncUtil.c b/source/libs/sync/src/syncUtil.c index 48567b75c2..f6ff521e01 100644 --- 
a/source/libs/sync/src/syncUtil.c +++ b/source/libs/sync/src/syncUtil.c @@ -240,4 +240,26 @@ bool syncUtilUserRollback(tmsg_t msgType) { return true; } return false; +} + +void syncUtilJson2Line(char* jsonStr) { + int p, q, len; + p = 0; + q = 1; + len = strlen(jsonStr); + while (1) { + if (jsonStr[q] == '\0') { + jsonStr[p + 1] = '\0'; + break; + } + + if (jsonStr[q] == '\n' || jsonStr[q] == ' ' || jsonStr[q] == '\t') { + q++; + continue; + } else { + jsonStr[p + 1] = jsonStr[q]; + p++; + q++; + } + } } \ No newline at end of file diff --git a/source/libs/sync/test/CMakeLists.txt b/source/libs/sync/test/CMakeLists.txt index cfbdf0e961..c68c6349fb 100644 --- a/source/libs/sync/test/CMakeLists.txt +++ b/source/libs/sync/test/CMakeLists.txt @@ -38,6 +38,15 @@ add_executable(syncRespMgrTest "") add_executable(syncSnapshotTest "") add_executable(syncApplyMsgTest "") add_executable(syncConfigChangeTest "") +add_executable(syncConfigChangeSnapshotTest "") +add_executable(syncSnapshotSendTest "") +add_executable(syncSnapshotRspTest "") +add_executable(syncSnapshotSenderTest "") +add_executable(syncSnapshotReceiverTest "") +add_executable(syncTestTool "") +add_executable(syncRaftLogTest "") +add_executable(syncRaftLogTest2 "") +add_executable(syncRaftLogTest3 "") target_sources(syncTest @@ -200,6 +209,42 @@ target_sources(syncConfigChangeTest PRIVATE "syncConfigChangeTest.cpp" ) +target_sources(syncConfigChangeSnapshotTest + PRIVATE + "syncConfigChangeSnapshotTest.cpp" +) +target_sources(syncSnapshotSendTest + PRIVATE + "syncSnapshotSendTest.cpp" +) +target_sources(syncSnapshotRspTest + PRIVATE + "syncSnapshotRspTest.cpp" +) +target_sources(syncSnapshotSenderTest + PRIVATE + "syncSnapshotSenderTest.cpp" +) +target_sources(syncSnapshotReceiverTest + PRIVATE + "syncSnapshotReceiverTest.cpp" +) +target_sources(syncTestTool + PRIVATE + "syncTestTool.cpp" +) +target_sources(syncRaftLogTest + PRIVATE + "syncRaftLogTest.cpp" +) +target_sources(syncRaftLogTest2 + PRIVATE + 
"syncRaftLogTest2.cpp" +) +target_sources(syncRaftLogTest3 + PRIVATE + "syncRaftLogTest3.cpp" +) target_include_directories(syncTest @@ -402,6 +447,51 @@ target_include_directories(syncConfigChangeTest "${TD_SOURCE_DIR}/include/libs/sync" "${CMAKE_CURRENT_SOURCE_DIR}/../inc" ) +target_include_directories(syncConfigChangeSnapshotTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncSnapshotSendTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncSnapshotRspTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncSnapshotSenderTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncSnapshotReceiverTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncTestTool + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncRaftLogTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncRaftLogTest2 + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncRaftLogTest3 + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) target_link_libraries(syncTest @@ -564,6 +654,42 @@ target_link_libraries(syncConfigChangeTest sync gtest_main ) +target_link_libraries(syncConfigChangeSnapshotTest + sync + gtest_main +) +target_link_libraries(syncSnapshotSendTest + sync + gtest_main +) +target_link_libraries(syncSnapshotRspTest + sync + gtest_main +) +target_link_libraries(syncSnapshotSenderTest + sync + gtest_main +) +target_link_libraries(syncSnapshotReceiverTest + sync + gtest_main +) 
+target_link_libraries(syncTestTool + sync + gtest_main +) +target_link_libraries(syncRaftLogTest + sync + gtest_main +) +target_link_libraries(syncRaftLogTest2 + sync + gtest_main +) +target_link_libraries(syncRaftLogTest3 + sync + gtest_main +) enable_testing() diff --git a/source/libs/sync/test/syncAppendEntriesReplyTest.cpp b/source/libs/sync/test/syncAppendEntriesReplyTest.cpp index a90259bc3a..d41e99a3cd 100644 --- a/source/libs/sync/test/syncAppendEntriesReplyTest.cpp +++ b/source/libs/sync/test/syncAppendEntriesReplyTest.cpp @@ -22,6 +22,8 @@ SyncAppendEntriesReply *createMsg() { pMsg->destId.vgId = 100; pMsg->success = true; pMsg->matchIndex = 77; + pMsg->term = 33; + pMsg->privateTerm = 44; return pMsg; } diff --git a/source/libs/sync/test/syncAppendEntriesTest.cpp b/source/libs/sync/test/syncAppendEntriesTest.cpp index bb9f306a1c..98b392274e 100644 --- a/source/libs/sync/test/syncAppendEntriesTest.cpp +++ b/source/libs/sync/test/syncAppendEntriesTest.cpp @@ -23,6 +23,7 @@ SyncAppendEntries *createMsg() { pMsg->prevLogIndex = 11; pMsg->prevLogTerm = 22; pMsg->commitIndex = 33; + pMsg->privateTerm = 44; strcpy(pMsg->data, "hello world"); return pMsg; } diff --git a/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp b/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp new file mode 100644 index 0000000000..781c168da9 --- /dev/null +++ b/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp @@ -0,0 +1,366 @@ +#include +#include +#include "os.h" +#include "syncEnv.h" +#include "syncIO.h" +#include "syncInt.h" +#include "syncUtil.h" +#include "wal.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +uint16_t gPorts[] = {7010, 7110, 7210, 7310, 7410}; +const char* gDir = "./syncReplicateTest"; +int32_t gVgId = 1234; +SyncIndex gSnapshotLastApplyIndex; 
+SyncIndex gSnapshotLastApplyTerm; + +void init() { + int code = walInit(); + assert(code == 0); + + code = syncInit(); + assert(code == 0); + + sprintf(tsTempDir, "%s", "."); +} + +void cleanup() { walCleanUp(); } + +void CommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) { + SyncIndex beginIndex = SYNC_INDEX_INVALID; + if (pFsm->FpGetSnapshot != NULL) { + SSnapshot snapshot; + pFsm->FpGetSnapshot(pFsm, &snapshot); + beginIndex = snapshot.lastApplyIndex; + } + + if (cbMeta.index > beginIndex) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==CommitCb== pFsm:%p, index:%ld, isWeak:%d, code:%d, state:%d %s, flag:%lu, term:%lu \n", + pFsm, cbMeta.index, cbMeta.isWeak, cbMeta.code, cbMeta.state, syncUtilState2String(cbMeta.state), + cbMeta.flag, cbMeta.term); + syncRpcMsgLog2(logBuf, (SRpcMsg*)pMsg); + } else { + sTrace("==callback== ==CommitCb== do not apply again %ld", cbMeta.index); + } +} + +void PreCommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==PreCommitCb== pFsm:%p, index:%ld, isWeak:%d, code:%d, state:%d %s flag:%lu\n", pFsm, + cbMeta.index, cbMeta.isWeak, cbMeta.code, cbMeta.state, syncUtilState2String(cbMeta.state), cbMeta.flag); + syncRpcMsgLog2(logBuf, (SRpcMsg*)pMsg); +} + +void RollBackCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) { + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==RollBackCb== pFsm:%p, index:%ld, isWeak:%d, code:%d, state:%d %s flag:%lu\n", pFsm, + cbMeta.index, cbMeta.isWeak, cbMeta.code, cbMeta.state, syncUtilState2String(cbMeta.state), cbMeta.flag); + syncRpcMsgLog2(logBuf, (SRpcMsg*)pMsg); +} + +int32_t GetSnapshotCb(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { + pSnapshot->data = NULL; + pSnapshot->lastApplyIndex = gSnapshotLastApplyIndex; + pSnapshot->lastApplyTerm = gSnapshotLastApplyTerm; + return 0; +} + +int32_t 
SnapshotStartRead(struct SSyncFSM* pFsm, void** ppReader) { + *ppReader = (void*)0xABCD; + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStartRead== pFsm:%p, *ppReader:%p", pFsm, *ppReader); + sTrace("%s", logBuf); + return 0; +} + +int32_t SnapshotStopRead(struct SSyncFSM* pFsm, void* pReader) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStopRead== pFsm:%p, pReader:%p", pFsm, pReader); + sTrace("%s", logBuf); + return 0; +} + +int32_t SnapshotDoRead(struct SSyncFSM* pFsm, void* pReader, void** ppBuf, int32_t* len) { + static int readIter = 0; + + if (readIter == 5) { + *len = 0; + *ppBuf = NULL; + } else if (readIter < 5) { + *len = 20; + *ppBuf = taosMemoryMalloc(*len); + snprintf((char*)*ppBuf, *len, "data iter:%d", readIter); + } + + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==SnapshotDoRead== pFsm:%p, pReader:%p, *len:%d, *ppBuf:%s, readIter:%d", pFsm, pReader, *len, + (char*)(*ppBuf), readIter); + sTrace("%s", logBuf); + + readIter++; + return 0; +} + +int32_t SnapshotStartWrite(struct SSyncFSM* pFsm, void** ppWriter) { + *ppWriter = (void*)0xCDEF; + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStartWrite== pFsm:%p, *ppWriter:%p", pFsm, *ppWriter); + sTrace("%s", logBuf); + return 0; +} + +int32_t SnapshotStopWrite(struct SSyncFSM* pFsm, void* pWriter, bool isApply) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStopWrite== pFsm:%p, pWriter:%p, isApply:%d", pFsm, pWriter, + isApply); + sTrace("%s", logBuf); + + if (isApply) { + gSnapshotLastApplyIndex = 10; + gSnapshotLastApplyTerm = 1; + } + + return 0; +} + +int32_t SnapshotDoWrite(struct SSyncFSM* pFsm, void* pWriter, void* pBuf, int32_t len) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotDoWrite== pFsm:%p, pWriter:%p, len:%d pBuf:%s", pFsm, + pWriter, len, (char*)pBuf); + 
sTrace("%s", logBuf); + return 0; +} + +void RestoreFinishCb(struct SSyncFSM* pFsm) { sTrace("==callback== ==RestoreFinishCb=="); } + +void ReConfigCb(struct SSyncFSM* pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) { + sTrace("==callback== ==ReConfigCb== flag:0x%lX, isDrop:%d, index:%ld, code:%d, currentTerm:%lu, term:%lu", + cbMeta.flag, cbMeta.isDrop, cbMeta.index, cbMeta.code, cbMeta.currentTerm, cbMeta.term); +} + +SSyncFSM* createFsm() { + SSyncFSM* pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(SSyncFSM)); + memset(pFsm, 0, sizeof(*pFsm)); + + pFsm->FpCommitCb = CommitCb; + pFsm->FpPreCommitCb = PreCommitCb; + pFsm->FpRollBackCb = RollBackCb; + + pFsm->FpGetSnapshot = GetSnapshotCb; + pFsm->FpRestoreFinishCb = RestoreFinishCb; + pFsm->FpSnapshotStartRead = SnapshotStartRead; + pFsm->FpSnapshotStopRead = SnapshotStopRead; + pFsm->FpSnapshotDoRead = SnapshotDoRead; + pFsm->FpSnapshotStartWrite = SnapshotStartWrite; + pFsm->FpSnapshotStopWrite = SnapshotStopWrite; + pFsm->FpSnapshotDoWrite = SnapshotDoWrite; + + pFsm->FpReConfigCb = ReConfigCb; + + return pFsm; +} + +SWal* createWal(char* path, int32_t vgId) { + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = vgId; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal* pWal = walOpen(path, &walCfg); + assert(pWal != NULL); + return pWal; +} + +int64_t createSyncNode(int32_t replicaNum, int32_t myIndex, int32_t vgId, SWal* pWal, char* path, bool isStandBy) { + SSyncInfo syncInfo; + syncInfo.vgId = vgId; + syncInfo.msgcb = &gSyncIO->msgcb; + syncInfo.FpSendMsg = syncIOSendMsg; + syncInfo.FpEqMsg = syncIOEqMsg; + syncInfo.pFsm = createFsm(); + snprintf(syncInfo.path, sizeof(syncInfo.path), "%s_sync_replica%d_index%d", path, replicaNum, myIndex); + syncInfo.pWal = pWal; + syncInfo.isStandBy = isStandBy; + syncInfo.snapshotEnable = true; + + SSyncCfg* pCfg = 
&syncInfo.syncCfg; + + if (isStandBy) { + pCfg->myIndex = 0; + pCfg->replicaNum = 1; + pCfg->nodeInfo[0].nodePort = gPorts[myIndex]; + taosGetFqdn(pCfg->nodeInfo[0].nodeFqdn); + + } else { + pCfg->myIndex = myIndex; + pCfg->replicaNum = replicaNum; + + for (int i = 0; i < replicaNum; ++i) { + pCfg->nodeInfo[i].nodePort = gPorts[i]; + taosGetFqdn(pCfg->nodeInfo[i].nodeFqdn); + // snprintf(pCfg->nodeInfo[i].nodeFqdn, sizeof(pCfg->nodeInfo[i].nodeFqdn), "%s", "127.0.0.1"); + } + } + + int64_t rid = syncOpen(&syncInfo); + assert(rid > 0); + + SSyncNode* pSyncNode = (SSyncNode*)syncNodeAcquire(rid); + assert(pSyncNode != NULL); + gSyncIO->FpOnSyncPing = pSyncNode->FpOnPing; + gSyncIO->FpOnSyncPingReply = pSyncNode->FpOnPingReply; + gSyncIO->FpOnSyncTimeout = pSyncNode->FpOnTimeout; + gSyncIO->FpOnSyncClientRequest = pSyncNode->FpOnClientRequest; + + gSyncIO->FpOnSyncRequestVote = pSyncNode->FpOnRequestVote; + gSyncIO->FpOnSyncRequestVoteReply = pSyncNode->FpOnRequestVoteReply; + gSyncIO->FpOnSyncAppendEntries = pSyncNode->FpOnAppendEntries; + gSyncIO->FpOnSyncAppendEntriesReply = pSyncNode->FpOnAppendEntriesReply; + + gSyncIO->FpOnSyncSnapshotSend = pSyncNode->FpOnSnapshotSend; + gSyncIO->FpOnSyncSnapshotRsp = pSyncNode->FpOnSnapshotRsp; + + gSyncIO->pSyncNode = pSyncNode; + syncNodeRelease(pSyncNode); + + return rid; +} + +void configChange(int64_t rid, int32_t replicaNum, int32_t myIndex) { + SSyncCfg syncCfg; + + syncCfg.myIndex = myIndex; + syncCfg.replicaNum = replicaNum; + + for (int i = 0; i < replicaNum; ++i) { + syncCfg.nodeInfo[i].nodePort = gPorts[i]; + taosGetFqdn(syncCfg.nodeInfo[i].nodeFqdn); + } + + syncReconfig(rid, &syncCfg); +} + +void usage(char* exe) { + printf("usage: %s replicaNum myIndex lastApplyIndex writeRecordNum isStandBy isConfigChange lastApplyTerm \n", exe); +} + +SRpcMsg* createRpcMsg(int i, int count, int myIndex) { + SRpcMsg* pMsg = (SRpcMsg*)taosMemoryMalloc(sizeof(SRpcMsg)); + memset(pMsg, 0, sizeof(SRpcMsg)); + pMsg->msgType = 9999; 
+ pMsg->contLen = 256; + pMsg->pCont = rpcMallocCont(pMsg->contLen); + snprintf((char*)(pMsg->pCont), pMsg->contLen, "value-myIndex:%u-%d-%d-%ld", myIndex, i, count, taosGetTimestampMs()); + return pMsg; +} + +int main(int argc, char** argv) { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE + DEBUG_INFO; + if (argc != 8) { + usage(argv[0]); + exit(-1); + } + + int32_t replicaNum = atoi(argv[1]); + int32_t myIndex = atoi(argv[2]); + int32_t lastApplyIndex = atoi(argv[3]); + int32_t writeRecordNum = atoi(argv[4]); + bool isStandBy = atoi(argv[5]); + bool isConfigChange = atoi(argv[6]); + int32_t lastApplyTerm = atoi(argv[7]); + + sTrace( + "args: replicaNum:%d, myIndex:%d, lastApplyIndex:%d, writeRecordNum:%d, isStandBy:%d, isConfigChange:%d, " + "lastApplyTerm:%d", + replicaNum, myIndex, lastApplyIndex, writeRecordNum, isStandBy, isConfigChange, lastApplyTerm); + + gSnapshotLastApplyIndex = lastApplyIndex; + gSnapshotLastApplyTerm = lastApplyTerm; + + if (!isStandBy) { + assert(replicaNum >= 1 && replicaNum <= 5); + assert(myIndex >= 0 && myIndex < replicaNum); + assert(lastApplyIndex >= -1); + assert(writeRecordNum >= 0); + } + + init(); + int32_t ret = syncIOStart((char*)"127.0.0.1", gPorts[myIndex]); + assert(ret == 0); + + char walPath[128]; + snprintf(walPath, sizeof(walPath), "%s_wal_replica%d_index%d", gDir, replicaNum, myIndex); + SWal* pWal = createWal(walPath, gVgId); + + int64_t rid = createSyncNode(replicaNum, myIndex, gVgId, pWal, (char*)gDir, isStandBy); + assert(rid > 0); + + syncStart(rid); + + /* + if (isStandBy) { + syncStartStandBy(rid); + } else { + syncStart(rid); + } + */ + + SSyncNode* pSyncNode = (SSyncNode*)syncNodeAcquire(rid); + assert(pSyncNode != NULL); + + if (isConfigChange) { + configChange(rid, 2, myIndex); + } + + //--------------------------- + int32_t alreadySend = 0; + while (1) { + char* s = syncNode2SimpleStr(pSyncNode); + + if (alreadySend < writeRecordNum) { + SRpcMsg* pRpcMsg = 
createRpcMsg(alreadySend, writeRecordNum, myIndex); + int32_t ret = syncPropose(rid, pRpcMsg, false); + if (ret == TAOS_SYNC_PROPOSE_NOT_LEADER) { + sTrace("%s value%d write not leader", s, alreadySend); + } else { + assert(ret == 0); + sTrace("%s value%d write ok", s, alreadySend); + } + alreadySend++; + + rpcFreeCont(pRpcMsg->pCont); + taosMemoryFree(pRpcMsg); + } else { + sTrace("%s", s); + } + + taosMsleep(1000); + taosMemoryFree(s); + taosMsleep(1000); + } + + syncNodeRelease(pSyncNode); + syncStop(rid); + walClose(pWal); + syncIOStop(); + cleanup(); + return 0; +} diff --git a/source/libs/sync/test/syncConfigChangeTest.cpp b/source/libs/sync/test/syncConfigChangeTest.cpp index 1ab3ce203a..c9d9ca48aa 100644 --- a/source/libs/sync/test/syncConfigChangeTest.cpp +++ b/source/libs/sync/test/syncConfigChangeTest.cpp @@ -93,7 +93,6 @@ SSyncFSM* createFsm() { pFsm->FpGetSnapshot = GetSnapshotCb; pFsm->FpRestoreFinishCb = RestoreFinishCb; - pFsm->FpReConfigCb = ReConfigCb; return pFsm; diff --git a/source/libs/sync/test/syncIndexMgrTest.cpp b/source/libs/sync/test/syncIndexMgrTest.cpp index 7fcce2bc4f..0ad69f0f51 100644 --- a/source/libs/sync/test/syncIndexMgrTest.cpp +++ b/source/libs/sync/test/syncIndexMgrTest.cpp @@ -22,55 +22,23 @@ int32_t replicaNum = 3; int32_t myIndex = 0; SRaftId ids[TSDB_MAX_REPLICA]; -SSyncInfo syncInfo; -SSyncFSM* pFsm; SSyncNode* pSyncNode; SSyncNode* syncNodeInit() { - syncInfo.vgId = 1234; - syncInfo.msgcb = &gSyncIO->msgcb; - syncInfo.FpSendMsg = syncIOSendMsg; - syncInfo.FpEqMsg = syncIOEqMsg; - syncInfo.pFsm = pFsm; - snprintf(syncInfo.path, sizeof(syncInfo.path), "%s", "./"); - - SSyncCfg* pCfg = &syncInfo.syncCfg; - pCfg->myIndex = myIndex; - pCfg->replicaNum = replicaNum; - + pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(SSyncNode)); + memset(pSyncNode, 0, sizeof(SSyncNode)); + pSyncNode->replicaNum = replicaNum; for (int i = 0; i < replicaNum; ++i) { - pCfg->nodeInfo[i].nodePort = ports[i]; - snprintf(pCfg->nodeInfo[i].nodeFqdn, 
sizeof(pCfg->nodeInfo[i].nodeFqdn), "%s", "127.0.0.1"); - // taosGetFqdn(pCfg->nodeInfo[0].nodeFqdn); + pSyncNode->replicasId[i].addr = syncUtilAddr2U64("127.0.0.1", ports[i]); + pSyncNode->replicasId[i].vgId = 1234; + + ids[i].addr = pSyncNode->replicasId[i].addr; + ids[i].vgId = pSyncNode->replicasId[i].vgId; } - pSyncNode = syncNodeOpen(&syncInfo); - assert(pSyncNode != NULL); - - gSyncIO->FpOnSyncPing = pSyncNode->FpOnPing; - gSyncIO->FpOnSyncPingReply = pSyncNode->FpOnPingReply; - gSyncIO->FpOnSyncRequestVote = pSyncNode->FpOnRequestVote; - gSyncIO->FpOnSyncRequestVoteReply = pSyncNode->FpOnRequestVoteReply; - gSyncIO->FpOnSyncAppendEntries = pSyncNode->FpOnAppendEntries; - gSyncIO->FpOnSyncAppendEntriesReply = pSyncNode->FpOnAppendEntriesReply; - gSyncIO->FpOnSyncPing = pSyncNode->FpOnPing; - gSyncIO->FpOnSyncPingReply = pSyncNode->FpOnPingReply; - gSyncIO->pSyncNode = pSyncNode; - return pSyncNode; } -SSyncNode* syncInitTest() { return syncNodeInit(); } - -void initRaftId(SSyncNode* pSyncNode) { - for (int i = 0; i < replicaNum; ++i) { - ids[i] = pSyncNode->replicasId[i]; - char* s = syncUtilRaftId2Str(&ids[i]); - printf("raftId[%d] : %s\n", i, s); - taosMemoryFree(s); - } -} - int main(int argc, char** argv) { tsAsyncLog = 0; sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; @@ -80,58 +48,52 @@ int main(int argc, char** argv) { myIndex = atoi(argv[1]); } - int32_t ret = syncIOStart((char*)"127.0.0.1", ports[myIndex]); - assert(ret == 0); - - ret = syncEnvStart(); - assert(ret == 0); - - SSyncNode* pSyncNode = syncInitTest(); + SSyncNode* pSyncNode = syncNodeInit(); assert(pSyncNode != NULL); - char* serialized = syncNode2Str(pSyncNode); - printf("%s\n", serialized); - taosMemoryFree(serialized); - - initRaftId(pSyncNode); - + printf("---------------------------------------\n"); SSyncIndexMgr* pSyncIndexMgr = syncIndexMgrCreate(pSyncNode); assert(pSyncIndexMgr != NULL); - - printf("---------------------------------------\n"); { char* serialized = 
syncIndexMgr2Str(pSyncIndexMgr); assert(serialized != NULL); printf("%s\n", serialized); taosMemoryFree(serialized); } + printf("---------------------------------------\n"); + printf("---------------------------------------\n"); syncIndexMgrSetIndex(pSyncIndexMgr, &ids[0], 100); syncIndexMgrSetIndex(pSyncIndexMgr, &ids[1], 200); syncIndexMgrSetIndex(pSyncIndexMgr, &ids[2], 300); - - printf("---------------------------------------\n"); + // syncIndexMgrSetTerm(pSyncIndexMgr, &ids[0], 700); + // syncIndexMgrSetTerm(pSyncIndexMgr, &ids[1], 800); + // syncIndexMgrSetTerm(pSyncIndexMgr, &ids[2], 900); { char* serialized = syncIndexMgr2Str(pSyncIndexMgr); assert(serialized != NULL); printf("%s\n", serialized); taosMemoryFree(serialized); } + printf("---------------------------------------\n"); printf("---------------------------------------\n"); for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { SyncIndex idx = syncIndexMgrGetIndex(pSyncIndexMgr, &ids[i]); - printf("index %d : %lu \n", i, idx); + // SyncTerm term = syncIndexMgrGetTerm(pSyncIndexMgr, &ids[i]); + // printf("%d: index:%ld term:%lu \n", i, idx, term); } - - syncIndexMgrClear(pSyncIndexMgr); printf("---------------------------------------\n"); + + printf("---------------------------------------\n"); + syncIndexMgrClear(pSyncIndexMgr); { char* serialized = syncIndexMgr2Str(pSyncIndexMgr); assert(serialized != NULL); printf("%s\n", serialized); taosMemoryFree(serialized); } + printf("---------------------------------------\n"); syncIndexMgrDestroy(pSyncIndexMgr); return 0; diff --git a/source/libs/sync/test/syncRaftCfgTest.cpp b/source/libs/sync/test/syncRaftCfgTest.cpp index f5b24db651..564cbdb69a 100644 --- a/source/libs/sync/test/syncRaftCfgTest.cpp +++ b/source/libs/sync/test/syncRaftCfgTest.cpp @@ -71,7 +71,10 @@ void test3() { if (taosCheckExistFile(s)) { printf("%s file: %s already exist! 
\n", (char*)__FUNCTION__, s); } else { - raftCfgCreateFile(pCfg, 7, s); + SRaftCfgMeta meta; + meta.isStandBy = 7; + meta.snapshotEnable = 9; + raftCfgCreateFile(pCfg, meta, s); printf("%s create json file: %s \n", (char*)__FUNCTION__, s); } @@ -94,6 +97,7 @@ void test5() { pCfg->cfg.myIndex = taosGetTimestampSec(); pCfg->isStandBy += 2; + pCfg->snapshotEnable += 3; raftCfgPersist(pCfg); printf("%s update json file: %s myIndex->%d \n", (char*)__FUNCTION__, "./test3_raft_cfg.json", pCfg->cfg.myIndex); diff --git a/source/libs/sync/test/syncRaftLogTest.cpp b/source/libs/sync/test/syncRaftLogTest.cpp new file mode 100644 index 0000000000..7903e86749 --- /dev/null +++ b/source/libs/sync/test/syncRaftLogTest.cpp @@ -0,0 +1,172 @@ +#include "syncRaftLog.h" +//#include +#include +#include "syncEnv.h" +#include "syncIO.h" +#include "syncInt.h" +#include "syncRaftStore.h" +#include "syncUtil.h" +#include "wal.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +const char *gWalPath = "./syncLogStoreTest_wal"; + +void init() { walInit(); } + +void test1() { + taosRemoveDir(gWalPath); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal *pWal = walOpen(gWalPath, &walCfg); + assert(pWal != NULL); + + int64_t firstVer = walGetFirstVer(pWal); + int64_t lastVer = walGetLastVer(pWal); + printf("firstVer:%ld lastVer:%ld \n", firstVer, lastVer); + + walClose(pWal); +} + +void test2() { + taosRemoveDir(gWalPath); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 
1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal *pWal = walOpen(gWalPath, &walCfg); + assert(pWal != NULL); + + for (int i = 0; i < 5; ++i) { + int code = walWrite(pWal, i, 100, "aa", 3); + if (code != 0) { + printf("code:%d terror:%d msg:%s i:%d \n", code, terrno, tstrerror(terrno), i); + assert(0); + } + } + + int64_t firstVer = walGetFirstVer(pWal); + int64_t lastVer = walGetLastVer(pWal); + printf("firstVer:%ld lastVer:%ld \n", firstVer, lastVer); + + walClose(pWal); +} + +void test3() { + taosRemoveDir(gWalPath); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal *pWal = walOpen(gWalPath, &walCfg); + assert(pWal != NULL); + + walRestoreFromSnapshot(pWal, 5); + + int64_t firstVer = walGetFirstVer(pWal); + int64_t lastVer = walGetLastVer(pWal); + printf("firstVer:%ld lastVer:%ld \n", firstVer, lastVer); + + walClose(pWal); +} + +void test4() { + taosRemoveDir(gWalPath); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal *pWal = walOpen(gWalPath, &walCfg); + assert(pWal != NULL); + + walRestoreFromSnapshot(pWal, 5); + + for (int i = 6; i < 10; ++i) { + int code = walWrite(pWal, i, 100, "aa", 3); + if (code != 0) { + printf("code:%d terror:%d msg:%s i:%d \n", code, terrno, tstrerror(terrno), i); + assert(0); + } + } + + int64_t firstVer = walGetFirstVer(pWal); + int64_t lastVer = walGetLastVer(pWal); + printf("firstVer:%ld lastVer:%ld \n", firstVer, lastVer); + + walClose(pWal); +} + +void test5() { + taosRemoveDir(gWalPath); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + 
walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal *pWal = walOpen(gWalPath, &walCfg); + assert(pWal != NULL); + + walRestoreFromSnapshot(pWal, 5); + walRestoreFromSnapshot(pWal, 7); + + int64_t firstVer = walGetFirstVer(pWal); + int64_t lastVer = walGetLastVer(pWal); + printf("firstVer:%ld lastVer:%ld \n", firstVer, lastVer); + + walClose(pWal); +} + +void cleanup() { walCleanUp(); } + +int main(int argc, char **argv) { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; + init(); + + test1(); + test2(); + test3(); + test4(); + test5(); + + cleanup(); + return 0; +} diff --git a/source/libs/sync/test/syncRaftLogTest2.cpp b/source/libs/sync/test/syncRaftLogTest2.cpp new file mode 100644 index 0000000000..64e1da51a1 --- /dev/null +++ b/source/libs/sync/test/syncRaftLogTest2.cpp @@ -0,0 +1,437 @@ +#include +#include +#include "syncEnv.h" +#include "syncIO.h" +#include "syncInt.h" +#include "syncRaftLog.h" +#include "syncRaftStore.h" +#include "syncUtil.h" +#include "wal.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +SSyncNode* pSyncNode; +SWal* pWal; +SSyncLogStore* pLogStore; +const char* pWalPath = "./syncLogStoreTest_wal"; + +SyncIndex gSnapshotLastApplyIndex; +SyncIndex gSnapshotLastApplyTerm; + +int32_t GetSnapshotCb(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { + pSnapshot->data = NULL; + pSnapshot->lastApplyIndex = gSnapshotLastApplyIndex; + pSnapshot->lastApplyTerm = gSnapshotLastApplyTerm; + return 0; +} + +bool gAssert = true; + +void init() { + walInit(); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + 
walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + pWal = walOpen(pWalPath, &walCfg); + assert(pWal != NULL); + + pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(SSyncNode)); + memset(pSyncNode, 0, sizeof(SSyncNode)); + pSyncNode->pWal = pWal; + + pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(SSyncFSM)); + pSyncNode->pFsm->FpGetSnapshot = GetSnapshotCb; +} + +void cleanup() { + walClose(pWal); + walCleanUp(); + taosMemoryFree(pSyncNode); +} + +void test1() { + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest1 ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 0); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 0); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + logStoreDestory(pLogStore); + cleanup(); + + // restart + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest1 restart ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 0); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 0); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test2() { + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + pLogStore->syncLogSetBeginIndex(pLogStore, 5); + logStoreLog2((char*)"\n\n\ntest2 ----- ", 
pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 5); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + logStoreDestory(pLogStore); + cleanup(); + + // restart + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest2 restart ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 5); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test3() { + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest3 ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 0); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 0); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + for (int i = 0; i <= 4; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + 
snprintf(pEntry->data, dataLen, "value%d", i); + + pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test3 after appendEntry", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 0); + assert(pLogStore->syncLogEndIndex(pLogStore) == 4); + assert(pLogStore->syncLogEntryCount(pLogStore) == 5); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 5); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 4); + assert(pLogStore->syncLogLastTerm(pLogStore) == 104); + } + + logStoreDestory(pLogStore); + cleanup(); + + // restart + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest3 restart ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 0); + assert(pLogStore->syncLogEndIndex(pLogStore) == 4); + assert(pLogStore->syncLogEntryCount(pLogStore) == 5); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 5); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 4); + assert(pLogStore->syncLogLastTerm(pLogStore) == 104); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test4() { + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest4 ----- ", pLogStore); + pLogStore->syncLogSetBeginIndex(pLogStore, 5); + + for (int i = 5; i <= 9; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + snprintf(pEntry->data, dataLen, "value%d", i); + + pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test4 after appendEntry", pLogStore); 
+ + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == 9); + assert(pLogStore->syncLogEntryCount(pLogStore) == 5); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 10); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 9); + assert(pLogStore->syncLogLastTerm(pLogStore) == 109); + } + + logStoreDestory(pLogStore); + cleanup(); + + // restart + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest4 restart ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == 9); + assert(pLogStore->syncLogEntryCount(pLogStore) == 5); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 10); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 9); + assert(pLogStore->syncLogLastTerm(pLogStore) == 109); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test5() { + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest5 ----- ", pLogStore); + pLogStore->syncLogSetBeginIndex(pLogStore, 5); + + for (int i = 5; i <= 9; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + snprintf(pEntry->data, dataLen, "value%d", i); + + pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test5 after appendEntry", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == 9); + assert(pLogStore->syncLogEntryCount(pLogStore) == 5); + 
assert(pLogStore->syncLogWriteIndex(pLogStore) == 10); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 9); + assert(pLogStore->syncLogLastTerm(pLogStore) == 109); + } + + pLogStore->syncLogTruncate(pLogStore, 7); + logStoreLog2((char*)"after truncate 7", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == 6); + assert(pLogStore->syncLogEntryCount(pLogStore) == 2); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 7); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 6); + assert(pLogStore->syncLogLastTerm(pLogStore) == 106); + } + + logStoreDestory(pLogStore); + cleanup(); + + // restart + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest5 restart ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == 6); + assert(pLogStore->syncLogEntryCount(pLogStore) == 2); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 7); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 6); + assert(pLogStore->syncLogLastTerm(pLogStore) == 106); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test6() { + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest6 ----- ", pLogStore); + pLogStore->syncLogSetBeginIndex(pLogStore, 5); + + for (int i = 5; i <= 9; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + snprintf(pEntry->data, dataLen, "value%d", i); + + 
pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test6 after appendEntry", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == 9); + assert(pLogStore->syncLogEntryCount(pLogStore) == 5); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 10); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 9); + assert(pLogStore->syncLogLastTerm(pLogStore) == 109); + } + + pLogStore->syncLogTruncate(pLogStore, 5); + logStoreLog2((char*)"after truncate 5", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 5); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + logStoreDestory(pLogStore); + cleanup(); + + // restart + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest6 restart ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 5); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +int main(int argc, char** argv) { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_INFO + DEBUG_SCREEN + DEBUG_FILE; + + if (argc == 2) { + gAssert = atoi(argv[1]); + } + sTrace("gAssert : %d", gAssert); + + test1(); + test2(); + test3(); + test4(); + test5(); + test6(); + + 
return 0; +} diff --git a/source/libs/sync/test/syncRaftLogTest3.cpp b/source/libs/sync/test/syncRaftLogTest3.cpp new file mode 100644 index 0000000000..b47f8c96c5 --- /dev/null +++ b/source/libs/sync/test/syncRaftLogTest3.cpp @@ -0,0 +1,388 @@ +#include +#include +#include "syncEnv.h" +#include "syncIO.h" +#include "syncInt.h" +#include "syncRaftLog.h" +#include "syncRaftStore.h" +#include "syncUtil.h" +#include "wal.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +bool gAssert = true; + +SSyncNode* pSyncNode; +SWal* pWal; +SSyncLogStore* pLogStore; +const char* pWalPath = "./syncLogStoreTest_wal"; + +SyncIndex gSnapshotLastApplyIndex; +SyncIndex gSnapshotLastApplyTerm; + +int32_t GetSnapshotCb(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { + pSnapshot->data = NULL; + pSnapshot->lastApplyIndex = gSnapshotLastApplyIndex; + pSnapshot->lastApplyTerm = gSnapshotLastApplyTerm; + return 0; +} + +void init() { + walInit(); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + pWal = walOpen(pWalPath, &walCfg); + assert(pWal != NULL); + + pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(SSyncNode)); + memset(pSyncNode, 0, sizeof(SSyncNode)); + pSyncNode->pWal = pWal; + + pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(SSyncFSM)); + pSyncNode->pFsm->FpGetSnapshot = GetSnapshotCb; +} + +void cleanup() { + walClose(pWal); + walCleanUp(); + taosMemoryFree(pSyncNode); +} + +void test1() { + // no snapshot + // no log + + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + pSyncNode->pLogStore = pLogStore; + 
logStoreLog2((char*)"\n\n\ntest1 ----- ", pLogStore); + + gSnapshotLastApplyIndex = -1; + gSnapshotLastApplyTerm = 0; + + bool hasSnapshot = syncNodeHasSnapshot(pSyncNode); + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + + SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); + SyncTerm lastTerm = syncNodeGetLastTerm(pSyncNode); + + SyncIndex testIndex = 0; + SyncIndex preIndex = syncNodeGetPreIndex(pSyncNode, testIndex); + SyncTerm preTerm = syncNodeGetPreTerm(pSyncNode, testIndex); + + SyncIndex syncStartIndex = syncNodeSyncStartIndex(pSyncNode); + + sTrace("test1"); + sTrace("hasSnapshot:%d, lastApplyIndex:%ld, lastApplyTerm:%lu", hasSnapshot, snapshot.lastApplyIndex, + snapshot.lastApplyTerm); + sTrace("lastIndex: %ld", lastIndex); + sTrace("lastTerm: %lu", lastTerm); + sTrace("syncStartIndex: %ld", syncStartIndex); + sTrace("%ld's preIndex: %ld", testIndex, preIndex); + sTrace("%ld's preTerm: %lu", testIndex, preTerm); + + if (gAssert) { + assert(lastIndex == -1); + assert(lastTerm == 0); + assert(syncStartIndex == 0); + assert(preIndex == -1); + assert(preTerm == 0); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test2() { + // no snapshot + // whole log + + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + pSyncNode->pLogStore = pLogStore; + logStoreLog2((char*)"\n\n\ntest2 ----- ", pLogStore); + + for (int i = 0; i <= 10; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + snprintf(pEntry->data, dataLen, "value%d", i); + + pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test2 after appendEntry", pLogStore); + + gSnapshotLastApplyIndex = -1; + 
gSnapshotLastApplyTerm = 0; + + bool hasSnapshot = syncNodeHasSnapshot(pSyncNode); + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + + SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); + SyncTerm lastTerm = syncNodeGetLastTerm(pSyncNode); + + SyncIndex syncStartIndex = syncNodeSyncStartIndex(pSyncNode); + + sTrace("test2"); + sTrace("hasSnapshot:%d, lastApplyIndex:%ld, lastApplyTerm:%lu", hasSnapshot, snapshot.lastApplyIndex, + snapshot.lastApplyTerm); + sTrace("lastIndex: %ld", lastIndex); + sTrace("lastTerm: %lu", lastTerm); + sTrace("syncStartIndex: %ld", syncStartIndex); + + if (gAssert) { + assert(lastIndex == 10); + assert(lastTerm == 110); + assert(syncStartIndex == 11); + } + + for (SyncIndex i = 11; i >= 0; --i) { + SyncIndex preIndex = syncNodeGetPreIndex(pSyncNode, i); + SyncTerm preTerm = syncNodeGetPreTerm(pSyncNode, i); + + sTrace("%ld's preIndex: %ld", i, preIndex); + sTrace("%ld's preTerm: %lu", i, preTerm); + + if (gAssert) { + SyncIndex preIndexArr[12] = {-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + SyncTerm preTermArr[12] = {0, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110}; + + assert(preIndex == preIndexArr[i]); + assert(preTerm == preTermArr[i]); + } + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test3() { + // has snapshot + // no log + + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + pSyncNode->pLogStore = pLogStore; + logStoreLog2((char*)"\n\n\ntest3 ----- ", pLogStore); + + gSnapshotLastApplyIndex = 5; + gSnapshotLastApplyTerm = 100; + + bool hasSnapshot = syncNodeHasSnapshot(pSyncNode); + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + + SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); + SyncTerm lastTerm = syncNodeGetLastTerm(pSyncNode); + + SyncIndex preIndex = syncNodeGetPreIndex(pSyncNode, 6); + SyncTerm preTerm = syncNodeGetPreTerm(pSyncNode, 6); + + SyncIndex syncStartIndex = 
syncNodeSyncStartIndex(pSyncNode); + + sTrace("test3"); + sTrace("hasSnapshot:%d, lastApplyIndex:%ld, lastApplyTerm:%lu", hasSnapshot, snapshot.lastApplyIndex, + snapshot.lastApplyTerm); + sTrace("lastIndex: %ld", lastIndex); + sTrace("lastTerm: %lu", lastTerm); + sTrace("syncStartIndex: %ld", syncStartIndex); + sTrace("%d's preIndex: %ld", 6, preIndex); + sTrace("%d's preTerm: %lu", 6, preTerm); + + if (gAssert) { + assert(lastIndex == 5); + assert(lastTerm == 100); + assert(syncStartIndex == 6); + assert(preIndex == 5); + assert(preTerm == 100); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test4() { + // has snapshot + // whole log + + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + pSyncNode->pLogStore = pLogStore; + logStoreLog2((char*)"\n\n\ntest4 ----- ", pLogStore); + + for (int i = 0; i <= 10; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + snprintf(pEntry->data, dataLen, "value%d", i); + + pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test4 after appendEntry", pLogStore); + + gSnapshotLastApplyIndex = 5; + gSnapshotLastApplyTerm = 100; + + bool hasSnapshot = syncNodeHasSnapshot(pSyncNode); + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + + SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); + SyncTerm lastTerm = syncNodeGetLastTerm(pSyncNode); + + SyncIndex syncStartIndex = syncNodeSyncStartIndex(pSyncNode); + + sTrace("test4"); + sTrace("hasSnapshot:%d, lastApplyIndex:%ld, lastApplyTerm:%lu", hasSnapshot, snapshot.lastApplyIndex, + snapshot.lastApplyTerm); + sTrace("lastIndex: %ld", lastIndex); + sTrace("lastTerm: %lu", lastTerm); + 
sTrace("syncStartIndex: %ld", syncStartIndex); + + if (gAssert) { + assert(lastIndex == 10); + assert(lastTerm == 110); + assert(syncStartIndex == 11); + } + + for (SyncIndex i = 11; i >= 6; --i) { + SyncIndex preIndex = syncNodeGetPreIndex(pSyncNode, i); + SyncTerm preTerm = syncNodeGetPreTerm(pSyncNode, i); + + sTrace("%ld's preIndex: %ld", i, preIndex); + sTrace("%ld's preTerm: %lu", i, preTerm); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test5() { + // has snapshot + // partial log + + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + pSyncNode->pLogStore = pLogStore; + logStoreLog2((char*)"\n\n\ntest5 ----- ", pLogStore); + + pSyncNode->pLogStore->syncLogSetBeginIndex(pSyncNode->pLogStore, 6); + for (int i = 6; i <= 10; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + snprintf(pEntry->data, dataLen, "value%d", i); + + pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test5 after appendEntry", pLogStore); + + gSnapshotLastApplyIndex = 5; + gSnapshotLastApplyTerm = 100; + + bool hasSnapshot = syncNodeHasSnapshot(pSyncNode); + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + + SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); + SyncTerm lastTerm = syncNodeGetLastTerm(pSyncNode); + + SyncIndex syncStartIndex = syncNodeSyncStartIndex(pSyncNode); + + sTrace("test5"); + sTrace("hasSnapshot:%d, lastApplyIndex:%ld, lastApplyTerm:%lu", hasSnapshot, snapshot.lastApplyIndex, + snapshot.lastApplyTerm); + sTrace("lastIndex: %ld", lastIndex); + sTrace("lastTerm: %lu", lastTerm); + sTrace("syncStartIndex: %ld", syncStartIndex); + + for (SyncIndex i = 11; i >= 6; --i) { + 
SyncIndex preIndex = syncNodeGetPreIndex(pSyncNode, i); + SyncTerm preTerm = syncNodeGetPreTerm(pSyncNode, i); + + sTrace("%ld's preIndex: %ld", i, preIndex); + sTrace("%ld's preTerm: %lu", i, preTerm); + + if (gAssert) { + SyncIndex preIndexArr[12] = {9999, 9999, 9999, 9999, 9999, 9999, 5, 6, 7, 8, 9, 10}; + SyncTerm preTermArr[12] = {9999, 9999, 9999, 9999, 9999, 9999, 100, 106, 107, 108, 109, 110}; + + assert(preIndex == preIndexArr[i]); + assert(preTerm == preTermArr[i]); + } + } + + logStoreDestory(pLogStore); + cleanup(); +} + +int main(int argc, char** argv) { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_INFO + DEBUG_SCREEN + DEBUG_FILE; + + if (argc == 2) { + gAssert = atoi(argv[1]); + } + sTrace("gAssert : %d", gAssert); + + test1(); + test2(); + test3(); + test4(); + test5(); + + return 0; +} diff --git a/source/libs/sync/test/syncSnapshotReceiverTest.cpp b/source/libs/sync/test/syncSnapshotReceiverTest.cpp new file mode 100644 index 0000000000..69670f09a6 --- /dev/null +++ b/source/libs/sync/test/syncSnapshotReceiverTest.cpp @@ -0,0 +1,63 @@ +#include +#include +#include "syncIO.h" +#include "syncInt.h" +#include "syncMessage.h" +#include "syncRaftStore.h" +#include "syncSnapshot.h" +#include "syncUtil.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +void CommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) {} +void PreCommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) {} +void RollBackCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) {} + +void RestoreFinishCb(struct SSyncFSM* pFsm) {} +void ReConfigCb(struct SSyncFSM* pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) {} + +int32_t GetSnapshot(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { return 0; } + +int32_t SnapshotStartRead(struct 
SSyncFSM* pFsm, void** ppReader) { return 0; } +int32_t SnapshotStopRead(struct SSyncFSM* pFsm, void* pReader) { return 0; } +int32_t SnapshotDoRead(struct SSyncFSM* pFsm, void* pReader, void** ppBuf, int32_t* len) { return 0; } + +int32_t SnapshotStartWrite(struct SSyncFSM* pFsm, void** ppWriter) { return 0; } +int32_t SnapshotStopWrite(struct SSyncFSM* pFsm, void* pWriter, bool isApply) { return 0; } +int32_t SnapshotDoWrite(struct SSyncFSM* pFsm, void* pWriter, void* pBuf, int32_t len) { return 0; } + +SSyncSnapshotReceiver* createReceiver() { + SSyncNode* pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(*pSyncNode)); + pSyncNode->pRaftStore = (SRaftStore*)taosMemoryMalloc(sizeof(*(pSyncNode->pRaftStore))); + pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(*(pSyncNode->pFsm))); + pSyncNode->pFsm->FpSnapshotStartWrite = SnapshotStartWrite; + pSyncNode->pFsm->FpSnapshotStopWrite = SnapshotStopWrite; + pSyncNode->pFsm->FpSnapshotDoWrite = SnapshotDoWrite; + + SSyncSnapshotReceiver* pReceiver = snapshotReceiverCreate(pSyncNode, 2); + pReceiver->start = true; + pReceiver->ack = 20; + pReceiver->pWriter = (void*)0x11; + pReceiver->term = 66; + pReceiver->privateTerm = 99; + + return pReceiver; +} + +int main() { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; + logTest(); + + SSyncSnapshotReceiver* pReceiver = createReceiver(); + sTrace("%s", snapshotReceiver2Str(pReceiver)); + + return 0; +} diff --git a/source/libs/sync/test/syncSnapshotRspTest.cpp b/source/libs/sync/test/syncSnapshotRspTest.cpp new file mode 100644 index 0000000000..f689d47aaf --- /dev/null +++ b/source/libs/sync/test/syncSnapshotRspTest.cpp @@ -0,0 +1,101 @@ +#include +#include +#include "syncIO.h" +#include "syncInt.h" +#include "syncMessage.h" +#include "syncUtil.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: 
error"); + sFatal("--- sync log test: fatal"); +} + +SyncSnapshotRsp *createMsg() { + SyncSnapshotRsp *pMsg = syncSnapshotRspBuild(1000); + pMsg->srcId.addr = syncUtilAddr2U64("127.0.0.1", 1234); + pMsg->srcId.vgId = 100; + pMsg->destId.addr = syncUtilAddr2U64("127.0.0.1", 5678); + pMsg->destId.vgId = 100; + pMsg->term = 11; + pMsg->privateTerm = 99; + pMsg->lastIndex = 22; + pMsg->lastTerm = 33; + pMsg->ack = 44; + pMsg->code = 55; + return pMsg; +} + +void test1() { + SyncSnapshotRsp *pMsg = createMsg(); + syncSnapshotRspLog2((char *)"test1:", pMsg); + syncSnapshotRspDestroy(pMsg); +} + +void test2() { + SyncSnapshotRsp *pMsg = createMsg(); + uint32_t len = pMsg->bytes; + char * serialized = (char *)taosMemoryMalloc(len); + syncSnapshotRspSerialize(pMsg, serialized, len); + SyncSnapshotRsp *pMsg2 = syncSnapshotRspBuild(1000); + syncSnapshotRspDeserialize(serialized, len, pMsg2); + syncSnapshotRspLog2((char *)"test2: syncSnapshotRspSerialize -> syncSnapshotRspDeserialize ", pMsg2); + + taosMemoryFree(serialized); + syncSnapshotRspDestroy(pMsg); + syncSnapshotRspDestroy(pMsg2); +} + +void test3() { + SyncSnapshotRsp *pMsg = createMsg(); + uint32_t len; + char * serialized = syncSnapshotRspSerialize2(pMsg, &len); + SyncSnapshotRsp *pMsg2 = syncSnapshotRspDeserialize2(serialized, len); + syncSnapshotRspLog2((char *)"test3: syncSnapshotRspSerialize2 -> syncSnapshotRspDeserialize2 ", pMsg2); + + taosMemoryFree(serialized); + syncSnapshotRspDestroy(pMsg); + syncSnapshotRspDestroy(pMsg2); +} + +void test4() { + SyncSnapshotRsp *pMsg = createMsg(); + SRpcMsg rpcMsg; + syncSnapshotRsp2RpcMsg(pMsg, &rpcMsg); + SyncSnapshotRsp *pMsg2 = (SyncSnapshotRsp *)taosMemoryMalloc(rpcMsg.contLen); + syncSnapshotRspFromRpcMsg(&rpcMsg, pMsg2); + syncSnapshotRspLog2((char *)"test4: syncSnapshotRsp2RpcMsg -> syncSnapshotRspFromRpcMsg ", pMsg2); + + rpcFreeCont(rpcMsg.pCont); + syncSnapshotRspDestroy(pMsg); + syncSnapshotRspDestroy(pMsg2); +} + +void test5() { + SyncSnapshotRsp *pMsg = 
createMsg(); + SRpcMsg rpcMsg; + syncSnapshotRsp2RpcMsg(pMsg, &rpcMsg); + SyncSnapshotRsp *pMsg2 = syncSnapshotRspFromRpcMsg2(&rpcMsg); + syncSnapshotRspLog2((char *)"test5: syncSnapshotRsp2RpcMsg -> syncSnapshotRspFromRpcMsg2 ", pMsg2); + + rpcFreeCont(rpcMsg.pCont); + syncSnapshotRspDestroy(pMsg); + syncSnapshotRspDestroy(pMsg2); +} + +int main() { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; + logTest(); + + test1(); + test2(); + test3(); + test4(); + test5(); + + return 0; +} diff --git a/source/libs/sync/test/syncSnapshotSendTest.cpp b/source/libs/sync/test/syncSnapshotSendTest.cpp new file mode 100644 index 0000000000..01d3264693 --- /dev/null +++ b/source/libs/sync/test/syncSnapshotSendTest.cpp @@ -0,0 +1,101 @@ +#include +#include +#include "syncIO.h" +#include "syncInt.h" +#include "syncMessage.h" +#include "syncUtil.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +SyncSnapshotSend *createMsg() { + SyncSnapshotSend *pMsg = syncSnapshotSendBuild(20, 1000); + pMsg->srcId.addr = syncUtilAddr2U64("127.0.0.1", 1234); + pMsg->srcId.vgId = 100; + pMsg->destId.addr = syncUtilAddr2U64("127.0.0.1", 5678); + pMsg->destId.vgId = 100; + pMsg->term = 11; + pMsg->privateTerm = 99; + pMsg->lastIndex = 22; + pMsg->lastTerm = 33; + pMsg->seq = 44; + strcpy(pMsg->data, "hello world"); + return pMsg; +} + +void test1() { + SyncSnapshotSend *pMsg = createMsg(); + syncSnapshotSendLog2((char *)"test1:", pMsg); + syncSnapshotSendDestroy(pMsg); +} + +void test2() { + SyncSnapshotSend *pMsg = createMsg(); + uint32_t len = pMsg->bytes; + char * serialized = (char *)taosMemoryMalloc(len); + syncSnapshotSendSerialize(pMsg, serialized, len); + SyncSnapshotSend *pMsg2 = syncSnapshotSendBuild(pMsg->dataLen, 1000); + syncSnapshotSendDeserialize(serialized, 
len, pMsg2); + syncSnapshotSendLog2((char *)"test2: syncSnapshotSendSerialize -> syncSnapshotSendDeserialize ", pMsg2); + + taosMemoryFree(serialized); + syncSnapshotSendDestroy(pMsg); + syncSnapshotSendDestroy(pMsg2); +} + +void test3() { + SyncSnapshotSend *pMsg = createMsg(); + uint32_t len; + char * serialized = syncSnapshotSendSerialize2(pMsg, &len); + SyncSnapshotSend *pMsg2 = syncSnapshotSendDeserialize2(serialized, len); + syncSnapshotSendLog2((char *)"test3: syncSnapshotSendSerialize2 -> syncSnapshotSendDeserialize2 ", pMsg2); + + taosMemoryFree(serialized); + syncSnapshotSendDestroy(pMsg); + syncSnapshotSendDestroy(pMsg2); +} + +void test4() { + SyncSnapshotSend *pMsg = createMsg(); + SRpcMsg rpcMsg; + syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); + SyncSnapshotSend *pMsg2 = (SyncSnapshotSend *)taosMemoryMalloc(rpcMsg.contLen); + syncSnapshotSendFromRpcMsg(&rpcMsg, pMsg2); + syncSnapshotSendLog2((char *)"test4: syncSnapshotSend2RpcMsg -> syncSnapshotSendFromRpcMsg ", pMsg2); + + rpcFreeCont(rpcMsg.pCont); + syncSnapshotSendDestroy(pMsg); + syncSnapshotSendDestroy(pMsg2); +} + +void test5() { + SyncSnapshotSend *pMsg = createMsg(); + SRpcMsg rpcMsg; + syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); + SyncSnapshotSend *pMsg2 = syncSnapshotSendFromRpcMsg2(&rpcMsg); + syncSnapshotSendLog2((char *)"test5: syncSnapshotSend2RpcMsg -> syncSnapshotSendFromRpcMsg2 ", pMsg2); + + rpcFreeCont(rpcMsg.pCont); + syncSnapshotSendDestroy(pMsg); + syncSnapshotSendDestroy(pMsg2); +} + +int main() { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; + logTest(); + + test1(); + test2(); + test3(); + test4(); + test5(); + + return 0; +} diff --git a/source/libs/sync/test/syncSnapshotSenderTest.cpp b/source/libs/sync/test/syncSnapshotSenderTest.cpp new file mode 100644 index 0000000000..404ba2acae --- /dev/null +++ b/source/libs/sync/test/syncSnapshotSenderTest.cpp @@ -0,0 +1,72 @@ +#include +#include +#include "syncIO.h" +#include "syncInt.h" +#include "syncMessage.h" 
+#include "syncRaftStore.h" +#include "syncSnapshot.h" +#include "syncUtil.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +void CommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) {} +void PreCommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) {} +void RollBackCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) {} + +void RestoreFinishCb(struct SSyncFSM* pFsm) {} +void ReConfigCb(struct SSyncFSM* pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) {} + +int32_t GetSnapshot(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { return 0; } + +int32_t SnapshotStartRead(struct SSyncFSM* pFsm, void** ppReader) { return 0; } +int32_t SnapshotStopRead(struct SSyncFSM* pFsm, void* pReader) { return 0; } +int32_t SnapshotDoRead(struct SSyncFSM* pFsm, void* pReader, void** ppBuf, int32_t* len) { return 0; } + +int32_t SnapshotStartWrite(struct SSyncFSM* pFsm, void** ppWriter) { return 0; } +int32_t SnapshotStopWrite(struct SSyncFSM* pFsm, void* pWriter, bool isApply) { return 0; } +int32_t SnapshotDoWrite(struct SSyncFSM* pFsm, void* pWriter, void* pBuf, int32_t len) { return 0; } + +SSyncSnapshotSender* createSender() { + SSyncNode* pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(*pSyncNode)); + pSyncNode->pRaftStore = (SRaftStore*)taosMemoryMalloc(sizeof(*(pSyncNode->pRaftStore))); + pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(*(pSyncNode->pFsm))); + pSyncNode->pFsm->FpSnapshotStartRead = SnapshotStartRead; + pSyncNode->pFsm->FpSnapshotStopRead = SnapshotStopRead; + pSyncNode->pFsm->FpSnapshotDoRead = SnapshotDoRead; + pSyncNode->pFsm->FpGetSnapshot = GetSnapshot; + + SSyncSnapshotSender* pSender = snapshotSenderCreate(pSyncNode, 2); + pSender->start = true; + pSender->seq = 10; + pSender->ack = 20; + pSender->pReader = 
(void*)0x11; + pSender->blockLen = 20; + pSender->pCurrentBlock = taosMemoryMalloc(pSender->blockLen); + snprintf((char*)(pSender->pCurrentBlock), pSender->blockLen, "%s", "hello"); + + pSender->snapshot.lastApplyIndex = 99; + pSender->snapshot.lastApplyTerm = 88; + pSender->sendingMS = 77; + pSender->term = 66; + pSender->privateTerm = 99; + + return pSender; +} + +int main() { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; + logTest(); + + SSyncSnapshotSender* pSender = createSender(); + sTrace("%s", snapshotSender2Str(pSender)); + + return 0; +} diff --git a/source/libs/sync/test/syncTest.cpp b/source/libs/sync/test/syncTest.cpp index ffe8b81571..97de81572a 100644 --- a/source/libs/sync/test/syncTest.cpp +++ b/source/libs/sync/test/syncTest.cpp @@ -50,14 +50,16 @@ void test4() { } int main(int argc, char** argv) { - // taosInitLog("tmp/syncTest.log", 100); + taosInitLog("/tmp/syncTest.log", 100); tsAsyncLog = 0; + sDebugFlag = DEBUG_SCREEN + DEBUG_FILE + DEBUG_TRACE + DEBUG_INFO + DEBUG_ERROR; test1(); test2(); test3(); test4(); + /* if (argc == 2) { bool bTaosDirExist = taosDirExist(argv[1]); printf("%s bTaosDirExist:%d \n", argv[1], bTaosDirExist); @@ -65,7 +67,8 @@ int main(int argc, char** argv) { bool bTaosCheckExistFile = taosCheckExistFile(argv[1]); printf("%s bTaosCheckExistFile:%d \n", argv[1], bTaosCheckExistFile); } + */ - // taosCloseLog(); + taosCloseLog(); return 0; } diff --git a/source/libs/sync/test/syncTestTool.cpp b/source/libs/sync/test/syncTestTool.cpp new file mode 100644 index 0000000000..782baf3c97 --- /dev/null +++ b/source/libs/sync/test/syncTestTool.cpp @@ -0,0 +1,399 @@ +#include +#include +#include "os.h" +#include "syncEnv.h" +#include "syncIO.h" +#include "syncInt.h" +#include "syncRaftCfg.h" +#include "syncUtil.h" +#include "wal.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + 
sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +uint16_t gPorts[] = {7000, 7001, 7002, 7003, 7004}; +const char* gDir = "./syncTestTool"; +int32_t gVgId = 1234; +SyncIndex gSnapshotLastApplyIndex; +SyncIndex gSnapshotLastApplyTerm; +int gIterTimes = 0; + +SyncIndex gFinishLastApplyIndex; +SyncIndex gFinishLastApplyTerm; + +void init() { + int code = walInit(); + assert(code == 0); + + code = syncInit(); + assert(code == 0); +} + +void cleanup() { walCleanUp(); } + +void CommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==CommitCb== pFsm:%p, index:%ld, isWeak:%d, code:%d, state:%d %s, flag:%lu, term:%lu " + "currentTerm:%lu \n", + pFsm, cbMeta.index, cbMeta.isWeak, cbMeta.code, cbMeta.state, syncUtilState2String(cbMeta.state), + cbMeta.flag, cbMeta.term, cbMeta.currentTerm); + syncRpcMsgLog2(logBuf, (SRpcMsg*)pMsg); +} + +void PreCommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==PreCommitCb== pFsm:%p, index:%ld, isWeak:%d, code:%d, state:%d %s, flag:%lu, term:%lu " + "currentTerm:%lu \n", + pFsm, cbMeta.index, cbMeta.isWeak, cbMeta.code, cbMeta.state, syncUtilState2String(cbMeta.state), + cbMeta.flag, cbMeta.term, cbMeta.currentTerm); + syncRpcMsgLog2(logBuf, (SRpcMsg*)pMsg); +} + +void RollBackCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==RollBackCb== pFsm:%p, index:%ld, isWeak:%d, code:%d, state:%d %s, flag:%lu, term:%lu " + "currentTerm:%lu \n", + pFsm, cbMeta.index, cbMeta.isWeak, cbMeta.code, cbMeta.state, syncUtilState2String(cbMeta.state), + cbMeta.flag, cbMeta.term, cbMeta.currentTerm); + syncRpcMsgLog2(logBuf, (SRpcMsg*)pMsg); +} + +int32_t GetSnapshotCb(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { + pSnapshot->data = NULL; 
+ pSnapshot->lastApplyIndex = gSnapshotLastApplyIndex; + pSnapshot->lastApplyTerm = gSnapshotLastApplyTerm; + return 0; +} + +int32_t SnapshotStartRead(struct SSyncFSM* pFsm, void** ppReader) { + *ppReader = (void*)0xABCD; + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStartRead== pFsm:%p, *ppReader:%p", pFsm, *ppReader); + sTrace("%s", logBuf); + return 0; +} + +int32_t SnapshotStopRead(struct SSyncFSM* pFsm, void* pReader) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStopRead== pFsm:%p, pReader:%p", pFsm, pReader); + sTrace("%s", logBuf); + return 0; +} + +int32_t SnapshotDoRead(struct SSyncFSM* pFsm, void* pReader, void** ppBuf, int32_t* len) { + static int readIter = 0; + + if (readIter == gIterTimes) { + *len = 0; + *ppBuf = NULL; + } else if (readIter < gIterTimes) { + *len = 20; + *ppBuf = taosMemoryMalloc(*len); + snprintf((char*)*ppBuf, *len, "data iter:%d", readIter); + } + + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==SnapshotDoRead== pFsm:%p, pReader:%p, *len:%d, *ppBuf:[%s], readIter:%d", pFsm, pReader, + *len, (char*)(*ppBuf), readIter); + sTrace("%s", logBuf); + + readIter++; + return 0; +} + +int32_t SnapshotStartWrite(struct SSyncFSM* pFsm, void** ppWriter) { + *ppWriter = (void*)0xCDEF; + char logBuf[256] = {0}; + + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStartWrite== pFsm:%p, *ppWriter:%p", pFsm, *ppWriter); + sTrace("%s", logBuf); + return 0; +} + +int32_t SnapshotStopWrite(struct SSyncFSM* pFsm, void* pWriter, bool isApply) { + if (isApply) { + gSnapshotLastApplyIndex = gFinishLastApplyIndex; + gSnapshotLastApplyTerm = gFinishLastApplyTerm; + } + + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==SnapshotStopWrite== pFsm:%p, pWriter:%p, isApply:%d, gSnapshotLastApplyIndex:%ld, " + "gSnapshotLastApplyTerm:%ld", + pFsm, pWriter, isApply, gSnapshotLastApplyIndex, gSnapshotLastApplyTerm); + 
sTrace("%s", logBuf); + + return 0; +} + +int32_t SnapshotDoWrite(struct SSyncFSM* pFsm, void* pWriter, void* pBuf, int32_t len) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotDoWrite== pFsm:%p, pWriter:%p, len:%d pBuf:[%s]", pFsm, + pWriter, len, (char*)pBuf); + sTrace("%s", logBuf); + return 0; +} + +void RestoreFinishCb(struct SSyncFSM* pFsm) { sTrace("==callback== ==RestoreFinishCb== pFsm:%p", pFsm); } + +void ReConfigCb(struct SSyncFSM* pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) { + char* s = syncCfg2Str(&newCfg); + sTrace("==callback== ==ReConfigCb== flag:0x%lX, isDrop:%d, index:%ld, code:%d, currentTerm:%lu, term:%lu, newCfg:%s", + cbMeta.flag, cbMeta.isDrop, cbMeta.index, cbMeta.code, cbMeta.currentTerm, cbMeta.term, s); + taosMemoryFree(s); +} + +SSyncFSM* createFsm() { + SSyncFSM* pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(SSyncFSM)); + memset(pFsm, 0, sizeof(*pFsm)); + + pFsm->FpCommitCb = CommitCb; + pFsm->FpPreCommitCb = PreCommitCb; + pFsm->FpRollBackCb = RollBackCb; + + pFsm->FpReConfigCb = ReConfigCb; + pFsm->FpGetSnapshot = GetSnapshotCb; + pFsm->FpRestoreFinishCb = RestoreFinishCb; + + pFsm->FpSnapshotStartRead = SnapshotStartRead; + pFsm->FpSnapshotStopRead = SnapshotStopRead; + pFsm->FpSnapshotDoRead = SnapshotDoRead; + pFsm->FpSnapshotStartWrite = SnapshotStartWrite; + pFsm->FpSnapshotStopWrite = SnapshotStopWrite; + pFsm->FpSnapshotDoWrite = SnapshotDoWrite; + + return pFsm; +} + +SWal* createWal(char* path, int32_t vgId) { + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = vgId; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal* pWal = walOpen(path, &walCfg); + assert(pWal != NULL); + return pWal; +} + +int64_t createSyncNode(int32_t replicaNum, int32_t myIndex, int32_t vgId, SWal* pWal, char* path, bool isStandBy, + bool enableSnapshot) { + 
SSyncInfo syncInfo; + syncInfo.vgId = vgId; + syncInfo.msgcb = &gSyncIO->msgcb; + syncInfo.FpSendMsg = syncIOSendMsg; + syncInfo.FpEqMsg = syncIOEqMsg; + syncInfo.pFsm = createFsm(); + snprintf(syncInfo.path, sizeof(syncInfo.path), "%s_sync_replica%d_index%d", path, replicaNum, myIndex); + syncInfo.pWal = pWal; + syncInfo.isStandBy = isStandBy; + syncInfo.snapshotEnable = enableSnapshot; + + SSyncCfg* pCfg = &syncInfo.syncCfg; + +#if 0 + { + pCfg->myIndex = myIndex; + pCfg->replicaNum = replicaNum; + + for (int i = 0; i < replicaNum; ++i) { + pCfg->nodeInfo[i].nodePort = gPorts[i]; + taosGetFqdn(pCfg->nodeInfo[i].nodeFqdn); + // snprintf(pCfg->nodeInfo[i].nodeFqdn, sizeof(pCfg->nodeInfo[i].nodeFqdn), "%s", "127.0.0.1"); + } + } +#endif + + if (isStandBy) { + pCfg->myIndex = 0; + pCfg->replicaNum = 1; + pCfg->nodeInfo[0].nodePort = gPorts[myIndex]; + taosGetFqdn(pCfg->nodeInfo[0].nodeFqdn); + + } else { + pCfg->myIndex = myIndex; + pCfg->replicaNum = replicaNum; + + for (int i = 0; i < replicaNum; ++i) { + pCfg->nodeInfo[i].nodePort = gPorts[i]; + taosGetFqdn(pCfg->nodeInfo[i].nodeFqdn); + // snprintf(pCfg->nodeInfo[i].nodeFqdn, sizeof(pCfg->nodeInfo[i].nodeFqdn), "%s", "127.0.0.1"); + } + } + + + int64_t rid = syncOpen(&syncInfo); + assert(rid > 0); + + SSyncNode* pSyncNode = (SSyncNode*)syncNodeAcquire(rid); + assert(pSyncNode != NULL); + gSyncIO->FpOnSyncPing = pSyncNode->FpOnPing; + gSyncIO->FpOnSyncPingReply = pSyncNode->FpOnPingReply; + gSyncIO->FpOnSyncTimeout = pSyncNode->FpOnTimeout; + gSyncIO->FpOnSyncClientRequest = pSyncNode->FpOnClientRequest; + + gSyncIO->FpOnSyncRequestVote = pSyncNode->FpOnRequestVote; + gSyncIO->FpOnSyncRequestVoteReply = pSyncNode->FpOnRequestVoteReply; + gSyncIO->FpOnSyncAppendEntries = pSyncNode->FpOnAppendEntries; + gSyncIO->FpOnSyncAppendEntriesReply = pSyncNode->FpOnAppendEntriesReply; + + gSyncIO->FpOnSyncSnapshotSend = pSyncNode->FpOnSnapshotSend; + gSyncIO->FpOnSyncSnapshotRsp = pSyncNode->FpOnSnapshotRsp; + + 
gSyncIO->pSyncNode = pSyncNode; + syncNodeRelease(pSyncNode); + + return rid; +} + +void configChange(int64_t rid, int32_t newReplicaNum, int32_t myIndex) { + SSyncCfg syncCfg; + + syncCfg.myIndex = myIndex; + syncCfg.replicaNum = newReplicaNum; + + for (int i = 0; i < newReplicaNum; ++i) { + syncCfg.nodeInfo[i].nodePort = gPorts[i]; + taosGetFqdn(syncCfg.nodeInfo[i].nodeFqdn); + } + + syncReconfig(rid, &syncCfg); +} + +void usage(char* exe) { + printf( + "usage: %s replicaNum(1-5) myIndex(0-..) enableSnapshot(0/1) lastApplyIndex(>=-1) lastApplyTerm(>=0) " + "writeRecordNum(>=0) " + "isStandBy(0/1) isConfigChange(0-5) iterTimes(>=0) finishLastApplyIndex(>=-1) finishLastApplyTerm(>=0) \n", + exe); +} + +SRpcMsg* createRpcMsg(int i, int count, int myIndex) { + SRpcMsg* pMsg = (SRpcMsg*)taosMemoryMalloc(sizeof(SRpcMsg)); + memset(pMsg, 0, sizeof(SRpcMsg)); + pMsg->msgType = 9999; + pMsg->contLen = 256; + pMsg->pCont = rpcMallocCont(pMsg->contLen); + snprintf((char*)(pMsg->pCont), pMsg->contLen, "value-myIndex:%u-%d-%d-%ld", myIndex, i, count, taosGetTimestampMs()); + return pMsg; +} + +int main(int argc, char** argv) { + sprintf(tsTempDir, "%s", "."); + tsAsyncLog = 0; + sDebugFlag = DEBUG_SCREEN + DEBUG_FILE + DEBUG_TRACE + DEBUG_INFO + DEBUG_ERROR; + + if (argc != 12) { + usage(argv[0]); + exit(-1); + } + + int32_t replicaNum = atoi(argv[1]); + int32_t myIndex = atoi(argv[2]); + bool enableSnapshot = atoi(argv[3]); + int32_t lastApplyIndex = atoi(argv[4]); + int32_t lastApplyTerm = atoi(argv[5]); + int32_t writeRecordNum = atoi(argv[6]); + bool isStandBy = atoi(argv[7]); + int32_t isConfigChange = atoi(argv[8]); + int32_t iterTimes = atoi(argv[9]); + int32_t finishLastApplyIndex = atoi(argv[10]); + int32_t finishLastApplyTerm = atoi(argv[11]); + + sTrace( + "args: replicaNum:%d, myIndex:%d, enableSnapshot:%d, lastApplyIndex:%d, lastApplyTerm:%d, writeRecordNum:%d, " + "isStandBy:%d, isConfigChange:%d, iterTimes:%d, finishLastApplyIndex:%d, finishLastApplyTerm:%d", + 
replicaNum, myIndex, enableSnapshot, lastApplyIndex, lastApplyTerm, writeRecordNum, isStandBy, isConfigChange, + iterTimes, finishLastApplyIndex, finishLastApplyTerm); + + // check parameter + assert(replicaNum >= 1 && replicaNum <= 5); + // assert(myIndex >= 0 && myIndex < replicaNum); + assert(lastApplyIndex >= -1); + assert(lastApplyTerm >= 0); + assert(writeRecordNum >= 0); + assert(isConfigChange >= 0 && isConfigChange <= 5); + assert(iterTimes >= 0); + assert(finishLastApplyIndex >= -1); + assert(finishLastApplyTerm >= 0); + + char logFile[256]; + snprintf(logFile, sizeof(logFile), "/tmp/%s-replicaNum%d-myIndex%d.log", gDir, replicaNum, myIndex); + taosInitLog(logFile, 100); + sTrace("logFile : %s", logFile); + + gSnapshotLastApplyIndex = lastApplyIndex; + gSnapshotLastApplyTerm = lastApplyTerm; + gIterTimes = iterTimes; + + gFinishLastApplyIndex = finishLastApplyIndex; + gFinishLastApplyTerm = finishLastApplyTerm; + + init(); + int32_t ret = syncIOStart((char*)"127.0.0.1", gPorts[myIndex]); + assert(ret == 0); + + char walPath[128]; + snprintf(walPath, sizeof(walPath), "%s_wal_replica%d_index%d", gDir, replicaNum, myIndex); + SWal* pWal = createWal(walPath, gVgId); + + int64_t rid = createSyncNode(replicaNum, myIndex, gVgId, pWal, (char*)gDir, isStandBy, enableSnapshot); + assert(rid > 0); + syncStart(rid); + + SSyncNode* pSyncNode = (SSyncNode*)syncNodeAcquire(rid); + assert(pSyncNode != NULL); + + if (isConfigChange > 0) { + configChange(rid, isConfigChange, myIndex); + } + + //--------------------------- + int32_t alreadySend = 0; + while (1) { + char* simpleStr = syncNode2SimpleStr(pSyncNode); + + if (alreadySend < writeRecordNum) { + SRpcMsg* pRpcMsg = createRpcMsg(alreadySend, writeRecordNum, myIndex); + int32_t ret = syncPropose(rid, pRpcMsg, false); + if (ret == TAOS_SYNC_PROPOSE_NOT_LEADER) { + sTrace("%s value%d write not leader", simpleStr, alreadySend); + } else { + assert(ret == 0); + sTrace("%s value%d write ok", simpleStr, alreadySend); + } + 
alreadySend++; + + rpcFreeCont(pRpcMsg->pCont); + taosMemoryFree(pRpcMsg); + } else { + sTrace("%s", simpleStr); + } + + taosMsleep(1000); + taosMemoryFree(simpleStr); + taosMsleep(1000); + } + + syncNodeRelease(pSyncNode); + syncStop(rid); + walClose(pWal); + syncIOStop(); + cleanup(); + taosCloseLog(); + return 0; +} diff --git a/source/libs/sync/test/syncTimeoutTest.cpp b/source/libs/sync/test/syncTimeoutTest.cpp index 30f25bd1d8..e60fabe38b 100644 --- a/source/libs/sync/test/syncTimeoutTest.cpp +++ b/source/libs/sync/test/syncTimeoutTest.cpp @@ -78,6 +78,26 @@ void test5() { syncTimeoutDestroy(pMsg2); } +void test6() { + SyncTimeout *pMsg = createMsg(); + char * jsonStr = syncTimeout2Str(pMsg); + sTrace("jsonStr: %s", jsonStr); + + syncUtilJson2Line(jsonStr); + sTrace("jsonStr: %s", jsonStr); + + char str[10]; + snprintf(str, sizeof(str), "%s", "{}"); + sTrace("str: %s", str); + syncUtilJson2Line(str); + sTrace("str: %s", str); + + snprintf(str, sizeof(str), "%s", ""); + sTrace("str: %s", str); + syncUtilJson2Line(str); + sTrace("str: %s", str); +} + int main() { tsAsyncLog = 0; sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; @@ -88,6 +108,7 @@ int main() { test3(); test4(); test5(); + test6(); return 0; } diff --git a/source/libs/wal/inc/walInt.h b/source/libs/wal/inc/walInt.h index 84fe2814ff..7ca105ff2b 100644 --- a/source/libs/wal/inc/walInt.h +++ b/source/libs/wal/inc/walInt.h @@ -132,6 +132,7 @@ static inline void walResetVer(SWalVer* pVer) { int walLoadMeta(SWal* pWal); int walSaveMeta(SWal* pWal); +int walRemoveMeta(SWal* pWal); int walRollFileInfo(SWal* pWal); int walCheckAndRepairMeta(SWal* pWal); diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index 9aa848a7bb..8e9cb3a84b 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -419,3 +419,12 @@ int walLoadMeta(SWal* pWal) { taosMemoryFree(buf); return code; } + +int walRemoveMeta(SWal* pWal) { + int metaVer = walFindCurMetaVer(pWal); + if 
(metaVer == -1) return 0; + char fnameStr[WAL_FILE_LEN]; + walBuildMetaName(pWal, metaVer, fnameStr); + taosRemoveFile(fnameStr); + return 0; +} diff --git a/source/libs/wal/src/walMgmt.c b/source/libs/wal/src/walMgmt.c index 71cd6de73f..9505b02806 100644 --- a/source/libs/wal/src/walMgmt.c +++ b/source/libs/wal/src/walMgmt.c @@ -75,7 +75,7 @@ void walCleanUp() { } SWal *walOpen(const char *path, SWalCfg *pCfg) { - SWal *pWal = taosMemoryMalloc(sizeof(SWal)); + SWal *pWal = taosMemoryCalloc(1, sizeof(SWal)); if (pWal == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); return NULL; @@ -92,6 +92,13 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) { return NULL; } + // init ref + pWal->pRefHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), true, HASH_ENTRY_LOCK); + if (pWal->pRefHash == NULL) { + taosMemoryFree(pWal); + return NULL; + } + // open meta walResetVer(&pWal->vers); pWal->pWriteLogTFile = NULL; @@ -100,6 +107,7 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) { pWal->fileInfoSet = taosArrayInit(8, sizeof(SWalFileInfo)); if (pWal->fileInfoSet == NULL) { wError("vgId:%d, path:%s, failed to init taosArray %s", pWal->cfg.vgId, pWal->path, strerror(errno)); + taosHashCleanup(pWal->pRefHash); taosMemoryFree(pWal); return NULL; } @@ -115,12 +123,14 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) { if (taosThreadMutexInit(&pWal->mutex, NULL) < 0) { taosArrayDestroy(pWal->fileInfoSet); + taosHashCleanup(pWal->pRefHash); taosMemoryFree(pWal); return NULL; } pWal->refId = taosAddRef(tsWal.refSetId, pWal); if (pWal->refId < 0) { + taosHashCleanup(pWal->pRefHash); taosThreadMutexDestroy(&pWal->mutex); taosArrayDestroy(pWal->fileInfoSet); taosMemoryFree(pWal); @@ -130,6 +140,7 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) { walLoadMeta(pWal); if (walCheckAndRepairMeta(pWal) < 0) { + taosHashCleanup(pWal->pRefHash); taosRemoveRef(tsWal.refSetId, pWal->refId); taosThreadMutexDestroy(&pWal->mutex); taosArrayDestroy(pWal->fileInfoSet); @@ -175,6 
+186,7 @@ void walClose(SWal *pWal) { walSaveMeta(pWal); taosArrayDestroy(pWal->fileInfoSet); pWal->fileInfoSet = NULL; + taosHashCleanup(pWal->pRefHash); taosThreadMutexUnlock(&pWal->mutex); taosRemoveRef(tsWal.refSetId, pWal->refId); diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c index 793ab8b2fb..d30e0b6844 100644 --- a/source/libs/wal/src/walWrite.c +++ b/source/libs/wal/src/walWrite.c @@ -18,12 +18,47 @@ #include "tchecksum.h" #include "walInt.h" -void walRestoreFromSnapshot(SWal *pWal, int64_t ver) { - /*pWal->vers.firstVer = -1;*/ +int32_t walRestoreFromSnapshot(SWal *pWal, int64_t ver) { + taosThreadMutexLock(&pWal->mutex); + + void *pIter = NULL; + while (1) { + taosHashIterate(pWal->pRefHash, pIter); + if (pIter == NULL) break; + SWalRef *pRef = (SWalRef *)pIter; + if (pRef->ver != -1) { + taosHashCancelIterate(pWal->pRefHash, pIter); + return -1; + } + } + + taosCloseFile(&pWal->pWriteLogTFile); + taosCloseFile(&pWal->pWriteIdxTFile); + + if (pWal->vers.firstVer != -1) { + int32_t fileSetSize = taosArrayGetSize(pWal->fileInfoSet); + for (int32_t i = 0; i < fileSetSize; i++) { + SWalFileInfo *pFileInfo = taosArrayGet(pWal->fileInfoSet, i); + char fnameStr[WAL_FILE_LEN]; + walBuildLogName(pWal, pFileInfo->firstVer, fnameStr); + taosRemoveFile(fnameStr); + } + } + walRemoveMeta(pWal); + + pWal->writeCur = -1; + pWal->totSize = 0; + pWal->lastRollSeq = -1; + + taosArrayClear(pWal->fileInfoSet); + pWal->vers.firstVer = -1; pWal->vers.lastVer = ver; pWal->vers.commitVer = ver - 1; pWal->vers.snapshotVer = ver - 1; pWal->vers.verInSnapshotting = -1; + + taosThreadMutexUnlock(&pWal->mutex); + return 0; } int32_t walCommit(SWal *pWal, int64_t ver) { diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index 28bc98a972..5a8cf562a0 100644 --- a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -22,6 +22,7 @@ # ---- dnode ./test.sh -f tsim/dnode/create_dnode.sim +./test.sh -f 
# Two-dnode cluster: add dnode2, wait until both dnodes are ready, then check
# that a plain dnode can be dropped, that dnode 1 cannot, and that dnode 2 can
# still be dropped after an mnode was created on it.
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1
system sh/deploy.sh -n dnode2 -i 2
system sh/exec.sh -n dnode1 -s start
system sh/exec.sh -n dnode2 -s start
sql connect

print =============== step1 create dnode2
sql create dnode $hostname port 7200

# Poll once per second, giving up after 10 tries.
$x = 0
step1:
  $x = $x + 1
  sleep 1000
  if $x == 10 then
    print ====> dnode not ready!
    return -1
  endi
sql show dnodes
print ===> $data00 $data01 $data02 $data03 $data04 $data05
print ===> $data10 $data11 $data12 $data13 $data14 $data15
if $rows != 2 then
  return -1
endi
if $data(1)[4] != ready then
  goto step1
endi
if $data(2)[4] != ready then
  goto step1
endi

sql create dnode $hostname port 7300
sql drop dnode 3
sql_error drop dnode 1

print =============== step2: create mnode
sql create mnode on dnode 2

print =============== step3: drop dnode 2
sql drop dnode 2
sql show dnodes;
if $rows != 1 then
  return -1
endi

if $data00 != 1 then
  return -1
endi

return
system sh/exec.sh -n dnode1 -s stop -x SIGINT
system sh/exec.sh -n dnode2 -s stop -x SIGINT
create mnode on dnode 4 + $x = 0 step2: $x = $x + 1 @@ -147,4 +150,4 @@ endi system sh/exec.sh -n dnode1 -s stop system sh/exec.sh -n dnode2 -s stop system sh/exec.sh -n dnode3 -s stop -system sh/exec.sh -n dnode4 -s stop \ No newline at end of file +system sh/exec.sh -n dnode4 -s stop From 6586f78599ad8a3aa536331889d7bbf3e2b62669 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Sat, 11 Jun 2022 12:44:58 +0800 Subject: [PATCH 05/16] refactor(sync): add last config index --- include/common/tmsgdef.h | 1 + include/libs/sync/sync.h | 4 + include/libs/sync/syncTools.h | 30 +++ source/dnode/mnode/impl/src/mndMain.c | 6 +- source/libs/sync/inc/syncInt.h | 4 +- source/libs/sync/inc/syncRaftCfg.h | 6 +- source/libs/sync/src/syncAppendEntries.c | 243 +++++++++++++++++- source/libs/sync/src/syncMain.c | 216 +++++++++++----- source/libs/sync/src/syncMessage.c | 167 ++++++++++++ source/libs/sync/src/syncRaftCfg.c | 8 + source/libs/sync/src/syncRaftLog.c | 16 +- source/libs/sync/src/syncSnapshot.c | 6 +- source/libs/sync/src/syncUtil.c | 8 +- source/libs/sync/test/CMakeLists.txt | 14 + .../libs/sync/test/syncLeaderTransferTest.cpp | 101 ++++++++ source/libs/sync/test/syncRaftCfgTest.cpp | 2 + 16 files changed, 746 insertions(+), 86 deletions(-) create mode 100644 source/libs/sync/test/syncLeaderTransferTest.cpp diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 40701640b7..ebebcb0f2a 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -236,6 +236,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_SYNC_CONFIG_CHANGE, "sync-config-change", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_SNAPSHOT_SEND, "sync-snapshot-send", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_SNAPSHOT_RSP, "sync-snapshot-rsp", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SYNC_LEADER_TRANSFER, "sync-leader-transfer", NULL, NULL) #if defined(TD_MSG_NUMBER_) TDMT_MAX diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 9d1385bff2..3a77cc1e19 100644 --- a/include/libs/sync/sync.h +++ 
b/include/libs/sync/sync.h @@ -48,6 +48,7 @@ typedef enum { TAOS_SYNC_PROPOSE_SUCCESS = 0, TAOS_SYNC_PROPOSE_NOT_LEADER = 1, TAOS_SYNC_PROPOSE_OTHER_ERROR = 2, + TAOS_SYNC_ONLY_ONE_REPLICA = 3, } ESyncProposeCode; typedef enum { @@ -200,6 +201,9 @@ int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta); int32_t syncReconfig(int64_t rid, const SSyncCfg* pNewCfg); int32_t syncReconfigRaw(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg); +int32_t syncLeaderTransfer(int64_t rid); +int32_t syncLeaderTransferTo(int64_t rid, SNodeInfo newLeader); + // to be moved to static void syncStartNormal(int64_t rid); void syncStartStandBy(int64_t rid); diff --git a/include/libs/sync/syncTools.h b/include/libs/sync/syncTools.h index bb50fc141c..a6802fc915 100644 --- a/include/libs/sync/syncTools.h +++ b/include/libs/sync/syncTools.h @@ -456,6 +456,36 @@ void syncSnapshotRspPrint2(char* s, const SyncSnapshotRsp* pMsg); void syncSnapshotRspLog(const SyncSnapshotRsp* pMsg); void syncSnapshotRspLog2(char* s, const SyncSnapshotRsp* pMsg); +// --------------------------------------------- +typedef struct SyncLeaderTransfer { + uint32_t bytes; + int32_t vgId; + uint32_t msgType; + /* + SRaftId srcId; + SRaftId destId; + */ + SRaftId newLeaderId; +} SyncLeaderTransfer; + +SyncLeaderTransfer* syncLeaderTransferBuild(int32_t vgId); +void syncLeaderTransferDestroy(SyncLeaderTransfer* pMsg); +void syncLeaderTransferSerialize(const SyncLeaderTransfer* pMsg, char* buf, uint32_t bufLen); +void syncLeaderTransferDeserialize(const char* buf, uint32_t len, SyncLeaderTransfer* pMsg); +char* syncLeaderTransferSerialize2(const SyncLeaderTransfer* pMsg, uint32_t* len); +SyncLeaderTransfer* syncLeaderTransferDeserialize2(const char* buf, uint32_t len); +void syncLeaderTransfer2RpcMsg(const SyncLeaderTransfer* pMsg, SRpcMsg* pRpcMsg); +void syncLeaderTransferFromRpcMsg(const SRpcMsg* pRpcMsg, SyncLeaderTransfer* pMsg); +SyncLeaderTransfer* syncLeaderTransferFromRpcMsg2(const SRpcMsg* 
pRpcMsg); +cJSON* syncLeaderTransfer2Json(const SyncLeaderTransfer* pMsg); +char* syncLeaderTransfer2Str(const SyncLeaderTransfer* pMsg); + +// for debug ---------------------- +void syncLeaderTransferPrint(const SyncLeaderTransfer* pMsg); +void syncLeaderTransferPrint2(char* s, const SyncLeaderTransfer* pMsg); +void syncLeaderTransferLog(const SyncLeaderTransfer* pMsg); +void syncLeaderTransferLog2(char* s, const SyncLeaderTransfer* pMsg); + // on message ---------------------- int32_t syncNodeOnPingCb(SSyncNode* ths, SyncPing* pMsg); int32_t syncNodeOnPingReplyCb(SSyncNode* ths, SyncPingReply* pMsg); diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 813e4c30b5..070b2a9643 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -402,7 +402,11 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { char logBuf[512] = {0}; char *syncNodeStr = sync2SimpleStr(pMgmt->sync); - snprintf(logBuf, sizeof(logBuf), "==vnodeProcessSyncReq== msgType:%d, syncNode: %s", pMsg->msgType, syncNodeStr); + snprintf(logBuf, sizeof(logBuf), "==mndProcessSyncMsg== msgType:%d, syncNode: %s", pMsg->msgType, syncNodeStr); + static int64_t mndTick = 0; + if (++mndTick % 1000 == 1) { + mTrace("sync trace msg:%s, %s", TMSG_INFO(pMsg->msgType), syncNodeStr); + } syncRpcMsgLog2(logBuf, pMsg); taosMemoryFree(syncNodeStr); diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 83f0bd7dd8..e7777af749 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -159,7 +159,7 @@ typedef struct SSyncNode { SSyncSnapshotSender* senders[TSDB_MAX_REPLICA]; SSyncSnapshotReceiver* pNewNodeReceiver; - SSnapshotMeta sMeta; + // SSnapshotMeta sMeta; } SSyncNode; @@ -194,7 +194,7 @@ int32_t syncNodeSendMsgByInfo(const SNodeInfo* nodeInfo, SSyncNode* pSyncNode, S cJSON* syncNode2Json(const SSyncNode* pSyncNode); char* syncNode2Str(const SSyncNode* pSyncNode); char* 
syncNode2SimpleStr(const SSyncNode* pSyncNode); -void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, bool* isDrop); +void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex lastConfigChangeIndex, bool* isDrop); SSyncNode* syncNodeAcquire(int64_t rid); void syncNodeRelease(SSyncNode* pNode); diff --git a/source/libs/sync/inc/syncRaftCfg.h b/source/libs/sync/inc/syncRaftCfg.h index 86c5fab87c..e72e1e7be7 100644 --- a/source/libs/sync/inc/syncRaftCfg.h +++ b/source/libs/sync/inc/syncRaftCfg.h @@ -35,6 +35,7 @@ typedef struct SRaftCfg { char path[TSDB_FILENAME_LEN * 2]; int8_t isStandBy; int8_t snapshotEnable; + SyncIndex lastConfigIndex; } SRaftCfg; SRaftCfg *raftCfgOpen(const char *path); @@ -52,8 +53,9 @@ int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg); int32_t raftCfgFromStr(const char *s, SRaftCfg *pRaftCfg); typedef struct SRaftCfgMeta { - int8_t isStandBy; - int8_t snapshotEnable; + int8_t isStandBy; + int8_t snapshotEnable; + SyncIndex lastConfigIndex; } SRaftCfgMeta; int32_t raftCfgCreateFile(SSyncCfg *pCfg, SRaftCfgMeta meta, const char *path); diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 01c95d8241..6b5a86ded9 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -88,6 +88,246 @@ // /\ UNCHANGED <> // /\ UNCHANGED <> // + +int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { + int32_t ret = 0; + + char logBuf[128] = {0}; + snprintf(logBuf, sizeof(logBuf), "==syncNodeOnAppendEntriesCb== term:%lu", ths->pRaftStore->currentTerm); + syncAppendEntriesLog2(logBuf, pMsg); + + if (pMsg->term > ths->pRaftStore->currentTerm) { + syncNodeUpdateTerm(ths, pMsg->term); + } + assert(pMsg->term <= ths->pRaftStore->currentTerm); + + // reset elect timer + if (pMsg->term == ths->pRaftStore->currentTerm) { + ths->leaderCache = pMsg->srcId; + syncNodeResetElectTimer(ths); + } + 
assert(pMsg->dataLen >= 0); + + SyncTerm localPreLogTerm = 0; + if (pMsg->prevLogIndex >= SYNC_INDEX_BEGIN && pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) { + SSyncRaftEntry* pEntry = ths->pLogStore->getEntry(ths->pLogStore, pMsg->prevLogIndex); + assert(pEntry != NULL); + localPreLogTerm = pEntry->term; + syncEntryDestory(pEntry); + } + + bool logOK = + (pMsg->prevLogIndex == SYNC_INDEX_INVALID) || + ((pMsg->prevLogIndex >= SYNC_INDEX_BEGIN) && + (pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) && (pMsg->prevLogTerm == localPreLogTerm)); + + // reject request + if ((pMsg->term < ths->pRaftStore->currentTerm) || + ((pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && !logOK)) { + sTrace( + "syncNodeOnAppendEntriesCb --> reject, pMsg->term:%lu, ths->pRaftStore->currentTerm:%lu, ths->state:%d, " + "logOK:%d", + pMsg->term, ths->pRaftStore->currentTerm, ths->state, logOK); + + SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); + pReply->srcId = ths->myRaftId; + pReply->destId = pMsg->srcId; + pReply->term = ths->pRaftStore->currentTerm; + pReply->success = false; + pReply->matchIndex = SYNC_INDEX_INVALID; + + SRpcMsg rpcMsg; + syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); + syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); + syncAppendEntriesReplyDestroy(pReply); + + return ret; + } + + // return to follower state + if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE) { + sTrace( + "syncNodeOnAppendEntriesCb --> return to follower, pMsg->term:%lu, ths->pRaftStore->currentTerm:%lu, " + "ths->state:%d, logOK:%d", + pMsg->term, ths->pRaftStore->currentTerm, ths->state, logOK); + + syncNodeBecomeFollower(ths, "from candidate by append entries"); + + // ret or reply? 
+ return ret; + } + + // accept request + if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_FOLLOWER && logOK) { + // preIndex = -1, or has preIndex entry in local log + assert(pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)); + + // has extra entries (> preIndex) in local log + bool hasExtraEntries = pMsg->prevLogIndex < ths->pLogStore->getLastIndex(ths->pLogStore); + + // has entries in SyncAppendEntries msg + bool hasAppendEntries = pMsg->dataLen > 0; + + sTrace( + "syncNodeOnAppendEntriesCb --> accept, pMsg->term:%lu, ths->pRaftStore->currentTerm:%lu, ths->state:%d, " + "logOK:%d, hasExtraEntries:%d, hasAppendEntries:%d", + pMsg->term, ths->pRaftStore->currentTerm, ths->state, logOK, hasExtraEntries, hasAppendEntries); + + if (hasExtraEntries && hasAppendEntries) { + // not conflict by default + bool conflict = false; + + SyncIndex extraIndex = pMsg->prevLogIndex + 1; + SSyncRaftEntry* pExtraEntry = ths->pLogStore->getEntry(ths->pLogStore, extraIndex); + assert(pExtraEntry != NULL); + + SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen); + assert(pAppendEntry != NULL); + + // log not match, conflict + assert(extraIndex == pAppendEntry->index); + if (pExtraEntry->term != pAppendEntry->term) { + conflict = true; + } + + if (conflict) { + // roll back + SyncIndex delBegin = ths->pLogStore->getLastIndex(ths->pLogStore); + SyncIndex delEnd = extraIndex; + + sTrace("syncNodeOnAppendEntriesCb --> conflict:%d, delBegin:%ld, delEnd:%ld", conflict, delBegin, delEnd); + + // notice! reverse roll back! 
+ for (SyncIndex index = delEnd; index >= delBegin; --index) { + if (ths->pFsm->FpRollBackCb != NULL) { + SSyncRaftEntry* pRollBackEntry = ths->pLogStore->getEntry(ths->pLogStore, index); + assert(pRollBackEntry != NULL); + + // if (pRollBackEntry->msgType != TDMT_SYNC_NOOP) { + if (syncUtilUserRollback(pRollBackEntry->msgType)) { + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pRollBackEntry, &rpcMsg); + + SFsmCbMeta cbMeta; + cbMeta.index = pRollBackEntry->index; + cbMeta.isWeak = pRollBackEntry->isWeak; + cbMeta.code = 0; + cbMeta.state = ths->state; + cbMeta.seqNum = pRollBackEntry->seqNum; + ths->pFsm->FpRollBackCb(ths->pFsm, &rpcMsg, cbMeta); + rpcFreeCont(rpcMsg.pCont); + } + + syncEntryDestory(pRollBackEntry); + } + } + + // delete confict entries + ths->pLogStore->truncate(ths->pLogStore, extraIndex); + + // append new entries + ths->pLogStore->appendEntry(ths->pLogStore, pAppendEntry); + + // pre commit + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pAppendEntry, &rpcMsg); + if (ths->pFsm != NULL) { + // if (ths->pFsm->FpPreCommitCb != NULL && pAppendEntry->originalRpcType != TDMT_SYNC_NOOP) { + if (ths->pFsm->FpPreCommitCb != NULL && syncUtilUserPreCommit(pAppendEntry->originalRpcType)) { + SFsmCbMeta cbMeta; + cbMeta.index = pAppendEntry->index; + cbMeta.isWeak = pAppendEntry->isWeak; + cbMeta.code = 2; + cbMeta.state = ths->state; + cbMeta.seqNum = pAppendEntry->seqNum; + ths->pFsm->FpPreCommitCb(ths->pFsm, &rpcMsg, cbMeta); + } + } + rpcFreeCont(rpcMsg.pCont); + } + + // free memory + syncEntryDestory(pExtraEntry); + syncEntryDestory(pAppendEntry); + + } else if (hasExtraEntries && !hasAppendEntries) { + // do nothing + + } else if (!hasExtraEntries && hasAppendEntries) { + SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen); + assert(pAppendEntry != NULL); + + // append new entries + ths->pLogStore->appendEntry(ths->pLogStore, pAppendEntry); + + // pre commit + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pAppendEntry, &rpcMsg); + if 
(ths->pFsm != NULL) { + // if (ths->pFsm->FpPreCommitCb != NULL && pAppendEntry->originalRpcType != TDMT_SYNC_NOOP) { + if (ths->pFsm->FpPreCommitCb != NULL && syncUtilUserPreCommit(pAppendEntry->originalRpcType)) { + SFsmCbMeta cbMeta; + cbMeta.index = pAppendEntry->index; + cbMeta.isWeak = pAppendEntry->isWeak; + cbMeta.code = 3; + cbMeta.state = ths->state; + cbMeta.seqNum = pAppendEntry->seqNum; + ths->pFsm->FpPreCommitCb(ths->pFsm, &rpcMsg, cbMeta); + } + } + rpcFreeCont(rpcMsg.pCont); + + // free memory + syncEntryDestory(pAppendEntry); + + } else if (!hasExtraEntries && !hasAppendEntries) { + // do nothing + + } else { + assert(0); + } + + SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); + pReply->srcId = ths->myRaftId; + pReply->destId = pMsg->srcId; + pReply->term = ths->pRaftStore->currentTerm; + pReply->success = true; + + if (hasAppendEntries) { + pReply->matchIndex = pMsg->prevLogIndex + 1; + } else { + pReply->matchIndex = pMsg->prevLogIndex; + } + + SRpcMsg rpcMsg; + syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); + syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); + syncAppendEntriesReplyDestroy(pReply); + + // maybe update commit index from leader + if (pMsg->commitIndex > ths->commitIndex) { + // has commit entry in local + if (pMsg->commitIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) { + SyncIndex beginIndex = ths->commitIndex + 1; + SyncIndex endIndex = pMsg->commitIndex; + + // update commit index + ths->commitIndex = pMsg->commitIndex; + + // call back Wal + ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex); + + int32_t code = syncNodeCommit(ths, beginIndex, endIndex, ths->state); + ASSERT(code == 0); + } + } + } + + return ret; +} + + +#if 0 int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { int32_t ret = 0; @@ -375,7 +615,7 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { // I am in newConfig if (hit) { - syncNodeUpdateConfig(ths, 
&newSyncCfg, &isDrop); + syncNodeUpdateConfig(ths, &newSyncCfg, pEntry->index, &isDrop); // change isStandBy to normal if (!isDrop) { @@ -437,6 +677,7 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { return ret; } +#endif static int32_t syncNodeMakeLogSame(SSyncNode* ths, SyncAppendEntries* pMsg) { int32_t code; diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 26dbf6c47a..ea6673e220 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -192,6 +192,40 @@ int32_t syncReconfig(int64_t rid, const SSyncCfg* pSyncCfg) { return ret; } +int32_t syncLeaderTransfer(int64_t rid) { + int32_t ret = 0; + + return ret; +} + +int32_t syncLeaderTransferTo(int64_t rid, SNodeInfo newLeader) { + SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid); + if (pSyncNode == NULL) { + return false; + } + assert(rid == pSyncNode->rid); + int32_t ret = 0; + + if (pSyncNode->replicaNum == 1) { + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + sError("only one replica, cannot drop leader"); + return TAOS_SYNC_ONLY_ONE_REPLICA; + } + + SyncLeaderTransfer* pMsg = syncLeaderTransferBuild(pSyncNode->vgId); + pMsg->newLeaderId.addr = syncUtilAddr2U64(newLeader.nodeFqdn, newLeader.nodePort); + pMsg->newLeaderId.vgId = pSyncNode->vgId; + ASSERT(pMsg != NULL); + SRpcMsg rpcMsg = {0}; + syncLeaderTransfer2RpcMsg(pMsg, &rpcMsg); + syncLeaderTransferDestroy(pMsg); + + ret = syncPropose(rid, &rpcMsg, false); + + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return ret; +} + int32_t syncReconfigRaw(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg) { int32_t ret = 0; char* newconfig = syncCfg2Str((SSyncCfg*)pNewCfg); @@ -206,6 +240,40 @@ int32_t syncReconfigRaw(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg) return ret; } +bool syncCanLeaderTransfer(int64_t rid) { + SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid); + if (pSyncNode == NULL) { + return false; + } + 
assert(rid == pSyncNode->rid); + + if (pSyncNode->replicaNum == 1) { + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return false; + } + + if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) { + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return true; + } + + bool matchOK = true; + if (pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE || pSyncNode->state == TAOS_SYNC_STATE_LEADER) { + SyncIndex myCommitIndex = pSyncNode->commitIndex; + for (int i = 0; i < pSyncNode->peersNum; ++i) { + SyncIndex peerMatchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId)[i]); + if (peerMatchIndex < myCommitIndex) { + matchOK = false; + } + } + } + + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return matchOK; +} + +int32_t syncGiveUpLeader(int64_t rid) { return 0; } + int32_t syncForwardToPeer(int64_t rid, const SRpcMsg* pMsg, bool isWeak) { int32_t ret = syncPropose(rid, pMsg, isWeak); return ret; @@ -241,7 +309,7 @@ int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta) { return -1; } assert(rid == pSyncNode->rid); - *sMeta = pSyncNode->sMeta; + sMeta->lastConfigIndex = pSyncNode->pRaftCfg->lastConfigIndex; taosReleaseRef(tsNodeRefId, pSyncNode->rid); return 0; @@ -643,7 +711,7 @@ SSyncNode* syncNodeOpen(const SSyncInfo* pOldSyncInfo) { // syncNodeBecomeFollower(pSyncNode); // snapshot meta - pSyncNode->sMeta.lastConfigIndex = -1; + // pSyncNode->sMeta.lastConfigIndex = -1; return pSyncNode; } @@ -1076,9 +1144,11 @@ char* syncNode2SimpleStr(const SSyncNode* pSyncNode) { return s; } -void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, bool* isDrop) { +void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex lastConfigChangeIndex, bool* isDrop) { SSyncCfg oldConfig = pSyncNode->pRaftCfg->cfg; pSyncNode->pRaftCfg->cfg = *newConfig; + pSyncNode->pRaftCfg->lastConfigIndex = lastConfigChangeIndex; + int32_t ret = 0; // init internal @@ -1735,23 +1805,79 @@ const char* syncStr(ESyncState state) { } } +static 
int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* pEntry) { + SyncLeaderTransfer* pSyncLeaderTransfer; + if (syncUtilSameId(&(pSyncLeaderTransfer->newLeaderId), &(ths->myRaftId))) { + } + + return 0; +} + +static int32_t syncNodeConfigChange(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* pEntry) { + SSyncCfg oldSyncCfg = ths->pRaftCfg->cfg; + + SSyncCfg newSyncCfg; + int32_t ret = syncCfgFromStr(pRpcMsg->pCont, &newSyncCfg); + ASSERT(ret == 0); + + // update new config myIndex + bool hit = false; + for (int i = 0; i < newSyncCfg.replicaNum; ++i) { + if (strcmp(ths->myNodeInfo.nodeFqdn, (newSyncCfg.nodeInfo)[i].nodeFqdn) == 0 && + ths->myNodeInfo.nodePort == (newSyncCfg.nodeInfo)[i].nodePort) { + newSyncCfg.myIndex = i; + hit = true; + break; + } + } + + bool isDrop; + + if (hit) { // I am in newConfig + syncNodeUpdateConfig(ths, &newSyncCfg, pEntry->index, &isDrop); + + // change isStandBy to normal + if (!isDrop) { + if (ths->state == TAOS_SYNC_STATE_LEADER) { + syncNodeBecomeLeader(ths, "config change"); + } else { + syncNodeBecomeFollower(ths, "config change"); + } + } + + if (gRaftDetailLog) { + char* sOld = syncCfg2Str(&oldSyncCfg); + char* sNew = syncCfg2Str(&newSyncCfg); + sInfo("==config change== 0x11 old:%s new:%s isDrop:%d \n", sOld, sNew, isDrop); + taosMemoryFree(sOld); + taosMemoryFree(sNew); + } + } + + // always call FpReConfigCb + if (ths->pFsm->FpReConfigCb != NULL) { + SReConfigCbMeta cbMeta = {0}; + cbMeta.code = 0; + cbMeta.currentTerm = ths->pRaftStore->currentTerm; + cbMeta.index = pEntry->index; + cbMeta.term = pEntry->term; + cbMeta.newCfg = newSyncCfg; + cbMeta.oldCfg = oldSyncCfg; + cbMeta.seqNum = pEntry->seqNum; + cbMeta.flag = 0x11; + cbMeta.isDrop = isDrop; + ths->pFsm->FpReConfigCb(ths->pFsm, pRpcMsg, cbMeta); + } + + return 0; +} + int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) { int32_t code = 0; ESyncState state = flag; sInfo("sync event vgId:%d commit 
by wal from index:%" PRId64 " to index:%" PRId64 ", %s", ths->vgId, beginIndex, endIndex, syncUtilState2String(state)); - /* - // maybe execute by leader, skip snapshot - SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; - if (ths->pFsm->FpGetSnapshot != NULL) { - ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); - } - if (beginIndex <= snapshot.lastApplyIndex) { - beginIndex = snapshot.lastApplyIndex + 1; - } - */ - // execute fsm if (ths->pFsm != NULL) { for (SyncIndex i = beginIndex; i <= endIndex; ++i) { @@ -1764,6 +1890,7 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, SRpcMsg rpcMsg; syncEntry2OriginalRpc(pEntry, &rpcMsg); + // user commit if (ths->pFsm->FpCommitCb != NULL && syncUtilUserCommit(pEntry->originalRpcType)) { SFsmCbMeta cbMeta; cbMeta.index = pEntry->index; @@ -1780,61 +1907,14 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, // config change if (pEntry->originalRpcType == TDMT_SYNC_CONFIG_CHANGE) { - SSyncCfg oldSyncCfg = ths->pRaftCfg->cfg; + code = syncNodeConfigChange(ths, &rpcMsg, pEntry); + ASSERT(code == 0); + } - SSyncCfg newSyncCfg; - int32_t ret = syncCfgFromStr(rpcMsg.pCont, &newSyncCfg); - ASSERT(ret == 0); - - // update new config myIndex - bool hit = false; - for (int i = 0; i < newSyncCfg.replicaNum; ++i) { - if (strcmp(ths->myNodeInfo.nodeFqdn, (newSyncCfg.nodeInfo)[i].nodeFqdn) == 0 && - ths->myNodeInfo.nodePort == (newSyncCfg.nodeInfo)[i].nodePort) { - newSyncCfg.myIndex = i; - hit = true; - break; - } - } - - SReConfigCbMeta cbMeta = {0}; - bool isDrop; - - // I am in newConfig - if (hit) { - syncNodeUpdateConfig(ths, &newSyncCfg, &isDrop); - - // change isStandBy to normal - if (!isDrop) { - if (ths->state == TAOS_SYNC_STATE_LEADER) { - syncNodeBecomeLeader(ths, "config change"); - } else { - syncNodeBecomeFollower(ths, "config change"); - } - } - - if (gRaftDetailLog) { - char* sOld = syncCfg2Str(&oldSyncCfg); - char* sNew = 
syncCfg2Str(&newSyncCfg); - sInfo("==config change== 0x11 old:%s new:%s isDrop:%d \n", sOld, sNew, isDrop); - taosMemoryFree(sOld); - taosMemoryFree(sNew); - } - } - - // always call FpReConfigCb - if (ths->pFsm->FpReConfigCb != NULL) { - cbMeta.code = 0; - cbMeta.currentTerm = ths->pRaftStore->currentTerm; - cbMeta.index = pEntry->index; - cbMeta.term = pEntry->term; - cbMeta.newCfg = newSyncCfg; - cbMeta.oldCfg = oldSyncCfg; - cbMeta.seqNum = pEntry->seqNum; - cbMeta.flag = 0x11; - cbMeta.isDrop = isDrop; - ths->pFsm->FpReConfigCb(ths->pFsm, &rpcMsg, cbMeta); - } + // config change + if (pEntry->originalRpcType == TDMT_SYNC_LEADER_TRANSFER) { + code = syncDoLeaderTransfer(ths, &rpcMsg, pEntry); + ASSERT(code == 0); } // restore finish diff --git a/source/libs/sync/src/syncMessage.c b/source/libs/sync/src/syncMessage.c index af04a0f649..2f99a4c744 100644 --- a/source/libs/sync/src/syncMessage.c +++ b/source/libs/sync/src/syncMessage.c @@ -75,6 +75,11 @@ cJSON* syncRpcMsg2Json(SRpcMsg* pRpcMsg) { pRoot = syncSnapshotRsp2Json(pSyncMsg); syncSnapshotRspDestroy(pSyncMsg); + } else if (pRpcMsg->msgType == TDMT_SYNC_LEADER_TRANSFER) { + SyncLeaderTransfer* pSyncMsg = syncLeaderTransferDeserialize2(pRpcMsg->pCont, pRpcMsg->contLen); + pRoot = syncLeaderTransfer2Json(pSyncMsg); + syncLeaderTransferDestroy(pSyncMsg); + } else if (pRpcMsg->msgType == TDMT_SYNC_COMMON_RESPONSE) { pRoot = cJSON_CreateObject(); char* s; @@ -2055,4 +2060,166 @@ void syncSnapshotRspLog2(char* s, const SyncSnapshotRsp* pMsg) { sTrace("syncSnapshotRspLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); taosMemoryFree(serialized); } +} + +// --------------------------------------------- +SyncLeaderTransfer* syncLeaderTransferBuild(int32_t vgId) { + uint32_t bytes = sizeof(SyncLeaderTransfer); + SyncLeaderTransfer* pMsg = taosMemoryMalloc(bytes); + memset(pMsg, 0, bytes); + pMsg->bytes = bytes; + pMsg->vgId = vgId; + pMsg->msgType = TDMT_SYNC_LEADER_TRANSFER; + return pMsg; +} + +void 
syncLeaderTransferDestroy(SyncLeaderTransfer* pMsg) { + if (pMsg != NULL) { + taosMemoryFree(pMsg); + } +} + +void syncLeaderTransferSerialize(const SyncLeaderTransfer* pMsg, char* buf, uint32_t bufLen) { + assert(pMsg->bytes <= bufLen); + memcpy(buf, pMsg, pMsg->bytes); +} + +void syncLeaderTransferDeserialize(const char* buf, uint32_t len, SyncLeaderTransfer* pMsg) { + memcpy(pMsg, buf, len); + assert(len == pMsg->bytes); +} + +char* syncLeaderTransferSerialize2(const SyncLeaderTransfer* pMsg, uint32_t* len) { + char* buf = taosMemoryMalloc(pMsg->bytes); + assert(buf != NULL); + syncLeaderTransferSerialize(pMsg, buf, pMsg->bytes); + if (len != NULL) { + *len = pMsg->bytes; + } + return buf; +} + +SyncLeaderTransfer* syncLeaderTransferDeserialize2(const char* buf, uint32_t len) { + uint32_t bytes = *((uint32_t*)buf); + SyncLeaderTransfer* pMsg = taosMemoryMalloc(bytes); + assert(pMsg != NULL); + syncLeaderTransferDeserialize(buf, len, pMsg); + assert(len == pMsg->bytes); + return pMsg; +} + +void syncLeaderTransfer2RpcMsg(const SyncLeaderTransfer* pMsg, SRpcMsg* pRpcMsg) { + memset(pRpcMsg, 0, sizeof(*pRpcMsg)); + pRpcMsg->msgType = pMsg->msgType; + pRpcMsg->contLen = pMsg->bytes; + pRpcMsg->pCont = rpcMallocCont(pRpcMsg->contLen); + syncLeaderTransferSerialize(pMsg, pRpcMsg->pCont, pRpcMsg->contLen); +} + +void syncLeaderTransferFromRpcMsg(const SRpcMsg* pRpcMsg, SyncLeaderTransfer* pMsg) { + syncLeaderTransferDeserialize(pRpcMsg->pCont, pRpcMsg->contLen, pMsg); +} + +SyncLeaderTransfer* syncLeaderTransferFromRpcMsg2(const SRpcMsg* pRpcMsg) { + SyncLeaderTransfer* pMsg = syncLeaderTransferDeserialize2(pRpcMsg->pCont, pRpcMsg->contLen); + assert(pMsg != NULL); + return pMsg; +} + +cJSON* syncLeaderTransfer2Json(const SyncLeaderTransfer* pMsg) { + char u64buf[128]; + cJSON* pRoot = cJSON_CreateObject(); + + if (pMsg != NULL) { + cJSON_AddNumberToObject(pRoot, "bytes", pMsg->bytes); + cJSON_AddNumberToObject(pRoot, "vgId", pMsg->vgId); + 
cJSON_AddNumberToObject(pRoot, "msgType", pMsg->msgType); + + /* + cJSON* pSrcId = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->srcId.addr); + cJSON_AddStringToObject(pSrcId, "addr", u64buf); + { + uint64_t u64 = pMsg->srcId.addr; + cJSON* pTmp = pSrcId; + char host[128]; + uint16_t port; + syncUtilU642Addr(u64, host, sizeof(host), &port); + cJSON_AddStringToObject(pTmp, "addr_host", host); + cJSON_AddNumberToObject(pTmp, "addr_port", port); + } + cJSON_AddNumberToObject(pSrcId, "vgId", pMsg->srcId.vgId); + cJSON_AddItemToObject(pRoot, "srcId", pSrcId); + + cJSON* pDestId = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->destId.addr); + cJSON_AddStringToObject(pDestId, "addr", u64buf); + { + uint64_t u64 = pMsg->destId.addr; + cJSON* pTmp = pDestId; + char host[128]; + uint16_t port; + syncUtilU642Addr(u64, host, sizeof(host), &port); + cJSON_AddStringToObject(pTmp, "addr_host", host); + cJSON_AddNumberToObject(pTmp, "addr_port", port); + } + cJSON_AddNumberToObject(pDestId, "vgId", pMsg->destId.vgId); + cJSON_AddItemToObject(pRoot, "destId", pDestId); + */ + + cJSON* pNewerId = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->newLeaderId.addr); + cJSON_AddStringToObject(pNewerId, "addr", u64buf); + { + uint64_t u64 = pMsg->newLeaderId.addr; + cJSON* pTmp = pNewerId; + char host[128]; + uint16_t port; + syncUtilU642Addr(u64, host, sizeof(host), &port); + cJSON_AddStringToObject(pTmp, "addr_host", host); + cJSON_AddNumberToObject(pTmp, "addr_port", port); + } + cJSON_AddNumberToObject(pNewerId, "vgId", pMsg->newLeaderId.vgId); + cJSON_AddItemToObject(pRoot, "newLeaderId", pNewerId); + } + + cJSON* pJson = cJSON_CreateObject(); + cJSON_AddItemToObject(pJson, "SyncLeaderTransfer", pRoot); + return pJson; +} + +char* syncLeaderTransfer2Str(const SyncLeaderTransfer* pMsg) { + cJSON* pJson = syncLeaderTransfer2Json(pMsg); + char* serialized = cJSON_Print(pJson); + cJSON_Delete(pJson); + return 
serialized; +} + +// for debug ---------------------- +void syncLeaderTransferPrint(const SyncLeaderTransfer* pMsg) { + char* serialized = syncLeaderTransfer2Str(pMsg); + printf("syncLeaderTransferPrint | len:%lu | %s \n", strlen(serialized), serialized); + fflush(NULL); + taosMemoryFree(serialized); +} + +void syncLeaderTransferPrint2(char* s, const SyncLeaderTransfer* pMsg) { + char* serialized = syncLeaderTransfer2Str(pMsg); + printf("syncLeaderTransferPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + fflush(NULL); + taosMemoryFree(serialized); +} + +void syncLeaderTransferLog(const SyncLeaderTransfer* pMsg) { + char* serialized = syncLeaderTransfer2Str(pMsg); + sTrace("syncLeaderTransferLog | len:%lu | %s", strlen(serialized), serialized); + taosMemoryFree(serialized); +} + +void syncLeaderTransferLog2(char* s, const SyncLeaderTransfer* pMsg) { + if (gRaftDetailLog) { + char* serialized = syncLeaderTransfer2Str(pMsg); + sTrace("syncLeaderTransferLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } \ No newline at end of file diff --git a/source/libs/sync/src/syncRaftCfg.c b/source/libs/sync/src/syncRaftCfg.c index 95eec5d98f..45e00aca2c 100644 --- a/source/libs/sync/src/syncRaftCfg.c +++ b/source/libs/sync/src/syncRaftCfg.c @@ -150,6 +150,10 @@ cJSON *raftCfg2Json(SRaftCfg *pRaftCfg) { cJSON_AddNumberToObject(pRoot, "isStandBy", pRaftCfg->isStandBy); cJSON_AddNumberToObject(pRoot, "snapshotEnable", pRaftCfg->snapshotEnable); + char buf64[128]; + snprintf(buf64, sizeof(buf64), "%ld", pRaftCfg->lastConfigIndex); + cJSON_AddStringToObject(pRoot, "lastConfigIndex", buf64); + cJSON *pJson = cJSON_CreateObject(); cJSON_AddItemToObject(pJson, "RaftCfg", pRoot); return pJson; @@ -172,6 +176,7 @@ int32_t raftCfgCreateFile(SSyncCfg *pCfg, SRaftCfgMeta meta, const char *path) { raftCfg.cfg = *pCfg; raftCfg.isStandBy = meta.isStandBy; raftCfg.snapshotEnable = meta.snapshotEnable; + raftCfg.lastConfigIndex = 
meta.lastConfigIndex; char *s = raftCfg2Str(&raftCfg); char buf[CONFIG_FILE_LEN] = {0}; @@ -199,6 +204,9 @@ int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg) { cJSON *pJsonSnapshotEnable = cJSON_GetObjectItem(pJson, "snapshotEnable"); pRaftCfg->snapshotEnable = cJSON_GetNumberValue(pJsonSnapshotEnable); + cJSON *pJsonLastConfigIndex = cJSON_GetObjectItem(pJson, "lastConfigIndex"); + pRaftCfg->lastConfigIndex = atoll(cJSON_GetStringValue(pJsonLastConfigIndex)); + cJSON * pJsonSyncCfg = cJSON_GetObjectItem(pJson, "SSyncCfg"); int32_t code = syncCfgFromJson(pJsonSyncCfg, &(pRaftCfg->cfg)); ASSERT(code == 0); diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index c53e5916ae..92699ab24d 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -553,15 +553,19 @@ void logStorePrint2(char* s, SSyncLogStore* pLogStore) { } void logStoreLog(SSyncLogStore* pLogStore) { - char* serialized = logStore2Str(pLogStore); - sTraceLong("logStoreLog | len:%lu | %s", strlen(serialized), serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = logStore2Str(pLogStore); + sTraceLong("logStoreLog | len:%lu | %s", strlen(serialized), serialized); + taosMemoryFree(serialized); + } } void logStoreLog2(char* s, SSyncLogStore* pLogStore) { - char* serialized = logStore2Str(pLogStore); - sTraceLong("logStoreLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = logStore2Str(pLogStore); + sTraceLong("logStoreLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // for debug ----------------- diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index a23fe2c38a..ade4ed5d22 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -352,7 +352,7 @@ cJSON 
*snapshotSender2Json(SSyncSnapshotSender *pSender) { char *snapshotSender2Str(SSyncSnapshotSender *pSender) { cJSON *pJson = snapshotSender2Json(pSender); - char *serialized = cJSON_Print(pJson); + char * serialized = cJSON_Print(pJson); cJSON_Delete(pJson); return serialized; } @@ -473,7 +473,7 @@ cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver) { cJSON_AddStringToObject(pFromId, "addr", u64buf); { uint64_t u64 = pReceiver->fromId.addr; - cJSON *pTmp = pFromId; + cJSON * pTmp = pFromId; char host[128] = {0}; uint16_t port; syncUtilU642Addr(u64, host, sizeof(host), &port); @@ -497,7 +497,7 @@ cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver) { char *snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver) { cJSON *pJson = snapshotReceiver2Json(pReceiver); - char *serialized = cJSON_Print(pJson); + char * serialized = cJSON_Print(pJson); cJSON_Delete(pJson); return serialized; } diff --git a/source/libs/sync/src/syncUtil.c b/source/libs/sync/src/syncUtil.c index f6ff521e01..d12c5058cc 100644 --- a/source/libs/sync/src/syncUtil.c +++ b/source/libs/sync/src/syncUtil.c @@ -214,29 +214,31 @@ void syncUtilMsgNtoH(void* msg) { pHead->vgId = ntohl(pHead->vgId); } +#if 0 bool syncUtilIsData(tmsg_t msgType) { if (msgType == TDMT_SYNC_NOOP || msgType == TDMT_SYNC_CONFIG_CHANGE) { return false; } return true; } +#endif bool syncUtilUserPreCommit(tmsg_t msgType) { - if (msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_CONFIG_CHANGE) { + if (msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_CONFIG_CHANGE && msgType != TDMT_SYNC_LEADER_TRANSFER) { return true; } return false; } bool syncUtilUserCommit(tmsg_t msgType) { - if (msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_CONFIG_CHANGE) { + if (msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_CONFIG_CHANGE && msgType != TDMT_SYNC_LEADER_TRANSFER) { return true; } return false; } bool syncUtilUserRollback(tmsg_t msgType) { - if (msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_CONFIG_CHANGE) { + if 
(msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_CONFIG_CHANGE && msgType != TDMT_SYNC_LEADER_TRANSFER) { return true; } return false; diff --git a/source/libs/sync/test/CMakeLists.txt b/source/libs/sync/test/CMakeLists.txt index c68c6349fb..d39035ba53 100644 --- a/source/libs/sync/test/CMakeLists.txt +++ b/source/libs/sync/test/CMakeLists.txt @@ -47,6 +47,7 @@ add_executable(syncTestTool "") add_executable(syncRaftLogTest "") add_executable(syncRaftLogTest2 "") add_executable(syncRaftLogTest3 "") +add_executable(syncLeaderTransferTest "") target_sources(syncTest @@ -245,6 +246,10 @@ target_sources(syncRaftLogTest3 PRIVATE "syncRaftLogTest3.cpp" ) +target_sources(syncLeaderTransferTest + PRIVATE + "syncLeaderTransferTest.cpp" +) target_include_directories(syncTest @@ -492,6 +497,11 @@ target_include_directories(syncRaftLogTest3 "${TD_SOURCE_DIR}/include/libs/sync" "${CMAKE_CURRENT_SOURCE_DIR}/../inc" ) +target_include_directories(syncLeaderTransferTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) target_link_libraries(syncTest @@ -690,6 +700,10 @@ target_link_libraries(syncRaftLogTest3 sync gtest_main ) +target_link_libraries(syncLeaderTransferTest + sync + gtest_main +) enable_testing() diff --git a/source/libs/sync/test/syncLeaderTransferTest.cpp b/source/libs/sync/test/syncLeaderTransferTest.cpp new file mode 100644 index 0000000000..1c3891d492 --- /dev/null +++ b/source/libs/sync/test/syncLeaderTransferTest.cpp @@ -0,0 +1,101 @@ +#include +#include +#include "syncIO.h" +#include "syncInt.h" +#include "syncMessage.h" +#include "syncUtil.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +SyncLeaderTransfer *createMsg() { + SyncLeaderTransfer *pMsg = syncLeaderTransferBuild(1000); + /* + pMsg->srcId.addr = 
syncUtilAddr2U64("127.0.0.1", 1234); + pMsg->srcId.vgId = 100; + pMsg->destId.addr = syncUtilAddr2U64("127.0.0.1", 5678); + pMsg->destId.vgId = 100; + */ + pMsg->newLeaderId.addr = syncUtilAddr2U64("127.0.0.1", 9999); + pMsg->newLeaderId.vgId = 100; + return pMsg; +} + +void test1() { + SyncLeaderTransfer *pMsg = createMsg(); + syncLeaderTransferLog2((char *)"test1:", pMsg); + syncLeaderTransferDestroy(pMsg); +} + +void test2() { + SyncLeaderTransfer *pMsg = createMsg(); + uint32_t len = pMsg->bytes; + char * serialized = (char *)taosMemoryMalloc(len); + syncLeaderTransferSerialize(pMsg, serialized, len); + SyncLeaderTransfer *pMsg2 = syncLeaderTransferBuild(1000); + syncLeaderTransferDeserialize(serialized, len, pMsg2); + syncLeaderTransferLog2((char *)"test2: syncLeaderTransferSerialize -> syncLeaderTransferDeserialize ", pMsg2); + + taosMemoryFree(serialized); + syncLeaderTransferDestroy(pMsg); + syncLeaderTransferDestroy(pMsg2); +} + +void test3() { + SyncLeaderTransfer *pMsg = createMsg(); + uint32_t len; + char * serialized = syncLeaderTransferSerialize2(pMsg, &len); + SyncLeaderTransfer *pMsg2 = syncLeaderTransferDeserialize2(serialized, len); + syncLeaderTransferLog2((char *)"test3: syncLeaderTransferSerialize2 -> syncLeaderTransferDeserialize2 ", pMsg2); + + taosMemoryFree(serialized); + syncLeaderTransferDestroy(pMsg); + syncLeaderTransferDestroy(pMsg2); +} + +void test4() { + SyncLeaderTransfer *pMsg = createMsg(); + SRpcMsg rpcMsg; + syncLeaderTransfer2RpcMsg(pMsg, &rpcMsg); + SyncLeaderTransfer *pMsg2 = (SyncLeaderTransfer *)taosMemoryMalloc(rpcMsg.contLen); + syncLeaderTransferFromRpcMsg(&rpcMsg, pMsg2); + syncLeaderTransferLog2((char *)"test4: syncLeaderTransfer2RpcMsg -> syncLeaderTransferFromRpcMsg ", pMsg2); + + rpcFreeCont(rpcMsg.pCont); + syncLeaderTransferDestroy(pMsg); + syncLeaderTransferDestroy(pMsg2); +} + +void test5() { + SyncLeaderTransfer *pMsg = createMsg(); + SRpcMsg rpcMsg; + syncLeaderTransfer2RpcMsg(pMsg, &rpcMsg); + 
SyncLeaderTransfer *pMsg2 = syncLeaderTransferFromRpcMsg2(&rpcMsg); + syncLeaderTransferLog2((char *)"test5: syncLeaderTransfer2RpcMsg -> syncLeaderTransferFromRpcMsg2 ", pMsg2); + + rpcFreeCont(rpcMsg.pCont); + syncLeaderTransferDestroy(pMsg); + syncLeaderTransferDestroy(pMsg2); +} + +int main() { + gRaftDetailLog = true; + + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; + logTest(); + + test1(); + test2(); + test3(); + test4(); + test5(); + + return 0; +} diff --git a/source/libs/sync/test/syncRaftCfgTest.cpp b/source/libs/sync/test/syncRaftCfgTest.cpp index 564cbdb69a..8c6a704e2d 100644 --- a/source/libs/sync/test/syncRaftCfgTest.cpp +++ b/source/libs/sync/test/syncRaftCfgTest.cpp @@ -74,6 +74,7 @@ void test3() { SRaftCfgMeta meta; meta.isStandBy = 7; meta.snapshotEnable = 9; + meta.lastConfigIndex = 789; raftCfgCreateFile(pCfg, meta, s); printf("%s create json file: %s \n", (char*)__FUNCTION__, s); } @@ -98,6 +99,7 @@ void test5() { pCfg->cfg.myIndex = taosGetTimestampSec(); pCfg->isStandBy += 2; pCfg->snapshotEnable += 3; + pCfg->lastConfigIndex += 1000; raftCfgPersist(pCfg); printf("%s update json file: %s myIndex->%d \n", (char*)__FUNCTION__, "./test3_raft_cfg.json", pCfg->cfg.myIndex); From 6adadc0b2606b5c560f64567833dbe8e528ec24e Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Sat, 11 Jun 2022 13:03:58 +0800 Subject: [PATCH 06/16] refactor(sync): add last config index --- source/libs/sync/src/syncMain.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index ea6673e220..0184203ee7 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1181,13 +1181,12 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex l pSyncNode->quorum = syncUtilQuorum(pSyncNode->pRaftCfg->cfg.replicaNum); - // isDrop - *isDrop = true; - bool IamInOld, IamInNew; + bool IamInOld = false; + 
bool IamInNew = false; for (int i = 0; i < oldConfig.replicaNum; ++i) { if (strcmp((oldConfig.nodeInfo)[i].nodeFqdn, pSyncNode->myNodeInfo.nodeFqdn) == 0 && (oldConfig.nodeInfo)[i].nodePort == pSyncNode->myNodeInfo.nodePort) { - *isDrop = false; + IamInOld = false; break; } } @@ -1195,16 +1194,21 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex l for (int i = 0; i < newConfig->replicaNum; ++i) { if (strcmp((newConfig->nodeInfo)[i].nodeFqdn, pSyncNode->myNodeInfo.nodeFqdn) == 0 && (newConfig->nodeInfo)[i].nodePort == pSyncNode->myNodeInfo.nodePort) { - *isDrop = false; + IamInNew = false; break; } } - if (!(*isDrop)) { - // change isStandBy to normal - pSyncNode->pRaftCfg->isStandBy = 0; + *isDrop = true; + if (IamInOld && !IamInNew) { + *isDrop = true; + } else { + *isDrop = false; } + if (IamInNew) { + pSyncNode->pRaftCfg->isStandBy = 0; // change isStandBy to normal + } raftCfgPersist(pSyncNode->pRaftCfg); if (gRaftDetailLog) { @@ -1821,19 +1825,19 @@ static int32_t syncNodeConfigChange(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftE ASSERT(ret == 0); // update new config myIndex - bool hit = false; + bool IamInNew = false; for (int i = 0; i < newSyncCfg.replicaNum; ++i) { if (strcmp(ths->myNodeInfo.nodeFqdn, (newSyncCfg.nodeInfo)[i].nodeFqdn) == 0 && ths->myNodeInfo.nodePort == (newSyncCfg.nodeInfo)[i].nodePort) { newSyncCfg.myIndex = i; - hit = true; + IamInNew = true; break; } } bool isDrop; - if (hit) { // I am in newConfig + if (IamInNew || (!IamInNew && ths->state != TAOS_SYNC_STATE_LEADER)) { syncNodeUpdateConfig(ths, &newSyncCfg, pEntry->index, &isDrop); // change isStandBy to normal From fa54663871260650bab37a6d8338f82385d29136 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Sat, 11 Jun 2022 13:52:17 +0800 Subject: [PATCH 07/16] refactor(sync): add last config index --- include/libs/sync/syncTools.h | 2 + source/libs/sync/inc/syncSnapshot.h | 1 + source/libs/sync/src/syncAppendEntries.c | 1 - 
source/libs/sync/src/syncAppendEntriesReply.c | 8 +-- source/libs/sync/src/syncMain.c | 4 +- source/libs/sync/src/syncMessage.c | 5 ++ source/libs/sync/src/syncSnapshot.c | 53 +++++++++++++++---- .../libs/sync/test/syncSnapshotSendTest.cpp | 12 +++++ 8 files changed, 69 insertions(+), 17 deletions(-) diff --git a/include/libs/sync/syncTools.h b/include/libs/sync/syncTools.h index a6802fc915..68a33d48cb 100644 --- a/include/libs/sync/syncTools.h +++ b/include/libs/sync/syncTools.h @@ -398,6 +398,8 @@ typedef struct SyncSnapshotSend { SyncTerm term; SyncIndex lastIndex; // lastIndex of snapshot SyncTerm lastTerm; // lastTerm of snapshot + SyncIndex lastConfigIndex; + SSyncCfg lastConfig; SyncTerm privateTerm; int32_t seq; uint32_t dataLen; diff --git a/source/libs/sync/inc/syncSnapshot.h b/source/libs/sync/inc/syncSnapshot.h index 9fbcdf138b..a6170a92e3 100644 --- a/source/libs/sync/inc/syncSnapshot.h +++ b/source/libs/sync/inc/syncSnapshot.h @@ -43,6 +43,7 @@ typedef struct SSyncSnapshotSender { void * pCurrentBlock; int32_t blockLen; SSnapshot snapshot; + SSyncCfg lastConfig; int64_t sendingMS; SSyncNode *pSyncNode; int32_t replicaIndex; diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 6b5a86ded9..b33f3481e7 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -326,7 +326,6 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { return ret; } - #if 0 int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { int32_t ret = 0; diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index 7fc35afbb1..3d9565bdaf 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -190,15 +190,15 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries if (gRaftDetailLog) { char* s = 
snapshotSender2Str(pSender); sInfo( - "sync event vgId:%d snapshot send to %s:%d start sender first time, lastApplyIndex:%ld lastApplyTerm:%lu " + "sync event vgId:%d snapshot send to %s:%d start sender first time, lastApplyIndex:%ld lastApplyTerm:%lu lastConfigIndex:%ld" "sender:%s", - ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, s); + ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, s); taosMemoryFree(s); } else { sInfo( "sync event vgId:%d snapshot send to %s:%d start sender first time, lastApplyIndex:%ld " - "lastApplyTerm:%lu", - ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm); + "lastApplyTerm:%lu lastConfigIndex:%ld", + ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); } } diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 6722ed3703..ac96d933ed 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1201,13 +1201,13 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex l *isDrop = true; if (IamInOld && !IamInNew) { - *isDrop = true; + *isDrop = true; } else { *isDrop = false; } if (IamInNew) { - pSyncNode->pRaftCfg->isStandBy = 0; // change isStandBy to normal + pSyncNode->pRaftCfg->isStandBy = 0; // change isStandBy to normal } raftCfgPersist(pSyncNode->pRaftCfg); diff --git a/source/libs/sync/src/syncMessage.c b/source/libs/sync/src/syncMessage.c index 2f99a4c744..57d62d298e 100644 --- a/source/libs/sync/src/syncMessage.c +++ b/source/libs/sync/src/syncMessage.c @@ -16,6 +16,7 @@ #include "syncMessage.h" #include "syncUtil.h" #include "tcoding.h" +#include "syncRaftCfg.h" // --------------------------------------------- cJSON* syncRpcMsg2Json(SRpcMsg* pRpcMsg) { @@ -1846,6 +1847,10 @@ cJSON* syncSnapshotSend2Json(const 
SyncSnapshotSend* pMsg) { snprintf(u64buf, sizeof(u64buf), "%ld", pMsg->lastIndex); cJSON_AddStringToObject(pRoot, "lastIndex", u64buf); + snprintf(u64buf, sizeof(u64buf), "%ld", pMsg->lastConfigIndex); + cJSON_AddStringToObject(pRoot, "lastConfigIndex", u64buf); + cJSON_AddItemToObject(pRoot, "lastConfig", syncCfg2Json((SSyncCfg*)&(pMsg->lastConfig))); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->lastTerm); cJSON_AddStringToObject(pRoot, "lastTerm", u64buf); diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index ade4ed5d22..39f2a83c7c 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -19,6 +19,7 @@ #include "syncRaftStore.h" #include "syncUtil.h" #include "wal.h" +#include "syncRaftCfg.h" static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SRaftId fromId); @@ -83,6 +84,26 @@ void snapshotSenderStart(SSyncSnapshotSender *pSender) { // get current snapshot info pSender->pSyncNode->pFsm->FpGetSnapshot(pSender->pSyncNode->pFsm, &(pSender->snapshot)); + if (pSender->snapshot.lastConfigIndex != SYNC_INDEX_INVALID) { + SSyncRaftEntry *pEntry = NULL; + int32_t code = pSender->pSyncNode->pLogStore->syncLogGetEntry(pSender->pSyncNode->pLogStore, pSender->snapshot.lastConfigIndex, &pEntry); + ASSERT(code == 0); + ASSERT(pEntry == NULL); + + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pEntry, &rpcMsg); + SSyncCfg lastConfig; + int32_t ret = syncCfgFromStr(rpcMsg.pCont, &lastConfig); + ASSERT(ret == 0); + pSender->lastConfig = lastConfig; + + rpcFreeCont(rpcMsg.pCont); + syncEntryDestory(pEntry); + + } else { + memset(&(pSender->lastConfig), 0, sizeof(SSyncCfg)); + } + pSender->sendingMS = SYNC_SNAPSHOT_RETRY_MS; pSender->term = pSender->pSyncNode->pRaftStore->currentTerm; @@ -97,6 +118,8 @@ void snapshotSenderStart(SSyncSnapshotSender *pSender) { pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; pMsg->lastIndex = pSender->snapshot.lastApplyIndex; 
pMsg->lastTerm = pSender->snapshot.lastApplyTerm; + pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex; + pMsg->lastConfig = pSender->lastConfig; pMsg->seq = pSender->seq; // SYNC_SNAPSHOT_SEQ_BEGIN pMsg->privateTerm = pSender->privateTerm; @@ -112,15 +135,15 @@ void snapshotSenderStart(SSyncSnapshotSender *pSender) { if (gRaftDetailLog) { char *msgStr = syncSnapshotSend2Str(pMsg); sTrace( - "sync event vgId:%d snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send " + "sync event vgId:%d snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu lastConfigIndex:%ld send " "msg:%s", pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, msgStr); + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, msgStr); taosMemoryFree(msgStr); } else { - sTrace("sync event vgId:%d snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu", + sTrace("sync event vgId:%d snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu lastConfigIndex:%ld", pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm); + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); } syncSnapshotSendDestroy(pMsg); @@ -228,6 +251,8 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; pMsg->lastIndex = pSender->snapshot.lastApplyIndex; pMsg->lastTerm = pSender->snapshot.lastApplyTerm; + pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex; + pMsg->lastConfig = pSender->lastConfig; pMsg->seq = pSender->seq; pMsg->privateTerm = pSender->privateTerm; memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen); @@ -245,20 +270,20 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { if (gRaftDetailLog) { char *msgStr = syncSnapshotSend2Str(pMsg); 
sTrace( - "sync event vgId:%d snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send " + "sync event vgId:%d snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu lastConfigIndex:%ld send " "msg:%s", pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, msgStr); + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, msgStr); taosMemoryFree(msgStr); } else { - sTrace("sync event vgId:%d snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu", + sTrace("sync event vgId:%d snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu lastConfigIndex:%ld", pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm); + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); } } else { - sTrace("sync event vgId:%d snapshot send to %s:%d sending seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu", + sTrace("sync event vgId:%d snapshot send to %s:%d sending seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu lastConfigIndex:%ld", pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm); + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); } syncSnapshotSendDestroy(pMsg); @@ -274,6 +299,8 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) { pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; pMsg->lastIndex = pSender->snapshot.lastApplyIndex; pMsg->lastTerm = pSender->snapshot.lastApplyTerm; + pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex; + pMsg->lastConfig = pSender->lastConfig; pMsg->seq = pSender->seq; memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen); @@ -540,6 +567,12 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { 
pSyncNode->pFsm->FpSnapshotStopWrite(pSyncNode->pFsm, pReceiver->pWriter, true); pSyncNode->pLogStore->syncLogSetBeginIndex(pSyncNode->pLogStore, pMsg->lastIndex + 1); + // maybe update lastconfig + if (pMsg->lastConfigIndex >= SYNC_INDEX_BEGIN) { + bool isDrop; + syncNodeUpdateConfig(pSyncNode, &(pMsg->lastConfig), pMsg->lastConfigIndex, &isDrop); + } + SSnapshot snapshot; pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); diff --git a/source/libs/sync/test/syncSnapshotSendTest.cpp b/source/libs/sync/test/syncSnapshotSendTest.cpp index 01d3264693..d4ae4af654 100644 --- a/source/libs/sync/test/syncSnapshotSendTest.cpp +++ b/source/libs/sync/test/syncSnapshotSendTest.cpp @@ -24,6 +24,15 @@ SyncSnapshotSend *createMsg() { pMsg->privateTerm = 99; pMsg->lastIndex = 22; pMsg->lastTerm = 33; + + pMsg->lastConfigIndex = 99; + pMsg->lastConfig.replicaNum = 3; + pMsg->lastConfig.myIndex = 1; + for (int i = 0; i < pMsg->lastConfig.replicaNum; ++i) { + ((pMsg->lastConfig.nodeInfo)[i]).nodePort = i * 100; + snprintf(((pMsg->lastConfig.nodeInfo)[i]).nodeFqdn, sizeof(((pMsg->lastConfig.nodeInfo)[i]).nodeFqdn), "100.200.300.%d", i); + } + pMsg->seq = 44; strcpy(pMsg->data, "hello world"); return pMsg; @@ -87,6 +96,9 @@ void test5() { } int main() { + + gRaftDetailLog = true; + tsAsyncLog = 0; sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; logTest(); From 33b5efc21ffc41332fb711e388f5410b948e3a10 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Sat, 11 Jun 2022 14:11:10 +0800 Subject: [PATCH 08/16] test: execute trans in follower --- source/dnode/mnode/impl/inc/mndTrans.h | 1 + source/dnode/mnode/impl/src/mndSync.c | 6 ++++++ source/dnode/mnode/impl/src/mndTrans.c | 23 ++++++++++++++++------- tests/script/jenkins/basic.txt | 4 ++-- tests/script/tsim/mnode/basic2.sim | 4 ++++ tests/script/tsim/mnode/basic3.sim | 2 +- 6 files changed, 30 insertions(+), 10 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndTrans.h b/source/dnode/mnode/impl/inc/mndTrans.h 
index 5ac9d2233f..0175e29a77 100644 --- a/source/dnode/mnode/impl/inc/mndTrans.h +++ b/source/dnode/mnode/impl/inc/mndTrans.h @@ -75,6 +75,7 @@ int32_t mndTransPrepare(SMnode *pMnode, STrans *pTrans); int32_t mndTransProcessRsp(SRpcMsg *pRsp); void mndTransPullup(SMnode *pMnode); int32_t mndKillTrans(SMnode *pMnode, STrans *pTrans); +void mndTransExecute(SMnode *pMnode, STrans *pTrans); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index adc86df829..e0b4cc6a57 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -61,6 +61,12 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM } tsem_post(&pMgmt->syncSem); } else { + STrans *pTrans = mndAcquireTrans(pMnode, transId); + if (pTrans != NULL) { + mndTransExecute(pMnode, pTrans); + mndReleaseTrans(pMnode, pTrans); + } + if (cbMeta.index - sdbGetApplyIndex(pMnode->pSdb) > 100) { SSnapshotMeta sMeta = {0}; if (syncGetSnapshotMeta(pMnode->syncMgmt.sync, &sMeta) == 0) { diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 310f2fffbc..033687db3e 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -52,8 +52,8 @@ static bool mndTransPerformCommitActionStage(SMnode *pMnode, STrans *pTrans); static bool mndTransPerformCommitStage(SMnode *pMnode, STrans *pTrans); static bool mndTransPerformRollbackStage(SMnode *pMnode, STrans *pTrans); static bool mndTransPerfromFinishedStage(SMnode *pMnode, STrans *pTrans); +static bool mndCantExecuteTransAction(SMnode *pMnode) { return !pMnode->deploy && !mndIsMaster(pMnode); } -static void mndTransExecute(SMnode *pMnode, STrans *pTrans); static void mndTransSendRpcRsp(SMnode *pMnode, STrans *pTrans); static int32_t mndProcessTransReq(SRpcMsg *pReq); static int32_t mndProcessKillTransReq(SRpcMsg *pReq); @@ -517,12 +517,12 @@ static int32_t 
mndTransActionUpdate(SSdb *pSdb, STrans *pOld, STrans *pNew) { if (pOld->stage == TRN_STAGE_COMMIT) { pOld->stage = TRN_STAGE_COMMIT_ACTION; - mTrace("trans:%d, stage from commit to commitAction", pNew->id); + mTrace("trans:%d, stage from commit to commitAction since perform update action", pNew->id); } if (pOld->stage == TRN_STAGE_ROLLBACK) { pOld->stage = TRN_STAGE_FINISHED; - mTrace("trans:%d, stage from rollback to finished", pNew->id); + mTrace("trans:%d, stage from rollback to finished since perform update action", pNew->id); } return 0; } @@ -914,7 +914,7 @@ static int32_t mndTransWriteSingleLog(SMnode *pMnode, STrans *pTrans, STransActi static int32_t mndTransSendSingleMsg(SMnode *pMnode, STrans *pTrans, STransAction *pAction) { if (pAction->msgSent) return 0; - if (!pMnode->deploy && !mndIsMaster(pMnode)) return -1; + if (mndCantExecuteTransAction(pMnode)) return -1; int64_t signature = pTrans->id; signature = (signature << 32); @@ -1114,9 +1114,9 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) pTrans->lastEpset = pAction->epSet; } - if (code == 0) { - if (!pMnode->deploy && !mndIsMaster(pMnode)) break; + if (mndCantExecuteTransAction(pMnode)) break; + if (code == 0) { pTrans->code = 0; pTrans->redoActionPos++; mDebug("trans:%d, %s:%d is executed and need sync to other mnodes", pTrans->id, mndTransStr(pAction->stage), @@ -1160,6 +1160,8 @@ static bool mndTransPerformRedoActionStage(SMnode *pMnode, STrans *pTrans) { code = mndTransExecuteRedoActions(pMnode, pTrans); } + if (mndCantExecuteTransAction(pMnode)) return false; + if (code == 0) { pTrans->code = 0; pTrans->stage = TRN_STAGE_COMMIT; @@ -1185,6 +1187,8 @@ static bool mndTransPerformRedoActionStage(SMnode *pMnode, STrans *pTrans) { } static bool mndTransPerformCommitStage(SMnode *pMnode, STrans *pTrans) { + if (mndCantExecuteTransAction(pMnode)) return false; + bool continueExec = true; int32_t code = mndTransCommit(pMnode, pTrans); @@ -1233,6 +1237,8 @@ static bool 
mndTransPerformUndoActionStage(SMnode *pMnode, STrans *pTrans) { bool continueExec = true; int32_t code = mndTransExecuteUndoActions(pMnode, pTrans); + if (mndCantExecuteTransAction(pMnode)) return false; + if (code == 0) { pTrans->stage = TRN_STAGE_ROLLBACK; mDebug("trans:%d, stage from undoAction to rollback", pTrans->id); @@ -1250,6 +1256,8 @@ static bool mndTransPerformUndoActionStage(SMnode *pMnode, STrans *pTrans) { } static bool mndTransPerformRollbackStage(SMnode *pMnode, STrans *pTrans) { + if (mndCantExecuteTransAction(pMnode)) return false; + bool continueExec = true; int32_t code = mndTransRollback(pMnode, pTrans); @@ -1284,10 +1292,11 @@ static bool mndTransPerfromFinishedStage(SMnode *pMnode, STrans *pTrans) { return continueExec; } -static void mndTransExecute(SMnode *pMnode, STrans *pTrans) { +void mndTransExecute(SMnode *pMnode, STrans *pTrans) { bool continueExec = true; while (continueExec) { + mDebug("trans:%d, continue to execute, stage:%s", pTrans->id, mndTransStr(pTrans->stage)); pTrans->lastExecTime = taosGetTimestampMs(); switch (pTrans->stage) { case TRN_STAGE_PREPARE: diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index 5a8cf562a0..7c0ba65900 100644 --- a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -57,8 +57,8 @@ # ---- mnode ./test.sh -f tsim/mnode/basic1.sim -#./test.sh -f tsim/mnode/basic2.sim -./test.sh -f tsim/mnode/basic3.sim +./test.sh -f tsim/mnode/basic2.sim +#./test.sh -f tsim/mnode/basic3.sim ./test.sh -f tsim/mnode/basic4.sim # ---- show diff --git a/tests/script/tsim/mnode/basic2.sim b/tests/script/tsim/mnode/basic2.sim index 78558263d6..ff0101dd8e 100644 --- a/tests/script/tsim/mnode/basic2.sim +++ b/tests/script/tsim/mnode/basic2.sim @@ -92,6 +92,8 @@ sql show mnodes if $rows != 2 then return -1 endi +print ===> $data00 $data01 $data02 $data03 $data04 $data05 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 sql show users if $rows != 2 then @@ -111,6 +113,8 
@@ step3: return -1 endi sql show dnodes -x step3 +print ===> $data00 $data01 $data02 $data03 $data04 $data05 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 if $data(1)[4] != ready then goto step3 endi diff --git a/tests/script/tsim/mnode/basic3.sim b/tests/script/tsim/mnode/basic3.sim index dec036faaf..c876f4fd82 100644 --- a/tests/script/tsim/mnode/basic3.sim +++ b/tests/script/tsim/mnode/basic3.sim @@ -39,7 +39,7 @@ endi print =============== step2: create mnode 2 sql create mnode on dnode 2 sql create mnode on dnode 3 -return + system sh/exec.sh -n dnode1 -s stop -x SIGKILL sql_error create mnode on dnode 4 From 34d7d5fdbc3788e90f15d1dfd9b19d65aeea62fd Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Sat, 11 Jun 2022 14:22:18 +0800 Subject: [PATCH 09/16] test: reput mnode case --- tests/script/jenkins/basic.txt | 2 +- tests/script/tsim/mnode/basic3.sim | 82 +++++++++++++++++++++++------- 2 files changed, 65 insertions(+), 19 deletions(-) diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index 7c0ba65900..3d398c8ae7 100644 --- a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -58,7 +58,7 @@ # ---- mnode ./test.sh -f tsim/mnode/basic1.sim ./test.sh -f tsim/mnode/basic2.sim -#./test.sh -f tsim/mnode/basic3.sim +./test.sh -f tsim/mnode/basic3.sim ./test.sh -f tsim/mnode/basic4.sim # ---- show diff --git a/tests/script/tsim/mnode/basic3.sim b/tests/script/tsim/mnode/basic3.sim index c876f4fd82..695e23f3ac 100644 --- a/tests/script/tsim/mnode/basic3.sim +++ b/tests/script/tsim/mnode/basic3.sim @@ -39,11 +39,9 @@ endi print =============== step2: create mnode 2 sql create mnode on dnode 2 sql create mnode on dnode 3 - -system sh/exec.sh -n dnode1 -s stop -x SIGKILL sql_error create mnode on dnode 4 - +$leaderExist = 0 $x = 0 step2: $x = $x + 1 @@ -52,13 +50,20 @@ step2: return -1 endi sql show mnodes -x step2 -if $data(1)[2] != leader then - goto step2 + +print ===> $data00 $data01 $data02 $data03 
$data04 $data05 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 +print ===> $data20 $data21 $data22 $data23 $data24 $data25 +if $data(1)[2] == leader then + $leaderExist = 1 endi -if $data(2)[2] != follower then - goto step2 +if $data(2)[2] == leader then + $leaderExist = 1 endi -if $data(3)[2] != follower then +if $data(3)[2] == leader then + $leaderExist = 1 +endi +if $leaderExist != 1 then goto step2 endi @@ -70,10 +75,10 @@ if $rows != 2 then endi # wait mnode2 mnode3 recv data finish -sleep 10000 +sleep 1000 print =============== step4: stop dnode1 -system sh/exec.sh -n dnode1 -s stop +system sh/exec.sh -n dnode1 -s stop -x SIGKILL $x = 0 step4: @@ -92,13 +97,22 @@ if $rows != 2 then return -1 endi -sleep 1000 -sql show dnodes +$x = 0 +step41: + $x = $x + 1 + sleep 1000 + if $x == 10 then + return -1 + endi +sql show dnodes -x step41 +print ===> $data00 $data01 $data02 $data03 $data04 $data05 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 +print ===> $data20 $data21 $data22 $data23 $data24 $data25 if $data(2)[4] != ready then - return -1 + goto step41 endi if $data(3)[4] != ready then - return -1 + goto step41 endi print =============== step5: stop dnode1 @@ -117,15 +131,29 @@ print $data(1)[0] $data(1)[1] $data(1)[2] print $data(2)[0] $data(2)[1] $data(2)[2] print $data(3)[0] $data(3)[1] $data(3)[2] -if $data(2)[2] != offline then - goto step5 -endi - sql show users if $rows != 2 then return -1 endi +$x = 0 +step51: + $x = $x + 1 + sleep 1000 + if $x == 10 then + return -1 + endi +sql show dnodes -x step51 +print ===> $data00 $data01 $data02 $data03 $data04 $data05 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 +print ===> $data20 $data21 $data22 $data23 $data24 $data25 +if $data(1)[4] != ready then + goto step51 +endi +if $data(3)[4] != ready then + goto step51 +endi + print =============== step6: stop dnode1 system sh/exec.sh -n dnode2 -s start system sh/exec.sh -n dnode3 -s stop @@ -147,6 +175,24 @@ if $rows != 2 then 
return -1 endi +$x = 0 +step61: + $x = $x + 1 + sleep 1000 + if $x == 10 then + return -1 + endi +sql show dnodes -x step61 +print ===> $data00 $data01 $data02 $data03 $data04 $data05 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 +print ===> $data20 $data21 $data22 $data23 $data24 $data25 +if $data(1)[4] != ready then + goto step61 +endi +if $data(2)[4] != ready then + goto step61 +endi + system sh/exec.sh -n dnode1 -s stop system sh/exec.sh -n dnode2 -s stop system sh/exec.sh -n dnode3 -s stop From ed829455a9c8cbc889fe35ddbfff7ab8a0983c79 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Sat, 11 Jun 2022 15:15:55 +0800 Subject: [PATCH 10/16] feat: tsma refactor --- include/common/taosdef.h | 1 - include/common/tmsg.h | 45 - include/common/tmsgdef.h | 2 - source/common/src/tmsg.c | 139 +-- source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 1 - source/dnode/mnode/impl/inc/mndDef.h | 2 - source/dnode/mnode/impl/src/mndSma.c | 105 +- source/dnode/vnode/CMakeLists.txt | 1 - source/dnode/vnode/src/inc/sma.h | 62 +- source/dnode/vnode/src/inc/vnodeInt.h | 2 - source/dnode/vnode/src/sma/sma.c | 25 +- source/dnode/vnode/src/sma/smaEnv.c | 84 -- source/dnode/vnode/src/sma/smaTDBImpl.c | 130 --- source/dnode/vnode/src/sma/smaTimeRange.c | 1036 ------------------- source/dnode/vnode/src/sma/smaTimeRange2.c | 839 +-------------- source/dnode/vnode/src/tq/tqPush.c | 13 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 47 - 17 files changed, 18 insertions(+), 2516 deletions(-) delete mode 100644 source/dnode/vnode/src/sma/smaTDBImpl.c delete mode 100644 source/dnode/vnode/src/sma/smaTimeRange.c diff --git a/include/common/taosdef.h b/include/common/taosdef.h index 60b1dc6c10..516df71b0b 100644 --- a/include/common/taosdef.h +++ b/include/common/taosdef.h @@ -98,7 +98,6 @@ extern char *qtypeStr[]; #undef TD_DEBUG_PRINT_ROW #undef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS #undef TD_DEBUG_PRINT_TAG -#define TD_DEBUG_SMA_ID 123456 #ifdef __cplusplus } diff --git a/include/common/tmsg.h 
b/include/common/tmsg.h index e7d9bc762a..e4e132a532 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2345,7 +2345,6 @@ typedef struct { char indexName[TSDB_INDEX_NAME_LEN]; int32_t exprLen; int32_t tagsFilterLen; - int32_t numOfVgroups; // for dstVgroup int64_t indexUid; tb_uid_t tableUid; // super/child/common table uid tb_uid_t dstTbUid; // for dstVgroup @@ -2355,7 +2354,6 @@ typedef struct { char* dstTbName; // for dstVgroup char* expr; // sma expression char* tagsFilter; - SVgEpSet* pVgEpSet; // for dstVgroup SSchemaWrapper schemaRow; // for dstVgroup SSchemaWrapper schemaTag; // for dstVgroup } STSma; // Time-range-wise SMA @@ -2442,49 +2440,6 @@ static int32_t tDecodeTSmaWrapper(SDecoder* pDecoder, STSmaWrapper* pReq) { return 0; } -typedef struct { - int64_t indexUid; - STimeWindow queryWindow; -} SVGetTsmaExpWndsReq; - -#define SMA_WNDS_EXPIRE_FLAG (0x1) -#define SMA_WNDS_IS_EXPIRE(flag) (((flag)&SMA_WNDS_EXPIRE_FLAG) != 0) -#define SMA_WNDS_SET_EXPIRE(flag) ((flag) |= SMA_WNDS_EXPIRE_FLAG) - -typedef struct { - int64_t indexUid; - int8_t flags; // 0x1 all window expired - int32_t numExpWnds; - TSKEY wndSKeys[]; -} SVGetTsmaExpWndsRsp; - -int32_t tEncodeSVGetTSmaExpWndsReq(SEncoder* pCoder, const SVGetTsmaExpWndsReq* pReq); -int32_t tDecodeSVGetTsmaExpWndsReq(SDecoder* pCoder, SVGetTsmaExpWndsReq* pReq); -int32_t tEncodeSVGetTSmaExpWndsRsp(SEncoder* pCoder, const SVGetTsmaExpWndsRsp* pReq); -int32_t tDecodeSVGetTsmaExpWndsRsp(SDecoder* pCoder, SVGetTsmaExpWndsRsp* pReq); - -typedef struct { - int64_t nKeys; // n consecutive keys since skey - int64_t skey; -} SVTsmaExpWndItem; - -typedef struct { - int64_t indexUid; - int64_t version; // tsma result version - int64_t nItems; - SVTsmaExpWndItem items[]; -} SVClrTsmaExpWndsReq; - -typedef struct { - int64_t indexUid; - int32_t code; -} SVClrTsmaExpWndsRsp; - -int32_t tEncodeSVClrTsmaExpWndsReq(SEncoder* pCoder, const SVClrTsmaExpWndsReq* pReq); -int32_t tDecodeSVClrTsmaExpWndsReq(SDecoder* 
pCoder, SVClrTsmaExpWndsReq* pReq); -int32_t tEncodeSVClrTsmaExpWndsRsp(SEncoder* pCoder, const SVClrTsmaExpWndsRsp* pReq); -int32_t tDecodeSVClrTsmaExpWndsRsp(SDecoder* pCoder, SVClrTsmaExpWndsRsp* pReq); - typedef struct { int idx; } SMCreateFullTextReq; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 7e31076528..a9c816707e 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -190,8 +190,6 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_CANCEL_SMA, "vnode-cancel-sma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_DROP_SMA, "vnode-drop-sma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_SUBMIT_RSMA, "vnode-submit-rsma", SSubmitReq, SSubmitRsp) - TD_DEF_MSG_TYPE(TDMT_VND_GET_TSMA_EXP_WNDS, "vnode-get-tsma-expired-windows", SVGetTsmaExpWndsReq, SVGetTsmaExpWndsRsp) - TD_DEF_MSG_TYPE(TDMT_VND_CLR_TSMA_EXP_WNDS, "vnode-clr-tsma-expired-windows", SVClrTsmaExpWndsReq, SVClrTsmaExpWndsRsp) TD_DEF_MSG_TYPE(TDMT_VND_DELETE, "delete-data", SVDeleteReq, SVDeleteRsp) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_REPLICA, "alter-replica", NULL, NULL) diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 3fb60f3ff5..d16ab57ea9 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -3877,7 +3877,6 @@ int32_t tEncodeTSma(SEncoder *pCoder, const STSma *pSma) { if (tEncodeCStr(pCoder, pSma->indexName) < 0) return -1; if (tEncodeI32(pCoder, pSma->exprLen) < 0) return -1; if (tEncodeI32(pCoder, pSma->tagsFilterLen) < 0) return -1; - if (tEncodeI32(pCoder, pSma->numOfVgroups) < 0) return -1; if (tEncodeI64(pCoder, pSma->indexUid) < 0) return -1; if (tEncodeI64(pCoder, pSma->tableUid) < 0) return -1; if (tEncodeI64(pCoder, pSma->dstTbUid) < 0) return -1; @@ -3892,22 +3891,8 @@ int32_t tEncodeTSma(SEncoder *pCoder, const STSma *pSma) { if (tEncodeCStr(pCoder, pSma->tagsFilter) < 0) return -1; } - if (pSma->numOfVgroups) { // only needed in dstVgroup - for (int32_t v = 0; v < 
pSma->numOfVgroups; ++v) { - if (tEncodeI32(pCoder, pSma->pVgEpSet[v].vgId) < 0) return -1; - if (tEncodeI8(pCoder, pSma->pVgEpSet[v].epSet.inUse) < 0) return -1; - int8_t numOfEps = pSma->pVgEpSet[v].epSet.numOfEps; - if (tEncodeI8(pCoder, numOfEps) < 0) return -1; - for (int32_t n = 0; n < numOfEps; ++n) { - const SEp *pEp = &pSma->pVgEpSet[v].epSet.eps[n]; - if (tEncodeCStr(pCoder, pEp->fqdn) < 0) return -1; - if (tEncodeU16(pCoder, pEp->port) < 0) return -1; - } - } - - tEncodeSSchemaWrapper(pCoder, &pSma->schemaRow); - tEncodeSSchemaWrapper(pCoder, &pSma->schemaTag); - } + tEncodeSSchemaWrapper(pCoder, &pSma->schemaRow); + tEncodeSSchemaWrapper(pCoder, &pSma->schemaTag); return 0; } @@ -3921,7 +3906,6 @@ int32_t tDecodeTSma(SDecoder *pCoder, STSma *pSma) { if (tDecodeCStrTo(pCoder, pSma->indexName) < 0) return -1; if (tDecodeI32(pCoder, &pSma->exprLen) < 0) return -1; if (tDecodeI32(pCoder, &pSma->tagsFilterLen) < 0) return -1; - if (tDecodeI32(pCoder, &pSma->numOfVgroups) < 0) return -1; if (tDecodeI64(pCoder, &pSma->indexUid) < 0) return -1; if (tDecodeI64(pCoder, &pSma->tableUid) < 0) return -1; if (tDecodeI64(pCoder, &pSma->dstTbUid) < 0) return -1; @@ -3939,30 +3923,9 @@ int32_t tDecodeTSma(SDecoder *pCoder, STSma *pSma) { } else { pSma->tagsFilter = NULL; } - if (pSma->numOfVgroups > 0) { // only needed in dstVgroup - pSma->pVgEpSet = (SVgEpSet *)tDecoderMalloc(pCoder, pSma->numOfVgroups * sizeof(SVgEpSet)); - if (!pSma->pVgEpSet) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - memset(pSma->pVgEpSet, 0, pSma->numOfVgroups * sizeof(SVgEpSet)); - - for (int32_t v = 0; v < pSma->numOfVgroups; ++v) { - if (tDecodeI32(pCoder, &pSma->pVgEpSet[v].vgId) < 0) return -1; - if (tDecodeI8(pCoder, &pSma->pVgEpSet[v].epSet.inUse) < 0) return -1; - if (tDecodeI8(pCoder, &pSma->pVgEpSet[v].epSet.numOfEps) < 0) return -1; - int8_t numOfEps = pSma->pVgEpSet[v].epSet.numOfEps; - for (int32_t n = 0; n < numOfEps; ++n) { - SEp *pEp = 
&pSma->pVgEpSet[v].epSet.eps[n]; - if (tDecodeCStrTo(pCoder, pEp->fqdn) < 0) return -1; - if (tDecodeU16(pCoder, &pEp->port) < 0) return -1; - } - } - - tDecodeSSchemaWrapperEx(pCoder, &pSma->schemaRow); - tDecodeSSchemaWrapperEx(pCoder, &pSma->schemaTag); - } + // only needed in dstVgroup + tDecodeSSchemaWrapperEx(pCoder, &pSma->schemaRow); + tDecodeSSchemaWrapperEx(pCoder, &pSma->schemaTag); return 0; } @@ -4005,98 +3968,6 @@ int32_t tDecodeSVDropTSmaReq(SDecoder *pCoder, SVDropTSmaReq *pReq) { return 0; } -int32_t tEncodeSVGetTSmaExpWndsReq(SEncoder *pCoder, const SVGetTsmaExpWndsReq *pReq) { - if (tStartEncode(pCoder) < 0) return -1; - - if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; - if (tEncodeI64(pCoder, pReq->queryWindow.skey) < 0) return -1; - if (tEncodeI64(pCoder, pReq->queryWindow.ekey) < 0) return -1; - - tEndEncode(pCoder); - return 0; -} - -int32_t tDecodeSVGetTsmaExpWndsReq(SDecoder *pCoder, SVGetTsmaExpWndsReq *pReq) { - if (tStartDecode(pCoder) < 0) return -1; - - if (tDecodeI64(pCoder, &pReq->indexUid) < 0) return -1; - if (tDecodeI64(pCoder, &pReq->queryWindow.skey) < 0) return -1; - if (tDecodeI64(pCoder, &pReq->queryWindow.ekey) < 0) return -1; - - tEndDecode(pCoder); - return 0; -} - -int32_t tEncodeSVGetTSmaExpWndsRsp(SEncoder *pCoder, const SVGetTsmaExpWndsRsp *pReq) { - if (tStartEncode(pCoder) < 0) return -1; - - if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; - if (tEncodeI8(pCoder, pReq->flags) < 0) return -1; - if (tEncodeI32(pCoder, pReq->numExpWnds) < 0) return -1; - for (int32_t i = 0; i < pReq->numExpWnds; ++i) { - if (tEncodeI64(pCoder, pReq->wndSKeys[i]) < 0) return -1; - } - tEndEncode(pCoder); - return 0; -} - -int32_t tDecodeSVGetTsmaExpWndsRsp(SDecoder *pCoder, SVGetTsmaExpWndsRsp *pReq) { - if (tStartDecode(pCoder) < 0) return -1; - - if (tDecodeI64(pCoder, &pReq->indexUid) < 0) return -1; - if (tDecodeI8(pCoder, &pReq->flags) < 0) return -1; - if (tDecodeI32(pCoder, &pReq->numExpWnds) < 0) return -1; - for 
(int32_t i = 0; i < pReq->numExpWnds; ++i) { - if (tDecodeI64(pCoder, &pReq->wndSKeys[i]) < 0) return -1; - } - - tEndDecode(pCoder); - return 0; -} - -int32_t tEncodeSVClrTsmaExpWndsReq(SEncoder *pCoder, const SVClrTsmaExpWndsReq *pReq) { - if (tStartEncode(pCoder) < 0) return -1; - if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; - if (tEncodeI64(pCoder, pReq->version) < 0) return -1; - if (tEncodeI64v(pCoder, pReq->nItems) < 0) return -1; - for (int64_t n = 0; pReq->nItems; ++n) { - if (tEncodeI64v(pCoder, pReq->items[n].nKeys) < 0) return -1; - if (tEncodeI64(pCoder, pReq->items[n].skey) < 0) return -1; - } - tEndEncode(pCoder); - return 0; -} - -int32_t tDecodeSVClrTsmaExpWndsReq(SDecoder *pCoder, SVClrTsmaExpWndsReq *pReq) { - if (tStartDecode(pCoder) < 0) return -1; - if (tDecodeI64(pCoder, &pReq->indexUid) < 0) return -1; - if (tDecodeI64(pCoder, &pReq->version) < 0) return -1; - if (tDecodeI64v(pCoder, &pReq->nItems) < 0) return -1; - - for (int64_t i = 0; i < pReq->nItems; ++i) { - if (tDecodeI64v(pCoder, &pReq->items[i].nKeys) < 0) return -1; - if (tDecodeI64(pCoder, &pReq->items[i].skey) < 0) return -1; - } - tEndDecode(pCoder); - return 0; -} - -int32_t tEncodeSVClrTsmaExpWndsRsp(SEncoder *pCoder, const SVClrTsmaExpWndsRsp *pReq) { - if (tStartEncode(pCoder) < 0) return -1; - if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; - if (tEncodeI32v(pCoder, pReq->code) < 0) return -1; - tEndEncode(pCoder); - return 0; -} - -int32_t tDecodeSVClrTsmaExpWndsRsp(SDecoder *pCoder, SVClrTsmaExpWndsRsp *pReq) { - if (tStartDecode(pCoder) < 0) return -1; - if (tDecodeI64(pCoder, &pReq->indexUid) < 0) return -1; - if (tDecodeI32v(pCoder, &pReq->code) < 0) return -1; - tEndDecode(pCoder); - return 0; -} - int32_t tSerializeSVDeleteReq(void *buf, int32_t bufLen, SVDeleteReq *pReq) { int32_t headLen = sizeof(SMsgHead); if (buf != NULL) { diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 
1540f10ba4..ee120576c3 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -347,7 +347,6 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_CONSUME, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DELETE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY_HEARTBEAT, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_CLR_TSMA_EXP_WNDS, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index d21af87067..ae92497b8a 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -318,12 +318,10 @@ typedef struct { int32_t tagsFilterLen; int32_t sqlLen; int32_t astLen; - int32_t numOfVgroups; // for dstVgroup char* expr; char* tagsFilter; char* sql; char* ast; - SVgEpSet* pVgEpSet; // for dstVgroup SSchemaWrapper schemaRow; // for dstVgroup SSchemaWrapper schemaTag; // for dstVgroup } SSmaObj; diff --git a/source/dnode/mnode/impl/src/mndSma.c b/source/dnode/mnode/impl/src/mndSma.c index a643402739..c19b558f19 100644 --- a/source/dnode/mnode/impl/src/mndSma.c +++ b/source/dnode/mnode/impl/src/mndSma.c @@ -272,8 +272,6 @@ static void *mndBuildVCreateSmaReq(SMnode *pMnode, SVgObj *pVgroup, SSmaObj *pSm req.sliding = pSma->sliding; req.expr = pSma->expr; req.tagsFilter = pSma->tagsFilter; - req.numOfVgroups = pSma->numOfVgroups; - req.pVgEpSet = pSma->pVgEpSet; req.schemaRow = pSma->schemaRow; req.schemaTag = pSma->schemaTag; req.dstTbName = pSma->dstTbName; @@ -435,7 +433,6 @@ static int32_t mndSetCreateSmaVgroupRedoActions(SMnode *pMnode, STrans *pTrans, mndReleaseDnode(pMnode, 
pDnode); // todo add sma info here -#if 1 SNode *pAst = NULL; if (nodesStringToNode(pSma->ast, &pAst) < 0) { return -1; @@ -452,7 +449,6 @@ static int32_t mndSetCreateSmaVgroupRedoActions(SMnode *pMnode, STrans *pTrans, pSma->schemaTag.version = 1; pSma->schemaTag.pSchema = taosMemoryCalloc(1, sizeof(SSchema)); if (!pSma->schemaTag.pSchema) { - nodesDestroyNode(pAst); return -1; } pSma->schemaTag.pSchema[0].type = TSDB_DATA_TYPE_BIGINT; @@ -461,17 +457,7 @@ static int32_t mndSetCreateSmaVgroupRedoActions(SMnode *pMnode, STrans *pTrans, pSma->schemaTag.pSchema[0].flags = 0; snprintf(pSma->schemaTag.pSchema[0].name, TSDB_COL_NAME_LEN, "groupId"); - SVgEpSet *pVgEpSet = NULL; - int32_t numOfVgroups = 0; - if (mndSmaGetVgEpSet(pMnode, pDb, &pVgEpSet, &numOfVgroups) != 0) { - nodesDestroyNode(pAst); - return -1; - } - nodesDestroyNode(pAst); - pSma->pVgEpSet = pVgEpSet; - pSma->numOfVgroups = numOfVgroups; -#endif int32_t smaContLen = 0; void *pSmaReq = mndBuildVCreateSmaReq(pMnode, pVgroup, pSma, &smaContLen); if (pSmaReq == NULL) return -1; @@ -501,10 +487,7 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea memcpy(smaObj.stb, pStb->name, TSDB_TABLE_FNAME_LEN); memcpy(smaObj.db, pDb->name, TSDB_DB_FNAME_LEN); smaObj.createdTime = taosGetTimestampMs(); -#if 0 smaObj.uid = mndGenerateUid(pCreate->name, TSDB_TABLE_FNAME_LEN); -#endif - smaObj.uid = TD_DEBUG_SMA_ID; char resultTbName[TSDB_TABLE_FNAME_LEN + 16] = {0}; snprintf(resultTbName, TSDB_TABLE_FNAME_LEN + 16, "td.tsma.rst.tb.%s", pCreate->name); memcpy(smaObj.dstTbName, resultTbName, TSDB_TABLE_FNAME_LEN); @@ -514,7 +497,6 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea smaObj.intervalUnit = pCreate->intervalUnit; smaObj.slidingUnit = pCreate->slidingUnit; smaObj.timezone = pCreate->timezone; - // smaObj.dstVgId = pCreate->dstVgId; smaObj.interval = pCreate->interval; smaObj.offset = pCreate->offset; smaObj.sliding = pCreate->sliding; @@ -547,42 
+529,6 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea memcpy(smaObj.ast, pCreate->ast, smaObj.astLen); } -#if 1 // only for debugging, not needed in common vgroups, only needed in dstVgroup. - SNode *pAst = NULL; - if (nodesStringToNode(smaObj.ast, &pAst) < 0) { - return -1; - } - if (qExtractResultSchema(pAst, &smaObj.schemaRow.nCols, &smaObj.schemaRow.pSchema) != 0) { - nodesDestroyNode(pAst); - return -1; - } - smaObj.schemaRow.version = 1; - - smaObj.schemaTag.nCols = 1; - smaObj.schemaTag.version = 1; - smaObj.schemaTag.pSchema = taosMemoryCalloc(1, sizeof(SSchema)); - if (!smaObj.schemaTag.pSchema) { - nodesDestroyNode(pAst); - return -1; - } - smaObj.schemaTag.pSchema[0].type = TSDB_DATA_TYPE_BIGINT; - smaObj.schemaTag.pSchema[0].bytes = TYPE_BYTES[TSDB_DATA_TYPE_BIGINT]; - smaObj.schemaTag.pSchema[0].colId = smaObj.schemaRow.nCols + PRIMARYKEY_TIMESTAMP_COL_ID; - smaObj.schemaTag.pSchema[0].flags = 0; - snprintf(smaObj.schemaTag.pSchema[0].name, TSDB_COL_NAME_LEN, "groupId"); - - nodesDestroyNode(pAst); - - SVgEpSet *pVgEpSet = NULL; - int32_t numOfVgroups = 0; - if (mndSmaGetVgEpSet(pMnode, pDb, &pVgEpSet, &numOfVgroups) != 0) { - return -1; - } - - smaObj.pVgEpSet = pVgEpSet; - smaObj.numOfVgroups = numOfVgroups; -#endif - SStreamObj streamObj = {0}; tstrncpy(streamObj.name, pCreate->name, TSDB_STREAM_FNAME_LEN); tstrncpy(streamObj.sourceDb, pDb->name, TSDB_DB_FNAME_LEN); @@ -1168,53 +1114,4 @@ static int32_t mndRetrieveSma(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBloc static void mndCancelGetNextSma(SMnode *pMnode, void *pIter) { SSdb *pSdb = pMnode->pSdb; sdbCancelFetch(pSdb, pIter); -} - -static int32_t mndSmaGetVgEpSet(SMnode *pMnode, SDbObj *pDb, SVgEpSet **ppVgEpSet, int32_t *numOfVgroups) { - SSdb *pSdb = pMnode->pSdb; - SVgObj *pVgroup = NULL; - void *pIter = NULL; - SVgEpSet *pVgEpSet = NULL; - int32_t nAllocVgs = 16; - int32_t nVgs = 0; - - pVgEpSet = taosMemoryCalloc(nAllocVgs, sizeof(SVgEpSet)); - if 
(!pVgEpSet) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - while (1) { - pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup); - if (pIter == NULL) break; - if (pVgroup->dbUid != pDb->uid) { - sdbRelease(pSdb, pVgroup); - continue; - } - - if (nVgs >= nAllocVgs) { - void *p = taosMemoryRealloc(pVgEpSet, nAllocVgs * 2 * sizeof(SVgEpSet)); - if (!p) { - taosMemoryFree(pVgEpSet); - sdbCancelFetch(pSdb, pIter); - sdbRelease(pSdb, pVgroup); - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - pVgEpSet = (SVgEpSet *)p; - nAllocVgs *= 2; - } - - (pVgEpSet + nVgs)->vgId = pVgroup->vgId; - (pVgEpSet + nVgs)->epSet = mndGetVgroupEpset(pMnode, pVgroup); - - ++nVgs; - - sdbRelease(pSdb, pVgroup); - } - - *ppVgEpSet = pVgEpSet; - *numOfVgroups = nVgs; - - return 0; -} +} \ No newline at end of file diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt index 978fd9013a..8dca589320 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -28,7 +28,6 @@ target_sources( # sma "src/sma/sma.c" - "src/sma/smaTDBImpl.c" "src/sma/smaEnv.c" "src/sma/smaOpen.c" "src/sma/smaRollup.c" diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 7eb5c34e5d..e9da125841 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -43,35 +43,17 @@ typedef struct SRSmaInfo SRSmaInfo; struct SSmaEnv { TdThreadRwlock lock; int8_t type; - TXN txn; - void *pPool; // SPoolMem - SDiskID did; - TDB *dbEnv; // TODO: If it's better to put it in smaIndex level? 
- char *path; // relative path SSmaStat *pStat; }; #define SMA_ENV_LOCK(env) ((env)->lock) #define SMA_ENV_TYPE(env) ((env)->type) -#define SMA_ENV_DID(env) ((env)->did) -#define SMA_ENV_ENV(env) ((env)->dbEnv) -#define SMA_ENV_PATH(env) ((env)->path) #define SMA_ENV_STAT(env) ((env)->pStat) #define SMA_ENV_STAT_ITEMS(env) ((env)->pStat->smaStatItems) struct SSmaStatItem { - /** - * @brief The field 'state' is here to demonstrate if one smaIndex is ready to provide service. - * - TSDB_SMA_STAT_OK: 1) The sma calculation of history data is finished; 2) Or recevied information from - * Streaming Module or TSDB local persistence. - * - TSDB_SMA_STAT_EXPIRED: 1) If sma calculation of history TS data is not finished; 2) Or if the TSDB is open, - * without information about its previous state. - * - TSDB_SMA_STAT_DROPPED: 1)sma dropped - * N.B. only applicable to tsma - */ - int8_t state; // ETsdbSmaStat - SHashObj *expireWindows; // key: skey of time window, value: version - STSma *pTSma; // cache schema + int8_t state; // ETsdbSmaStat + STSma *pTSma; // cache schema }; struct SSmaStat { @@ -84,29 +66,6 @@ struct SSmaStat { #define SMA_STAT_ITEMS(s) ((s)->smaStatItems) #define SMA_STAT_INFO_HASH(s) ((s)->rsmaInfoHash) -struct SSmaKey { - TSKEY skey; - int64_t groupId; -}; - -typedef struct SDBFile SDBFile; - -struct SDBFile { - int32_t fid; - TTB *pDB; - char *path; -}; - -int32_t tdSmaBeginCommit(SSmaEnv *pEnv); -int32_t tdSmaEndCommit(SSmaEnv *pEnv); - -int32_t smaOpenDBEnv(TDB **ppEnv, const char *path); -int32_t smaCloseDBEnv(TDB *pEnv); -int32_t smaOpenDBF(TDB *pEnv, SDBFile *pDBF); -int32_t smaCloseDBF(SDBFile *pDBF); -int32_t smaSaveSmaToDB(SDBFile *pDBF, void *pKey, int32_t keyLen, void *pVal, int32_t valLen, TXN *txn); -void *smaGetSmaDataByKey(SDBFile *pDBF, const void *pKey, int32_t keyLen, int32_t *valLen); - void tdDestroySmaEnv(SSmaEnv *pSmaEnv); void *tdFreeSmaEnv(SSmaEnv *pSmaEnv); #if 0 @@ -114,13 +73,6 @@ int32_t tbGetTSmaStatus(SSma *pSma, STSma 
*param, void *result); int32_t tbRemoveTSmaData(SSma *pSma, STSma *param, STimeWindow *pWin); #endif -static FORCE_INLINE int32_t tdEncodeTSmaKey(int64_t groupId, TSKEY tsKey, void **pData) { - int32_t len = 0; - len += taosEncodeFixedI64(pData, tsKey); - len += taosEncodeFixedI64(pData, groupId); - return len; -} - int32_t tdInitSma(SSma *pSma); int32_t tdDropTSma(SSma *pSma, char *pMsg); int32_t tdDropTSmaData(SSma *pSma, int64_t indexUid); @@ -133,8 +85,6 @@ int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType, bool onlyCheck); int32_t tdLockSma(SSma *pSma); int32_t tdUnLockSma(SSma *pSma); -int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg); - static FORCE_INLINE int16_t tdTSmaAdd(SSma *pSma, int16_t n) { return atomic_add_fetch_16(&SMA_TSMA_NUM(pSma), n); } static FORCE_INLINE int16_t tdTSmaSub(SSma *pSma, int16_t n) { return atomic_sub_fetch_16(&SMA_TSMA_NUM(pSma), n); } @@ -219,12 +169,8 @@ static int32_t tdInitSmaEnv(SSma *pSma, int8_t smaType, const char *path, SDisk void *tdFreeRSmaInfo(SRSmaInfo *pInfo); int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg); -int32_t tdUpdateExpireWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version); -int32_t tdClearExpireWindowImpl(SSma *pSma, const SVClrTsmaExpWndsReq *pMsg); -// TODO: This is the basic params, and should wrap the params to a queryHandle. 
-int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY querySKey, int32_t nMaxResult); - -int32_t tdGetTSmaDaysImpl(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days); +int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg); +int32_t tdProcessTSmaGetDaysImpl(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 944a03f70a..e374a64bf3 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -150,8 +150,6 @@ int32_t tqProcessTaskRecoverRsp(STQ* pTq, SRpcMsg* pMsg); int32_t smaOpen(SVnode* pVnode); int32_t smaClose(SSma* pSma); -int32_t tdUpdateExpireWindow(SSma* pSma, const SSubmitReq* pMsg, int64_t version); -int32_t tdClearExpireWindow(SSma* pSma, const SVClrTsmaExpWndsReq* pMsg); int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg); int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg); diff --git a/source/dnode/vnode/src/sma/sma.c b/source/dnode/vnode/src/sma/sma.c index 5782318006..98e5d7c66d 100644 --- a/source/dnode/vnode/src/sma/sma.c +++ b/source/dnode/vnode/src/sma/sma.c @@ -36,32 +36,9 @@ int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg) { return code; } -int32_t tdUpdateExpireWindow(SSma* pSma, const SSubmitReq* pMsg, int64_t version) { - int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdUpdateExpireWindowImpl(pSma, pMsg, version)) < 0) { - smaWarn("vgId:%d, update expire window failed since %s", SMA_VID(pSma), tstrerror(terrno)); - } - return code; -} -int32_t tdClearExpireWindow(SSma* pSma, const SVClrTsmaExpWndsReq* pMsg) { - int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdClearExpireWindowImpl(pSma, pMsg)) < 0) { - smaWarn("vgId:%d, update expire window failed since %s", SMA_VID(pSma), tstrerror(terrno)); - } - return code; -} - -int32_t tdGetTSmaData(SSma* pSma, 
char* pData, int64_t indexUid, TSKEY querySKey, int32_t nMaxResult) { - int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdGetTSmaDataImpl(pSma, pData, indexUid, querySKey, nMaxResult)) < 0) { - smaWarn("vgId:%d, get tsma data failed since %s", SMA_VID(pSma), tstrerror(terrno)); - } - return code; -} - int32_t smaGetTSmaDays(SVnodeCfg* pCfg, void* pCont, uint32_t contLen, int32_t* days) { int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdGetTSmaDaysImpl(pCfg, pCont, contLen, days)) < 0) { + if ((code = tdProcessTSmaGetDaysImpl(pCfg, pCont, contLen, days)) < 0) { smaWarn("vgId:%d, get tsma days failed since %s", pCfg->vgId, tstrerror(terrno)); } smaDebug("vgId:%d, get tsma days %d", pCfg->vgId, *days); diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index d15769e28e..5eec5076e8 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -151,31 +151,11 @@ static SSmaEnv *tdNewSmaEnv(const SSma *pSma, int8_t smaType, const char *path, return NULL; } - ASSERT(path && (strlen(path) > 0)); - SMA_ENV_PATH(pEnv) = strdup(path); - if (!SMA_ENV_PATH(pEnv)) { - tdFreeSmaEnv(pEnv); - return NULL; - } - - SMA_ENV_DID(pEnv) = did; - if (tdInitSmaStat(&SMA_ENV_STAT(pEnv), smaType) != TSDB_CODE_SUCCESS) { tdFreeSmaEnv(pEnv); return NULL; } - char aname[TSDB_FILENAME_LEN] = {0}; - tfsAbsoluteName(SMA_TFS(pSma), did, path, aname); - if (smaOpenDBEnv(&pEnv->dbEnv, aname) != TSDB_CODE_SUCCESS) { - tdFreeSmaEnv(pEnv); - return NULL; - } - - if (!(pEnv->pPool = openPool())) { - tdFreeSmaEnv(pEnv); - return NULL; - } return pEnv; } @@ -205,10 +185,7 @@ void tdDestroySmaEnv(SSmaEnv *pSmaEnv) { if (pSmaEnv) { tdDestroySmaState(pSmaEnv->pStat, SMA_ENV_TYPE(pSmaEnv)); taosMemoryFreeClear(pSmaEnv->pStat); - taosMemoryFreeClear(pSmaEnv->path); taosThreadRwlockDestroy(&(pSmaEnv->lock)); - smaCloseDBEnv(pSmaEnv->dbEnv); - closePool(pSmaEnv->pPool); } } @@ -280,7 +257,6 @@ void *tdFreeSmaStatItem(SSmaStatItem *pSmaStatItem) 
{ if (pSmaStatItem) { tDestroyTSma(pSmaStatItem->pTSma); taosMemoryFreeClear(pSmaStatItem->pTSma); - taosHashCleanup(pSmaStatItem->expireWindows); taosMemoryFreeClear(pSmaStatItem); } return NULL; @@ -399,63 +375,3 @@ int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType, bool onlyCheck) { return TSDB_CODE_SUCCESS; }; - -int32_t tdSmaBeginCommit(SSmaEnv *pEnv) { - TXN *pTxn = &pEnv->txn; - // start a new txn - tdbTxnOpen(pTxn, 0, poolMalloc, poolFree, pEnv->pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); - if (tdbBegin(pEnv->dbEnv, pTxn) != 0) { - smaWarn("tdSma tdb begin commit fail"); - return -1; - } - return 0; -} - -int32_t tdSmaEndCommit(SSmaEnv *pEnv) { - TXN *pTxn = &pEnv->txn; - - // Commit current txn - if (tdbCommit(pEnv->dbEnv, pTxn) != 0) { - smaWarn("tdSma tdb end commit fail"); - return -1; - } - tdbTxnClose(pTxn); - clearPool(pEnv->pPool); - return 0; -} - -#if 0 -/** - * @brief Get the start TS key of the last data block of one interval/sliding. - * - * @param pSma - * @param param - * @param result - * @return int32_t - * 1) Return 0 and fill the result if the check procedure is normal; - * 2) Return -1 if error occurs during the check procedure. - */ -int32_t tdGetTSmaStatus(SSma *pSma, void *smaIndex, void *result) { - const char *procedure = ""; - if (strncmp(procedure, "get the start TS key of the last data block", 100) != 0) { - return -1; - } - // fill the result - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Remove the tSma data files related to param between pWin. 
- * - * @param pSma - * @param param - * @param pWin - * @return int32_t - */ -int32_t tdRemoveTSmaData(SSma *pSma, void *smaIndex, STimeWindow *pWin) { - // for ("tSmaFiles of param-interval-sliding between pWin") { - // // remove the tSmaFile - // } - return TSDB_CODE_SUCCESS; -} -#endif diff --git a/source/dnode/vnode/src/sma/smaTDBImpl.c b/source/dnode/vnode/src/sma/smaTDBImpl.c deleted file mode 100644 index cac986d053..0000000000 --- a/source/dnode/vnode/src/sma/smaTDBImpl.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define ALLOW_FORBID_FUNC - -#include "sma.h" - -int32_t smaOpenDBEnv(TDB **ppEnv, const char *path) { - int ret = 0; - - if (path == NULL) return -1; - - ret = tdbOpen(path, 4096, 256, ppEnv); // use as param - - if (ret != 0) { - smaError("failed to create tsdb db env, ret = %d", ret); - return -1; - } - - return 0; -} - -int32_t smaCloseDBEnv(TDB *pEnv) { return tdbClose(pEnv); } - -static inline int tdSmaKeyCmpr(const void *arg1, int len1, const void *arg2, int len2) { - const SSmaKey *pKey1 = (const SSmaKey *)arg1; - const SSmaKey *pKey2 = (const SSmaKey *)arg2; - - ASSERT(len1 == len2 && len1 == sizeof(SSmaKey)); - - if (pKey1->skey < pKey2->skey) { - return -1; - } else if (pKey1->skey > pKey2->skey) { - return 1; - } - if (pKey1->groupId < pKey2->groupId) { - return -1; - } else if (pKey1->groupId > pKey2->groupId) { - return 1; - } - - return 0; -} - -static int32_t smaOpenDBDb(TTB **ppDB, TDB *pEnv, const char *pFName) { - tdb_cmpr_fn_t compFunc; - - // Create a database - compFunc = tdSmaKeyCmpr; - if (tdbTbOpen(pFName, -1, -1, compFunc, pEnv, ppDB) < 0) { - return -1; - } - - return 0; -} - -static int32_t smaCloseDBDb(TTB *pDB) { return tdbTbClose(pDB); } - -int32_t smaOpenDBF(TDB *pEnv, SDBFile *pDBF) { - // TEnv is shared by a group of SDBFile - if (!pEnv || !pDBF) { - terrno = TSDB_CODE_INVALID_PTR; - return -1; - } - - // Open DBF - if (smaOpenDBDb(&(pDBF->pDB), pEnv, pDBF->path) < 0) { - smaError("failed to open DBF: %s", pDBF->path); - smaCloseDBDb(pDBF->pDB); - return -1; - } - - return 0; -} - -int32_t smaCloseDBF(SDBFile *pDBF) { - int32_t ret = 0; - if (pDBF->pDB) { - ret = smaCloseDBDb(pDBF->pDB); - pDBF->pDB = NULL; - } - taosMemoryFreeClear(pDBF->path); - return ret; -} - -int32_t smaSaveSmaToDB(SDBFile *pDBF, void *pKey, int32_t keyLen, void *pVal, int32_t valLen, TXN *txn) { - int32_t ret; - - printf("save tsma data into %s, keyLen:%d valLen:%d txn:%p\n", pDBF->path, keyLen, valLen, txn); - ret = tdbTbUpsert(pDBF->pDB, pKey, 
keyLen, pVal, valLen, txn); - if (ret < 0) { - smaError("failed to upsert tsma data into db, ret = %d", ret); - return -1; - } - - return 0; -} - -void *smaGetSmaDataByKey(SDBFile *pDBF, const void *pKey, int32_t keyLen, int32_t *valLen) { - void *pVal = NULL; - int ret; - - ret = tdbTbGet(pDBF->pDB, pKey, keyLen, &pVal, valLen); - - if (ret < 0) { - smaError("failed to get tsma data from db, ret = %d", ret); - return NULL; - } - - ASSERT(*valLen >= 0); - - // TODO: lock? - // TODO: Would the key/value be destoryed during return the data? - // TODO: How about the key is updated while value length is changed? The original value buffer would be freed - // automatically? - - return pVal; -} \ No newline at end of file diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c deleted file mode 100644 index 4cc9703531..0000000000 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ /dev/null @@ -1,1036 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#include "sma.h" -#include "tsdb.h" - -typedef STsdbCfg STSmaKeepCfg; - -#undef _TEST_SMA_PRINT_DEBUG_LOG_ -#define SMA_STORAGE_TSDB_MINUTES 86400 -#define SMA_STORAGE_TSDB_TIMES 10 -#define SMA_STORAGE_SPLIT_FACTOR 144 // least records in tsma file -#define SMA_KEY_LEN 16 // TSKEY+groupId 8+8 -#define SMA_DROP_EXPIRED_TIME 10 // default is 10 seconds - -#define SMA_STATE_ITEM_HASH_SLOT 32 - -typedef struct { - SSma *pSma; - SDBFile dFile; - const SArray *pDataBlocks; // sma data - int64_t interval; // interval with the precision of DB -} STSmaWriteH; - -typedef struct { - int32_t iter; - int32_t fid; -} SmaFsIter; - -typedef struct { - STsdb *pTsdb; - SSma *pSma; - SDBFile dFile; - int64_t interval; // interval with the precision of DB - int32_t blockSize; // size of SMA block item - int32_t days; - int8_t storageLevel; - SmaFsIter smaFsIter; -} STSmaReadH; - -typedef enum { - SMA_STORAGE_LEVEL_TSDB = 0, // use days of self-defined e.g. vnode${N}/tsdb/tsma/sma_index_uid/v2f200.tsma - SMA_STORAGE_LEVEL_DFILESET = 1 // use days of TS data e.g. 
vnode${N}/tsdb/tsma/sma_index_uid/v2f1906.tsma -} ESmaStorageLevel; - -// static func - -static int64_t tdGetIntervalByPrecision(int64_t interval, uint8_t intervalUnit, int8_t precision, bool adjusted); -static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval); -static int32_t tdInitTSmaWriteH(STSmaWriteH *pSmaH, SSma *pSma, const SArray *pDataBlocks, int64_t interval, - int8_t intervalUnit); -static int32_t tdInitTSmaReadH(STSmaReadH *pSmaH, SSma *pSma, int64_t interval, int8_t intervalUnit); -static void tdDestroyTSmaWriteH(STSmaWriteH *pSmaH); -static int32_t tdGetTSmaDays(SSma *pSma, int64_t interval, int32_t storageLevel); -static int32_t tdSetTSmaDataFile(STSmaWriteH *pSmaH, int64_t indexUid, int32_t fid); -static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey); -static bool tdSetAndOpenTSmaFile(STSmaReadH *pReadH, TSKEY *queryKey); -static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyLen, void *pData, int32_t dataLen, - TXN *txn); -// expire window - -static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version); -static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey); -static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid); - -// read data - -// implementation - -/** - * @brief - * - * @param pSmaH - * @param pSma - * @param interval - * @param intervalUnit - * @return int32_t - */ -static int32_t tdInitTSmaReadH(STSmaReadH *pSmaH, SSma *pSma, int64_t interval, int8_t intervalUnit) { - STSmaKeepCfg *pCfg = SMA_TSDB_CFG(pSma); - pSmaH->pSma = pSma; - pSmaH->interval = tdGetIntervalByPrecision(interval, intervalUnit, SMA_TSDB_CFG(pSma)->precision, true); - pSmaH->storageLevel = tdGetSmaStorageLevel(pCfg, interval); - pSmaH->days = tdGetTSmaDays(pSma, pSmaH->interval, pSmaH->storageLevel); - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Init of tSma FS - * - * @param pReadH - * @param 
indexUid - * @param skey - * @return int32_t - */ -static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey) { - SSma *pSma = pSmaH->pSma; - - int32_t fid = (int32_t)(TSDB_KEY_FID(skey, pSmaH->days, SMA_TSDB_CFG(pSma)->precision)); - char tSmaFile[TSDB_FILENAME_LEN] = {0}; - snprintf(tSmaFile, TSDB_FILENAME_LEN, "%" PRIi64 "%sv%df%d.tsma", indexUid, TD_DIRSEP, SMA_VID(pSma), fid); - pSmaH->dFile.path = strdup(tSmaFile); - pSmaH->smaFsIter.iter = 0; - pSmaH->smaFsIter.fid = fid; - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Set and open tSma file if it has key locates in queryWin. - * - * @param pReadH - * @param param - * @param queryWin - * @return true - * @return false - */ -static bool tdSetAndOpenTSmaFile(STSmaReadH *pReadH, TSKEY *queryKey) { - // SArray *smaFs = pReadH->pTsdb->fs->cstatus->sf; - // int32_t nSmaFs = taosArrayGetSize(smaFs); - - smaCloseDBF(&pReadH->dFile); - -#if 0 - while (pReadH->smaFsIter.iter < nSmaFs) { - void *pSmaFile = taosArrayGet(smaFs, pReadH->smaFsIter.iter); - if (pSmaFile) { // match(indexName, queryWindow) - // TODO: select the file by index_name ... - pReadH->dFile = pSmaFile; - ++pReadH->smaFsIter.iter; - break; - } - ++pReadH->smaFsIter.iter; - } - - if (pReadH->pDFile) { - tdDebug("vg%d: smaFile %s matched", REPO_ID(pReadH->pTsdb), "[pSmaFile dir]"); - return true; - } -#endif - - return false; -} - -/** - * @brief Approximate value for week/month/year. 
- * - * @param interval - * @param intervalUnit - * @param precision - * @param adjusted Interval already adjusted according to DB precision - * @return int64_t - */ -static int64_t tdGetIntervalByPrecision(int64_t interval, uint8_t intervalUnit, int8_t precision, bool adjusted) { - if (adjusted) { - return interval; - } - - switch (intervalUnit) { - case TIME_UNIT_YEAR: // approximate value - interval *= 365 * 86400 * 1e3; - break; - case TIME_UNIT_MONTH: // approximate value - interval *= 30 * 86400 * 1e3; - break; - case TIME_UNIT_WEEK: // approximate value - interval *= 7 * 86400 * 1e3; - break; - case TIME_UNIT_DAY: // the interval for tSma calculation must <= day - interval *= 86400 * 1e3; - break; - case TIME_UNIT_HOUR: - interval *= 3600 * 1e3; - break; - case TIME_UNIT_MINUTE: - interval *= 60 * 1e3; - break; - case TIME_UNIT_SECOND: - interval *= 1e3; - break; - default: - break; - } - - switch (precision) { - case TSDB_TIME_PRECISION_MILLI: - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval / 1e3; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // nano second - return interval / 1e6; - } else { // ms - return interval; - } - break; - case TSDB_TIME_PRECISION_MICRO: - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // ns - return interval / 1e3; - } else { // ms - return interval * 1e3; - } - break; - case TSDB_TIME_PRECISION_NANO: - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval * 1e3; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // ns - return interval; - } else { // ms - return interval * 1e6; - } - break; - default: // ms - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval / 1e3; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // ns - return interval / 1e6; - } else { // ms - return interval; - } - break; - } - return interval; -} - -static int32_t tdInitTSmaWriteH(STSmaWriteH *pSmaH, SSma *pSma, 
const SArray *pDataBlocks, int64_t interval, - int8_t intervalUnit) { - pSmaH->pSma = pSma; - pSmaH->interval = tdGetIntervalByPrecision(interval, intervalUnit, SMA_TSDB_CFG(pSma)->precision, true); - pSmaH->pDataBlocks = pDataBlocks; - pSmaH->dFile.fid = SMA_IVLD_FID; - return TSDB_CODE_SUCCESS; -} - -static void tdDestroyTSmaWriteH(STSmaWriteH *pSmaH) { - if (pSmaH) { - smaCloseDBF(&pSmaH->dFile); - } -} - -static int32_t tdSetTSmaDataFile(STSmaWriteH *pSmaH, int64_t indexUid, int32_t fid) { - SSma *pSma = pSmaH->pSma; - ASSERT(!pSmaH->dFile.path && !pSmaH->dFile.pDB); - - pSmaH->dFile.fid = fid; - char tSmaFile[TSDB_FILENAME_LEN] = {0}; - snprintf(tSmaFile, TSDB_FILENAME_LEN, "%" PRIi64 "%sv%df%d.tsma", indexUid, TD_DIRSEP, SMA_VID(pSma), fid); - pSmaH->dFile.path = strdup(tSmaFile); - - return TSDB_CODE_SUCCESS; -} - -/** - * @brief - * - * @param pSma - * @param interval Interval calculated by DB's precision - * @param storageLevel - * @return int32_t - */ -static int32_t tdGetTSmaDays(SSma *pSma, int64_t interval, int32_t storageLevel) { - STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); - int32_t daysPerFile = pCfg->days; // unit is minute - - if (storageLevel == SMA_STORAGE_LEVEL_TSDB) { - int32_t minutes = SMA_STORAGE_TSDB_TIMES * (interval / tsTickPerMin[pCfg->precision]); - if (minutes > SMA_STORAGE_TSDB_MINUTES) { - daysPerFile = SMA_STORAGE_TSDB_MINUTES; - } - } - - return daysPerFile; -} - -/** - * @brief Judge the tSma storage level - * - * @param pCfg - * @param interval - * @return int32_t - */ -static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval) { - int64_t mInterval = convertTimeFromPrecisionToUnit(interval, pCfg->precision, TIME_UNIT_MINUTE); - if (pCfg->days / mInterval >= SMA_STORAGE_SPLIT_FACTOR) { - return SMA_STORAGE_LEVEL_DFILESET; - } - return SMA_STORAGE_LEVEL_TSDB; -} - -/** - * @brief Insert/Update Time-range-wise SMA data. - * - If interval < SMA_STORAGE_SPLIT_HOURS(e.g. 24), save the SMA data as a part of DFileSet to e.g. 
- * v3f1900.tsma.${sma_index_name}. The days is the same with that for TS data files. - * - If interval >= SMA_STORAGE_SPLIT_HOURS, save the SMA data to e.g. vnode3/tsma/v3f632.tsma.${sma_index_name}. The - * days is 30 times of the interval, and the minimum days is SMA_STORAGE_TSDB_DAYS(30d). - * - The destination file of one data block for some interval is determined by its start TS key. - * - * @param pSma - * @param msg - * @return int32_t - */ -int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { - STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); - const SArray *pDataBlocks = (const SArray *)msg; - int64_t testSkey = TSKEY_INITIAL_VAL; - - // TODO: destroy SSDataBlocks(msg) - - // For super table aggregation, the sma data is stored in vgroup calculated from the hash value of stable name. Thus - // the sma data would arrive ahead of the update-expired-window msg. - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) != TSDB_CODE_SUCCESS) { - terrno = TSDB_CODE_TDB_INIT_FAILED; - return TSDB_CODE_FAILED; - } - - if (!pDataBlocks) { - terrno = TSDB_CODE_INVALID_PTR; - smaWarn("vgId:%d, insert tSma data failed since pDataBlocks is NULL", SMA_VID(pSma)); - return terrno; - } - - if (taosArrayGetSize(pDataBlocks) <= 0) { - terrno = TSDB_CODE_INVALID_PARA; - smaWarn("vgId:%d, insert tSma data failed since pDataBlocks is empty", SMA_VID(pSma)); - return TSDB_CODE_FAILED; - } - - SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); - SSmaStat *pStat = SMA_ENV_STAT(pEnv); - SSmaStatItem *pItem = NULL; - - tdRefSmaStat(pSma, pStat); - - if (pStat && SMA_STAT_ITEMS(pStat)) { - pItem = taosHashGet(SMA_STAT_ITEMS(pStat), &indexUid, sizeof(indexUid)); - } - - if (!pItem || !(pItem = *(SSmaStatItem **)pItem) || tdSmaStatIsDropped(pItem)) { - terrno = TSDB_CODE_TSMA_INVALID_STAT; - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - - STSma *pTSma = pItem->pTSma; - STSmaWriteH tSmaH = {0}; - - if (tdInitTSmaWriteH(&tSmaH, pSma, pDataBlocks, 
pTSma->interval, pTSma->intervalUnit) != 0) { - return TSDB_CODE_FAILED; - } - - char rPath[TSDB_FILENAME_LEN] = {0}; - char aPath[TSDB_FILENAME_LEN] = {0}; - snprintf(rPath, TSDB_FILENAME_LEN, "%s%s%" PRIi64, SMA_ENV_PATH(pEnv), TD_DIRSEP, indexUid); - tfsAbsoluteName(SMA_TFS(pSma), SMA_ENV_DID(pEnv), rPath, aPath); - if (!taosCheckExistFile(aPath)) { - if (tfsMkdirRecurAt(SMA_TFS(pSma), rPath, SMA_ENV_DID(pEnv)) != TSDB_CODE_SUCCESS) { - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - } - - // Step 1: Judge the storage level and days - int32_t storageLevel = tdGetSmaStorageLevel(pCfg, tSmaH.interval); - int32_t minutePerFile = tdGetTSmaDays(pSma, tSmaH.interval, storageLevel); - - char smaKey[SMA_KEY_LEN] = {0}; // key: skey + groupId - char dataBuf[512] = {0}; // val: aggr data // TODO: handle 512 buffer? - void *pDataBuf = NULL; - int32_t sz = taosArrayGetSize(pDataBlocks); - for (int32_t i = 0; i < sz; ++i) { - SSDataBlock *pDataBlock = taosArrayGet(pDataBlocks, i); - int32_t colNum = pDataBlock->info.numOfCols; - int32_t rows = pDataBlock->info.rows; - int32_t rowSize = pDataBlock->info.rowSize; - int64_t groupId = pDataBlock->info.groupId; - for (int32_t j = 0; j < rows; ++j) { - printf("|"); - TSKEY skey = TSKEY_INITIAL_VAL; // the start key of TS window by interval - void *pSmaKey = &smaKey; - bool isStartKey = false; - - int32_t tlen = 0; // reset the len - pDataBuf = &dataBuf; // reset the buf - for (int32_t k = 0; k < colNum; ++k) { - SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock->pDataBlock, k); - void *var = POINTER_SHIFT(pColInfoData->pData, j * pColInfoData->info.bytes); - switch (pColInfoData->info.type) { - case TSDB_DATA_TYPE_TIMESTAMP: - if (!isStartKey) { - isStartKey = true; - skey = *(TSKEY *)var; - testSkey = skey; - printf("= skey %" PRIi64 " groupId = %" PRIi64 "|", skey, groupId); - tdEncodeTSmaKey(groupId, skey, &pSmaKey); - } else { - printf(" %" PRIi64 " |", *(int64_t *)var); - tlen += 
taosEncodeFixedI64(&pDataBuf, *(int64_t *)var); - break; - } - break; - case TSDB_DATA_TYPE_BOOL: - case TSDB_DATA_TYPE_UTINYINT: - printf(" %15d |", *(uint8_t *)var); - tlen += taosEncodeFixedU8(&pDataBuf, *(uint8_t *)var); - break; - case TSDB_DATA_TYPE_TINYINT: - printf(" %15d |", *(int8_t *)var); - tlen += taosEncodeFixedI8(&pDataBuf, *(int8_t *)var); - break; - case TSDB_DATA_TYPE_SMALLINT: - printf(" %15d |", *(int16_t *)var); - tlen += taosEncodeFixedI16(&pDataBuf, *(int16_t *)var); - break; - case TSDB_DATA_TYPE_USMALLINT: - printf(" %15d |", *(uint16_t *)var); - tlen += taosEncodeFixedU16(&pDataBuf, *(uint16_t *)var); - break; - case TSDB_DATA_TYPE_INT: - printf(" %15d |", *(int32_t *)var); - tlen += taosEncodeFixedI32(&pDataBuf, *(int32_t *)var); - break; - case TSDB_DATA_TYPE_FLOAT: - printf(" %15f |", *(float *)var); - tlen += taosEncodeBinary(&pDataBuf, var, sizeof(float)); - break; - case TSDB_DATA_TYPE_UINT: - printf(" %15u |", *(uint32_t *)var); - tlen += taosEncodeFixedU32(&pDataBuf, *(uint32_t *)var); - break; - case TSDB_DATA_TYPE_BIGINT: - printf(" %15ld |", *(int64_t *)var); - tlen += taosEncodeFixedI64(&pDataBuf, *(int64_t *)var); - break; - case TSDB_DATA_TYPE_DOUBLE: - printf(" %15lf |", *(double *)var); - tlen += taosEncodeBinary(&pDataBuf, var, sizeof(double)); - case TSDB_DATA_TYPE_UBIGINT: - printf(" %15lu |", *(uint64_t *)var); - tlen += taosEncodeFixedU64(&pDataBuf, *(uint64_t *)var); - break; - case TSDB_DATA_TYPE_NCHAR: { - char tmpChar[100] = {0}; - strncpy(tmpChar, varDataVal(var), varDataLen(var)); - printf(" %s |", tmpChar); - tlen += taosEncodeBinary(&pDataBuf, varDataVal(var), varDataLen(var)); - break; - } - case TSDB_DATA_TYPE_VARCHAR: { // TSDB_DATA_TYPE_BINARY - char tmpChar[100] = {0}; - strncpy(tmpChar, varDataVal(var), varDataLen(var)); - printf(" %s |", tmpChar); - tlen += taosEncodeBinary(&pDataBuf, varDataVal(var), varDataLen(var)); - break; - } - case TSDB_DATA_TYPE_VARBINARY: - // TODO: add binary/varbinary - 
TASSERT(0); - default: - printf("the column type %" PRIi16 " is undefined\n", pColInfoData->info.type); - TASSERT(0); - break; - } - } - printf("\n"); - // if ((tlen > 0) && (skey != TSKEY_INITIAL_VAL)) { - if (tlen > 0) { - int32_t fid = (int32_t)(TSDB_KEY_FID(skey, minutePerFile, pCfg->precision)); - - // Step 2: Set the DFile for storage of SMA index, and iterate/split the TSma data and store to B+Tree index - // file - // - Set and open the DFile or the B+Tree file - // TODO: tsdbStartTSmaCommit(); - if (fid != tSmaH.dFile.fid) { - if (tSmaH.dFile.fid != SMA_IVLD_FID) { - tdSmaEndCommit(pEnv); - smaCloseDBF(&tSmaH.dFile); - } - tdSetTSmaDataFile(&tSmaH, indexUid, fid); - smaDebug("vgId:%d, write to DBF %s, days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi32 - " queryKey:%" PRIi64, - SMA_VID(pSma), tSmaH.dFile.path, minutePerFile, tSmaH.interval, storageLevel, testSkey); - if (smaOpenDBF(pEnv->dbEnv, &tSmaH.dFile) != 0) { - smaWarn("vgId:%d, open DB file %s failed since %s", SMA_VID(pSma), - tSmaH.dFile.path ? 
tSmaH.dFile.path : "path is NULL", tstrerror(terrno)); - tdDestroyTSmaWriteH(&tSmaH); - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - tdSmaBeginCommit(pEnv); - } - - if (tdInsertTSmaBlocks(&tSmaH, &smaKey, SMA_KEY_LEN, dataBuf, tlen, &pEnv->txn) != 0) { - smaWarn("vgId:%d, insert tsma data blocks fail for index %" PRIi64 ", skey %" PRIi64 ", groupId %" PRIi64 - " since %s", - SMA_VID(pSma), indexUid, skey, groupId, tstrerror(terrno)); - tdSmaEndCommit(pEnv); - tdDestroyTSmaWriteH(&tSmaH); - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - - smaDebug("vgId:%d, insert tsma data blocks success for index %" PRIi64 ", skey %" PRIi64 ", groupId %" PRIi64, - SMA_VID(pSma), indexUid, skey, groupId); - // TODO:tsdbEndTSmaCommit(); - - // Step 3: reset the SSmaStat - tdResetExpireWindow(pSma, pStat, indexUid, skey); - } else { - smaWarn("vgId:%d, invalid data skey:%" PRIi64 ", tlen %" PRIi32 " during insert tSma data for %" PRIi64, - SMA_VID(pSma), skey, tlen, indexUid); - } - } - } - tdSmaEndCommit(pEnv); // TODO: not commit for every insert - tdDestroyTSmaWriteH(&tSmaH); - tdUnRefSmaStat(pSma, pStat); - - return TSDB_CODE_SUCCESS; -} - -int32_t tdDropTSmaData(SSma *pSma, int64_t indexUid) { - int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdDropTSmaDataImpl(pSma, indexUid)) < 0) { - smaWarn("vgId:%d, drop tSma data failed since %s", SMA_VID(pSma), tstrerror(terrno)); - } - return code; -} - -/** - * @brief Insert TSma data blocks to DB File build by B+Tree - * - * @param pSmaH - * @param smaKey tableUid-colId-skeyOfWindow(8-2-8) - * @param keyLen - * @param pData - * @param dataLen - * @return int32_t - */ -static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyLen, void *pData, int32_t dataLen, - TXN *txn) { - SDBFile *pDBFile = &pSmaH->dFile; - - // TODO: insert tsma data blocks into B+Tree(TTB) - if (smaSaveSmaToDB(pDBFile, smaKey, keyLen, pData, dataLen, txn) != 0) { - smaWarn("vgId:%d, insert tsma data blocks into 
%s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " fail", - SMA_VID(pSmaH->pSma), pDBFile->path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), dataLen); - return TSDB_CODE_FAILED; - } - smaDebug("vgId:%d, insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " succeed", - SMA_VID(pSmaH->pSma), pDBFile->path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), dataLen); - -#ifdef _TEST_SMA_PRINT_DEBUG_LOG_ - uint32_t valueSize = 0; - void *data = tdGetSmaDataByKey(pDBFile, smaKey, keyLen, &valueSize); - ASSERT(data != NULL); - for (uint32_t v = 0; v < valueSize; v += 8) { - smaWarn("vgId:%d, insert sma data val[%d] %" PRIi64, REPO_ID(pSmaH->pTsdb), v, *(int64_t *)POINTER_SHIFT(data, v)); - } -#endif - return TSDB_CODE_SUCCESS; -} - -/** - * @brief When sma data received from stream computing, make the relative expire window valid. - * - * @param pSma - * @param pStat - * @param indexUid - * @param skey - * @return int32_t - */ -static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey) { - SSmaStatItem *pItem = NULL; - - tdRefSmaStat(pSma, pStat); - - if (pStat && SMA_STAT_ITEMS(pStat)) { - pItem = taosHashGet(SMA_STAT_ITEMS(pStat), &indexUid, sizeof(indexUid)); - } - if ((pItem) && ((pItem = *(SSmaStatItem **)pItem))) { - // pItem resides in hash buffer all the time unless drop sma index - // TODO: multithread protect - if (taosHashRemove(pItem->expireWindows, &skey, sizeof(TSKEY)) != 0) { - // error handling - tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " fail", SMA_VID(pSma), skey, - indexUid); - return TSDB_CODE_FAILED; - } - smaDebug("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " succeed", SMA_VID(pSma), - skey, indexUid); - // TODO: use a standalone interface to received state upate notification from stream computing module. 
- /** - * @brief state - * - When SMA env init in TSDB, its status is TSDB_SMA_STAT_OK. - * - In startup phase of stream computing module, it should notify the SMA env in TSDB to expired if needed(e.g. - * when batch data caculation not finised) - * - When TSDB_SMA_STAT_OK, the stream computing module should also notify that to the SMA env in TSDB. - */ - pItem->state = TSDB_SMA_STAT_OK; - } else { - // error handling - tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d, expire window %" PRIi64 " not exists for sma index %" PRIi64, SMA_VID(pSma), skey, indexUid); - return TSDB_CODE_FAILED; - } - - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Drop tSma data and local cache - * - insert/query reference - * @param pSma - * @param msg - * @return int32_t - */ -static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid) { - SSmaEnv *pEnv = atomic_load_ptr(&SMA_TSMA_ENV(pSma)); - - // clear local cache - if (pEnv) { - smaDebug("vgId:%d, drop tSma local cache for %" PRIi64, SMA_VID(pSma), indexUid); - - SSmaStatItem *pItem = taosHashGet(SMA_ENV_STAT_ITEMS(pEnv), &indexUid, sizeof(indexUid)); - if ((pItem) || ((pItem = *(SSmaStatItem **)pItem))) { - if (tdSmaStatIsDropped(pItem)) { - smaDebug("vgId:%d, tSma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); - return TSDB_CODE_TDB_INVALID_ACTION; // TODO: duplicate drop msg would be intercepted by mnode - } - - tdWLockSmaEnv(pEnv); - if (tdSmaStatIsDropped(pItem)) { - tdUnLockSmaEnv(pEnv); - smaDebug("vgId:%d, tSma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); - return TSDB_CODE_TDB_INVALID_ACTION; // TODO: duplicate drop msg would be intercepted by mnode - } - tdSmaStatSetDropped(pItem); - tdUnLockSmaEnv(pEnv); - - int32_t nSleep = 0; - int32_t refVal = INT32_MAX; - while (true) { - if ((refVal = T_REF_VAL_GET(SMA_ENV_STAT(pEnv))) <= 0) { - smaDebug("vgId:%d, drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); - break; - } - 
smaDebug("vgId:%d, wait 1s to drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); - taosSsleep(1); - if (++nSleep > SMA_DROP_EXPIRED_TIME) { - smaDebug("vgId:%d, drop index %" PRIi64 " after wait %d (refVal=%d)", SMA_VID(pSma), indexUid, nSleep, refVal); - break; - }; - } - - tdFreeSmaStatItem(pItem); - smaDebug("vgId:%d, getTSmaDataImpl failed since no index %" PRIi64 " in local cache", SMA_VID(pSma), indexUid); - } - } - // clear sma data files - // TODO: - return TSDB_CODE_SUCCESS; -} - -/** - * @brief - * - * @param pSma Return the data between queryWin and fill the pData. - * @param pData - * @param indexUid - * @param pQuerySKey - * @param nMaxResult The query invoker should control the nMaxResult need to return to avoid OOM. - * @return int32_t - */ -int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY querySKey, int32_t nMaxResult) { - SSmaEnv *pEnv = atomic_load_ptr(&SMA_TSMA_ENV(pSma)); - SSmaStat *pStat = NULL; - - if (!pEnv) { - terrno = TSDB_CODE_INVALID_PTR; - smaWarn("vgId:%d, getTSmaDataImpl failed since pTSmaEnv is NULL", SMA_VID(pSma)); - return TSDB_CODE_FAILED; - } - - pStat = SMA_ENV_STAT(pEnv); - - tdRefSmaStat(pSma, pStat); - SSmaStatItem *pItem = taosHashGet(SMA_ENV_STAT_ITEMS(pEnv), &indexUid, sizeof(indexUid)); - if (!pItem || !(pItem = *(SSmaStatItem **)pItem)) { - // Normally pItem should not be NULL, mark all windows as expired and notify query module to fetch raw TS data if - // it's NULL. - tdUnRefSmaStat(pSma, pStat); - terrno = TSDB_CODE_TDB_INVALID_ACTION; - smaDebug("vgId:%d, getTSmaDataImpl failed since no index %" PRIi64, SMA_VID(pSma), indexUid); - return TSDB_CODE_FAILED; - } - -#if 0 - int32_t nQueryWin = taosArrayGetSize(pQuerySKey); - for (int32_t n = 0; n < nQueryWin; ++n) { - TSKEY skey = taosArrayGet(pQuerySKey, n); - if (taosHashGet(pItem->expireWindows, &skey, sizeof(TSKEY))) { - // TODO: mark this window as expired. 
- } - } -#endif - -#if 1 - int8_t smaStat = 0; - if (!tdSmaStatIsOK(pItem, &smaStat)) { // TODO: multiple check for large scale sma query - tdUnRefSmaStat(pSma, pStat); - terrno = TSDB_CODE_TSMA_INVALID_STAT; - smaWarn("vgId:%d, getTSmaDataImpl failed from index %" PRIi64 " since %s %" PRIi8, SMA_VID(pSma), indexUid, - tstrerror(terrno), smaStat); - return TSDB_CODE_FAILED; - } - - if (taosHashGet(pItem->expireWindows, &querySKey, sizeof(TSKEY))) { - // TODO: mark this window as expired. - smaDebug("vgId:%d, skey %" PRIi64 " of window exists in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, - indexUid); - } else { - smaDebug("vgId:%d, skey %" PRIi64 " of window not in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, - indexUid); - } - - STSma *pTSma = pItem->pTSma; -#endif - -#if 1 - STSmaReadH tReadH = {0}; - tdInitTSmaReadH(&tReadH, pSma, pTSma->interval, pTSma->intervalUnit); - smaCloseDBF(&tReadH.dFile); - - tdUnRefSmaStat(pSma, pStat); - - tdInitTSmaFile(&tReadH, indexUid, querySKey); - smaDebug("### vgId:%d, read from DBF %s days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi8 " queryKey:%" PRIi64, - SMA_VID(pSma), tReadH.dFile.path, tReadH.days, tReadH.interval, tReadH.storageLevel, querySKey); - if (smaOpenDBF(pEnv->dbEnv, &tReadH.dFile) != 0) { - smaWarn("vgId:%d, open DBF %s failed since %s", SMA_VID(pSma), tReadH.dFile.path, tstrerror(terrno)); - return TSDB_CODE_FAILED; - } - - char smaKey[SMA_KEY_LEN] = {0}; - void *pSmaKey = &smaKey; - int64_t queryGroupId = 0; - tdEncodeTSmaKey(queryGroupId, querySKey, (void **)&pSmaKey); - - smaDebug("vgId:%d, get sma data from %s: smaKey %" PRIx64 "-%" PRIx64 ", keyLen %d", SMA_VID(pSma), tReadH.dFile.path, - *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), SMA_KEY_LEN); - - void *result = NULL; - int32_t valueSize = 0; - if (!(result = smaGetSmaDataByKey(&tReadH.dFile, smaKey, SMA_KEY_LEN, &valueSize))) { - smaWarn("vgId:%d, get sma data failed from smaIndex %" PRIi64 ", smaKey 
%" PRIx64 "-%" PRIx64 " since %s", - SMA_VID(pSma), indexUid, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), tstrerror(terrno)); - smaCloseDBF(&tReadH.dFile); - return TSDB_CODE_FAILED; - } -#endif - -#ifdef _TEST_SMA_PRINT_DEBUG_LOG_ - for (uint32_t v = 0; v < valueSize; v += 8) { - smaWarn("vgId:%d, get sma data v[%d]=%" PRIi64, SMA_VID(pSma), v, *(int64_t *)POINTER_SHIFT(result, v)); - } -#endif - taosMemoryFreeClear(result); // TODO: fill the result to output - -#if 0 - int32_t nResult = 0; - int64_t lastKey = 0; - - while (true) { - if (nResult >= nMaxResult) { - break; - } - - // set and open the file according to the STSma param - if (tdSetAndOpenTSmaFile(&tReadH, queryWin)) { - char bTree[100] = "\0"; - while (strncmp(bTree, "has more nodes", 100) == 0) { - if (nResult >= nMaxResult) { - break; - } - // tdGetDataFromBTree(bTree, queryWin, lastKey) - // fill the pData - ++nResult; - } - } - } -#endif - // read data from file and fill the result - smaCloseDBF(&tReadH.dFile); - return TSDB_CODE_SUCCESS; -} - -int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg) { - SSmaCfg *pCfg = (SSmaCfg *)pMsg; - - if (metaCreateTSma(SMA_META(pSma), version, pCfg) < 0) { - return -1; - } - - tdTSmaAdd(pSma, 1); - return 0; -} - -int32_t tdDropTSma(SSma *pSma, char *pMsg) { -#if 0 - SVDropTSmaReq vDropSmaReq = {0}; - if (!tDeserializeSVDropTSmaReq(pMsg, &vDropSmaReq)) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - // TODO: send msg to stream computing to drop tSma - // if ((send msg to stream computing) < 0) { - // tDestroyTSma(&vCreateSmaReq); - // return -1; - // } - // - - if (metaDropTSma(SMA_META(pSma), vDropSmaReq.indexUid) < 0) { - // TODO: handle error - return -1; - } - - if (tdDropTSmaData(pSma, vDropSmaReq.indexUid) < 0) { - // TODO: handle error - return -1; - } - - tdTSmaSub(pSma, 1); -#endif - - // TODO: return directly or go on follow steps? 
- return TSDB_CODE_SUCCESS; -} - -static SSmaStatItem *tdNewSmaStatItem(int8_t state) { - SSmaStatItem *pItem = NULL; - - pItem = (SSmaStatItem *)taosMemoryCalloc(1, sizeof(SSmaStatItem)); - if (!pItem) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - - pItem->state = state; - pItem->expireWindows = taosHashInit(SMA_STATE_ITEM_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP), - true, HASH_ENTRY_LOCK); - if (!pItem->expireWindows) { - taosMemoryFreeClear(pItem); - return NULL; - } - - return pItem; -} - -static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version) { - SSmaStatItem *pItem = taosHashGet(pItemsHash, &indexUid, sizeof(indexUid)); - if (!pItem) { - // TODO: use TSDB_SMA_STAT_EXPIRED and update by stream computing later - pItem = tdNewSmaStatItem(TSDB_SMA_STAT_OK); // TODO use the real state - if (!pItem) { - // Response to stream computing: OOM - // For query, if the indexUid not found, the TSDB should tell query module to query raw TS data. 
- return TSDB_CODE_FAILED; - } - - // cache smaMeta - STSma *pTSma = metaGetSmaInfoByIndex(SMA_META(pSma), indexUid); - if (!pTSma) { - terrno = TSDB_CODET_TSMA_NO_INDEX_IN_META; - taosHashCleanup(pItem->expireWindows); - taosMemoryFree(pItem); - smaWarn("vgId:%d, set expire window, get tsma meta failed for smaIndex %" PRIi64 " since %s", SMA_VID(pSma), - indexUid, tstrerror(terrno)); - return TSDB_CODE_FAILED; - } - pItem->pTSma = pTSma; - - if (taosHashPut(pItemsHash, &indexUid, sizeof(indexUid), &pItem, sizeof(pItem)) != 0) { - // If error occurs during put smaStatItem, free the resources of pItem - taosHashCleanup(pItem->expireWindows); - taosMemoryFree(pItem); - return TSDB_CODE_FAILED; - } - } else if (!(pItem = *(SSmaStatItem **)pItem)) { - terrno = TSDB_CODE_INVALID_PTR; - return TSDB_CODE_FAILED; - } - - if (taosHashPut(pItem->expireWindows, &winSKey, sizeof(TSKEY), &version, sizeof(version)) != 0) { - // If error occurs during taosHashPut expire windows, remove the smaIndex from pSma->pSmaStat, thus TSDB would - // tell query module to query raw TS data. - // N.B. - // 1) It is assumed to be extemely little probability event of fail to taosHashPut. - // 2) This would solve the inconsistency to some extent, but not completely, unless we record all expired - // windows failed to put into hash table. - taosHashCleanup(pItem->expireWindows); - taosMemoryFreeClear(pItem->pTSma); - taosHashRemove(pItemsHash, &indexUid, sizeof(indexUid)); - smaWarn("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window fail", SMA_VID(pSma), indexUid, - winSKey); - return TSDB_CODE_FAILED; - } - - smaDebug("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window succeed", SMA_VID(pSma), indexUid, - winSKey); - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Update expire window according to msg from stream computing module. 
- * - * @param pSma - * @param msg SSubmitReq - * @return int32_t - */ -int32_t tdUpdateExpireWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version) { - // no time-range-sma, just return success - if (atomic_load_16(&SMA_TSMA_NUM(pSma)) <= 0) { - smaTrace("vgId:%d, not update expire window since no tSma", SMA_VID(pSma)); - return TSDB_CODE_SUCCESS; - } - - if (!SMA_META(pSma)) { - terrno = TSDB_CODE_INVALID_PTR; - smaError("vgId:%d, update expire window failed since no meta ptr", SMA_VID(pSma)); - return TSDB_CODE_FAILED; - } - - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) < 0) { - smaError("vgId:%d, init sma env failed since %s", SMA_VID(pSma), terrstr(terrno)); - terrno = TSDB_CODE_TDB_INIT_FAILED; - return TSDB_CODE_FAILED; - } - - // Firstly, assume that tSma can only be created on super table/normal table. - // getActiveTimeWindow - - SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); - SSmaStat *pStat = SMA_ENV_STAT(pEnv); - SHashObj *pItemsHash = SMA_ENV_STAT_ITEMS(pEnv); - - TASSERT(pEnv && pStat && pItemsHash); - - // basic procedure - // TODO: optimization - tdRefSmaStat(pSma, pStat); - - SSubmitMsgIter msgIter = {0}; - SSubmitBlk *pBlock = NULL; - SInterval interval = {0}; - TSKEY lastWinSKey = INT64_MIN; - - if (tInitSubmitMsgIter(pMsg, &msgIter) < 0) { - return TSDB_CODE_FAILED; - } - - while (true) { - tGetSubmitMsgNext(&msgIter, &pBlock); - if (!pBlock) break; - - STSmaWrapper *pSW = NULL; - STSma *pTSma = NULL; - - SSubmitBlkIter blkIter = {0}; - if (tInitSubmitBlkIter(&msgIter, pBlock, &blkIter) < 0) { - pSW = tFreeTSmaWrapper(pSW, false); - break; - } - - while (true) { - STSRow *row = tGetSubmitBlkNext(&blkIter); - if (!row) { - pSW = tFreeTSmaWrapper(pSW, false); - break; - } - if (!pSW || (pTSma && (pTSma->tableUid != msgIter.suid))) { - if (pSW) { - pSW = tFreeTSmaWrapper(pSW, false); - } - if (!(pSW = metaGetSmaInfoByTable(SMA_META(pSma), msgIter.suid, false))) { - break; - } - if ((pSW->number) <= 0 || !pSW->tSma) { - pSW = 
tFreeTSmaWrapper(pSW, false); - break; - } - - pTSma = pSW->tSma; - - interval.interval = pTSma->interval; - interval.intervalUnit = pTSma->intervalUnit; - interval.offset = pTSma->offset; - interval.precision = SMA_TSDB_CFG(pSma)->precision; - interval.sliding = pTSma->sliding; - interval.slidingUnit = pTSma->slidingUnit; - } - - // TODO: process multiple tsma for one table uid - TSKEY winSKey = taosTimeTruncate(TD_ROW_KEY(row), &interval, interval.precision); - - if (lastWinSKey != winSKey) { - lastWinSKey = winSKey; - if (tdSetExpireWindow(pSma, pItemsHash, pTSma->indexUid, winSKey, version) < 0) { - pSW = tFreeTSmaWrapper(pSW, false); - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - } else { - smaDebug("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window ignore as duplicated", - SMA_VID(pSma), pTSma->indexUid, winSKey); - } - } - } - - tdUnRefSmaStat(pSma, pStat); - - return TSDB_CODE_SUCCESS; -} diff --git a/source/dnode/vnode/src/sma/smaTimeRange2.c b/source/dnode/vnode/src/sma/smaTimeRange2.c index 760eb808bb..9c613873ab 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange2.c +++ b/source/dnode/vnode/src/sma/smaTimeRange2.c @@ -30,54 +30,8 @@ typedef STsdbCfg STSmaKeepCfg; #define SMA_STATE_ITEM_HASH_SLOT 32 -typedef struct { - SSma *pSma; - SDBFile dFile; - const SArray *pDataBlocks; // sma data - int64_t interval; // interval with the precision of DB -} STSmaWriteH; - -typedef struct { - int32_t iter; - int32_t fid; -} SmaFsIter; - -typedef struct { - STsdb *pTsdb; - SSma *pSma; - SDBFile dFile; - int64_t interval; // interval with the precision of DB - int32_t blockSize; // size of SMA block item - int32_t days; - int8_t storageLevel; - SmaFsIter smaFsIter; -} STSmaReadH; - -typedef enum { - SMA_STORAGE_LEVEL_TSDB = 0, // use days of self-defined e.g. vnode${N}/tsdb/tsma/sma_index_uid/v2f200.tsma - SMA_STORAGE_LEVEL_DFILESET = 1 // use days of TS data e.g. 
vnode${N}/tsdb/tsma/sma_index_uid/v2f1906.tsma -} ESmaStorageLevel; - // static func -static int64_t tdGetIntervalByPrecision(int64_t interval, uint8_t intervalUnit, int8_t precision, bool adjusted); -static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval); -static int32_t tdInitTSmaWriteH(STSmaWriteH *pSmaH, SSma *pSma, const SArray *pDataBlocks, int64_t interval, - int8_t intervalUnit); -static int32_t tdInitTSmaReadH(STSmaReadH *pSmaH, SSma *pSma, int64_t interval, int8_t intervalUnit); -static void tdDestroyTSmaWriteH(STSmaWriteH *pSmaH); -static int32_t tdGetTSmaDays(SSma *pSma, int64_t interval, int32_t storageLevel); -static int32_t tdSetTSmaDataFile(STSmaWriteH *pSmaH, int64_t indexUid, int32_t fid); -static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey); -static bool tdSetAndOpenTSmaFile(STSmaReadH *pReadH, TSKEY *queryKey); -static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyLen, void *pData, int32_t dataLen, - TXN *txn); -// expire window - -static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version); -static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey); -static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid); - /** * @brief Judge the tsma file split days * @@ -87,7 +41,7 @@ static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid); * @param days unit is minute * @return int32_t */ -int32_t tdGetTSmaDaysImpl(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days) { +int32_t tdProcessTSmaGetDaysImpl(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days) { SDecoder coder = {0}; tDecoderInit(&coder, pCont, contLen); @@ -130,225 +84,6 @@ _err: // implementation -/** - * @brief - * - * @param pSmaH - * @param pSma - * @param interval - * @param intervalUnit - * @return int32_t - */ -static int32_t tdInitTSmaReadH(STSmaReadH *pSmaH, SSma *pSma, int64_t interval, 
int8_t intervalUnit) { - STSmaKeepCfg *pCfg = SMA_TSDB_CFG(pSma); - pSmaH->pSma = pSma; - pSmaH->interval = tdGetIntervalByPrecision(interval, intervalUnit, SMA_TSDB_CFG(pSma)->precision, true); - pSmaH->storageLevel = tdGetSmaStorageLevel(pCfg, interval); - pSmaH->days = tdGetTSmaDays(pSma, pSmaH->interval, pSmaH->storageLevel); - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Init of tsma FS - * - * @param pReadH - * @param indexUid - * @param skey - * @return int32_t - */ -static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey) { - SSma *pSma = pSmaH->pSma; - - int32_t fid = (int32_t)(TSDB_KEY_FID(skey, pSmaH->days, SMA_TSDB_CFG(pSma)->precision)); - char tSmaFile[TSDB_FILENAME_LEN] = {0}; - snprintf(tSmaFile, TSDB_FILENAME_LEN, "%" PRIi64 "%sv%df%d.tsma", indexUid, TD_DIRSEP, SMA_VID(pSma), fid); - pSmaH->dFile.path = strdup(tSmaFile); - pSmaH->smaFsIter.iter = 0; - pSmaH->smaFsIter.fid = fid; - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Set and open tsma file if it has key locates in queryWin. - * - * @param pReadH - * @param param - * @param queryWin - * @return true - * @return false - */ -static bool tdSetAndOpenTSmaFile(STSmaReadH *pReadH, TSKEY *queryKey) { - // SArray *smaFs = pReadH->pTsdb->fs->cstatus->sf; - // int32_t nSmaFs = taosArrayGetSize(smaFs); - - smaCloseDBF(&pReadH->dFile); - -#if 0 - while (pReadH->smaFsIter.iter < nSmaFs) { - void *pSmaFile = taosArrayGet(smaFs, pReadH->smaFsIter.iter); - if (pSmaFile) { // match(indexName, queryWindow) - // TODO: select the file by index_name ... - pReadH->dFile = pSmaFile; - ++pReadH->smaFsIter.iter; - break; - } - ++pReadH->smaFsIter.iter; - } - - if (pReadH->pDFile) { - tdDebug("vg%d: smaFile %s matched", REPO_ID(pReadH->pTsdb), "[pSmaFile dir]"); - return true; - } -#endif - - return false; -} - -/** - * @brief Approximate value for week/month/year. 
- * - * @param interval - * @param intervalUnit - * @param precision - * @param adjusted Interval already adjusted according to DB precision - * @return int64_t - */ -static int64_t tdGetIntervalByPrecision(int64_t interval, uint8_t intervalUnit, int8_t precision, bool adjusted) { - if (adjusted) { - return interval; - } - - switch (intervalUnit) { - case TIME_UNIT_YEAR: // approximate value - interval *= 365 * 86400 * 1e3; - break; - case TIME_UNIT_MONTH: // approximate value - interval *= 30 * 86400 * 1e3; - break; - case TIME_UNIT_WEEK: // approximate value - interval *= 7 * 86400 * 1e3; - break; - case TIME_UNIT_DAY: // the interval for tSma calculation must <= day - interval *= 86400 * 1e3; - break; - case TIME_UNIT_HOUR: - interval *= 3600 * 1e3; - break; - case TIME_UNIT_MINUTE: - interval *= 60 * 1e3; - break; - case TIME_UNIT_SECOND: - interval *= 1e3; - break; - default: - break; - } - - switch (precision) { - case TSDB_TIME_PRECISION_MILLI: - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval / 1e3; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // nano second - return interval / 1e6; - } else { // ms - return interval; - } - break; - case TSDB_TIME_PRECISION_MICRO: - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // ns - return interval / 1e3; - } else { // ms - return interval * 1e3; - } - break; - case TSDB_TIME_PRECISION_NANO: - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval * 1e3; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // ns - return interval; - } else { // ms - return interval * 1e6; - } - break; - default: // ms - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval / 1e3; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // ns - return interval / 1e6; - } else { // ms - return interval; - } - break; - } - return interval; -} - -static int32_t tdInitTSmaWriteH(STSmaWriteH *pSmaH, SSma *pSma, 
const SArray *pDataBlocks, int64_t interval, - int8_t intervalUnit) { - pSmaH->pSma = pSma; - pSmaH->interval = tdGetIntervalByPrecision(interval, intervalUnit, SMA_TSDB_CFG(pSma)->precision, true); - pSmaH->pDataBlocks = pDataBlocks; - pSmaH->dFile.fid = SMA_IVLD_FID; - return TSDB_CODE_SUCCESS; -} - -static void tdDestroyTSmaWriteH(STSmaWriteH *pSmaH) { - if (pSmaH) { - smaCloseDBF(&pSmaH->dFile); - } -} - -static int32_t tdSetTSmaDataFile(STSmaWriteH *pSmaH, int64_t indexUid, int32_t fid) { - SSma *pSma = pSmaH->pSma; - ASSERT(!pSmaH->dFile.path && !pSmaH->dFile.pDB); - - pSmaH->dFile.fid = fid; - char tSmaFile[TSDB_FILENAME_LEN] = {0}; - snprintf(tSmaFile, TSDB_FILENAME_LEN, "%" PRIi64 "%sv%df%d.tsma", indexUid, TD_DIRSEP, SMA_VID(pSma), fid); - pSmaH->dFile.path = strdup(tSmaFile); - - return TSDB_CODE_SUCCESS; -} - -/** - * @brief - * - * @param pSma - * @param interval Interval calculated by DB's precision - * @param storageLevel - * @return int32_t - */ -static int32_t tdGetTSmaDays(SSma *pSma, int64_t interval, int32_t storageLevel) { - STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); - int32_t daysPerFile = pCfg->days; // unit is minute - - if (storageLevel == SMA_STORAGE_LEVEL_TSDB) { - int32_t minutes = SMA_STORAGE_TSDB_TIMES * (interval / tsTickPerMin[pCfg->precision]); - if (minutes > SMA_STORAGE_TSDB_MINUTES) { - daysPerFile = SMA_STORAGE_TSDB_MINUTES; - } - } - - return daysPerFile; -} - -/** - * @brief Judge the tsma storage level - * - * @param pCfg - * @param interval - * @return int32_t - */ -static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval) { - int64_t mInterval = convertTimeFromPrecisionToUnit(interval, pCfg->precision, TIME_UNIT_MINUTE); - if (pCfg->days / mInterval >= SMA_STORAGE_SPLIT_FACTOR) { - return SMA_STORAGE_LEVEL_DFILESET; - } - return SMA_STORAGE_LEVEL_TSDB; -} - /** * @brief Insert/Update Time-range-wise SMA data. * - If interval < SMA_STORAGE_SPLIT_HOURS(e.g. 24), save the SMA data as a part of DFileSet to e.g. 
@@ -363,7 +98,7 @@ static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval) { */ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); -#if 0 + const SArray *pDataBlocks = (const SArray *)msg; // TODO: destroy SSDataBlocks(msg) @@ -386,7 +121,6 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { smaWarn("vgId:%d, insert tsma data failed since pDataBlocks is empty", SMA_VID(pSma)); return TSDB_CODE_FAILED; } -#endif SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); SSmaStat *pStat = SMA_ENV_STAT(pEnv); @@ -411,285 +145,6 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { return TSDB_CODE_SUCCESS; } -int32_t tdDropTSmaData(SSma *pSma, int64_t indexUid) { - int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdDropTSmaDataImpl(pSma, indexUid)) < 0) { - smaWarn("vgId:%d, drop tsma data failed since %s", SMA_VID(pSma), tstrerror(terrno)); - } - return code; -} - -/** - * @brief Insert TSma data blocks to DB File build by B+Tree - * - * @param pSmaH - * @param smaKey tableUid-colId-skeyOfWindow(8-2-8) - * @param keyLen - * @param pData - * @param dataLen - * @return int32_t - */ -static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyLen, void *pData, int32_t dataLen, - TXN *txn) { - SDBFile *pDBFile = &pSmaH->dFile; - - // TODO: insert tsma data blocks into B+Tree(TTB) - if (smaSaveSmaToDB(pDBFile, smaKey, keyLen, pData, dataLen, txn) != 0) { - smaWarn("vgId:%d, insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " fail", - SMA_VID(pSmaH->pSma), pDBFile->path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), dataLen); - return TSDB_CODE_FAILED; - } - smaDebug("vgId:%d, insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " succeed", - SMA_VID(pSmaH->pSma), pDBFile->path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), dataLen); - -#ifdef 
_TEST_SMA_PRINT_DEBUG_LOG_ - uint32_t valueSize = 0; - void *data = tdGetSmaDataByKey(pDBFile, smaKey, keyLen, &valueSize); - ASSERT(data != NULL); - for (uint32_t v = 0; v < valueSize; v += 8) { - smaWarn("vgId:%d, insert sma data val[%d] %" PRIi64, REPO_ID(pSmaH->pTsdb), v, *(int64_t *)POINTER_SHIFT(data, v)); - } -#endif - return TSDB_CODE_SUCCESS; -} - -/** - * @brief When sma data received from stream computing, make the relative expire window valid. - * - * @param pSma - * @param pStat - * @param indexUid - * @param skey - * @return int32_t - */ -static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey) { - SSmaStatItem *pItem = NULL; - - tdRefSmaStat(pSma, pStat); - - if (pStat && SMA_STAT_ITEMS(pStat)) { - pItem = taosHashGet(SMA_STAT_ITEMS(pStat), &indexUid, sizeof(indexUid)); - } - if ((pItem) && ((pItem = *(SSmaStatItem **)pItem))) { - // pItem resides in hash buffer all the time unless drop sma index - // TODO: multithread protect - if (taosHashRemove(pItem->expireWindows, &skey, sizeof(TSKEY)) != 0) { - // error handling - tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " fail", SMA_VID(pSma), skey, - indexUid); - return TSDB_CODE_FAILED; - } - smaDebug("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " succeed", SMA_VID(pSma), - skey, indexUid); - // TODO: use a standalone interface to received state upate notification from stream computing module. - /** - * @brief state - * - When SMA env init in TSDB, its status is TSDB_SMA_STAT_OK. - * - In startup phase of stream computing module, it should notify the SMA env in TSDB to expired if needed(e.g. - * when batch data caculation not finised) - * - When TSDB_SMA_STAT_OK, the stream computing module should also notify that to the SMA env in TSDB. 
- */ - pItem->state = TSDB_SMA_STAT_OK; - } else { - // error handling - tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d, expire window %" PRIi64 " not exists for sma index %" PRIi64, SMA_VID(pSma), skey, indexUid); - return TSDB_CODE_FAILED; - } - - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Drop tsma data and local cache - * - insert/query reference - * @param pSma - * @param msg - * @return int32_t - */ -static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid) { - SSmaEnv *pEnv = atomic_load_ptr(&SMA_TSMA_ENV(pSma)); - - // clear local cache - if (pEnv) { - smaDebug("vgId:%d, drop tsma local cache for %" PRIi64, SMA_VID(pSma), indexUid); - - SSmaStatItem *pItem = taosHashGet(SMA_ENV_STAT_ITEMS(pEnv), &indexUid, sizeof(indexUid)); - if ((pItem) || ((pItem = *(SSmaStatItem **)pItem))) { - if (tdSmaStatIsDropped(pItem)) { - smaDebug("vgId:%d, tsma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); - return TSDB_CODE_TDB_INVALID_ACTION; // TODO: duplicate drop msg would be intercepted by mnode - } - - tdWLockSmaEnv(pEnv); - if (tdSmaStatIsDropped(pItem)) { - tdUnLockSmaEnv(pEnv); - smaDebug("vgId:%d, tsma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); - return TSDB_CODE_TDB_INVALID_ACTION; // TODO: duplicate drop msg would be intercepted by mnode - } - tdSmaStatSetDropped(pItem); - tdUnLockSmaEnv(pEnv); - - int32_t nSleep = 0; - int32_t refVal = INT32_MAX; - while (true) { - if ((refVal = T_REF_VAL_GET(SMA_ENV_STAT(pEnv))) <= 0) { - smaDebug("vgId:%d, drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); - break; - } - smaDebug("vgId:%d, wait 1s to drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); - taosSsleep(1); - if (++nSleep > SMA_DROP_EXPIRED_TIME) { - smaDebug("vgId:%d, drop index %" PRIi64 " after wait %d (refVal=%d)", SMA_VID(pSma), indexUid, nSleep, - refVal); - break; - }; - } - - tdFreeSmaStatItem(pItem); - smaDebug("vgId:%d, 
getTSmaDataImpl failed since no index %" PRIi64 " in local cache", SMA_VID(pSma), indexUid); - } - } - // clear sma data files - // TODO: - return TSDB_CODE_SUCCESS; -} - -/** - * @brief - * - * @param pSma Return the data between queryWin and fill the pData. - * @param pData - * @param indexUid - * @param pQuerySKey - * @param nMaxResult The query invoker should control the nMaxResult need to return to avoid OOM. - * @return int32_t - */ -int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY querySKey, int32_t nMaxResult) { - SSmaEnv *pEnv = atomic_load_ptr(&SMA_TSMA_ENV(pSma)); - SSmaStat *pStat = NULL; - - if (!pEnv) { - terrno = TSDB_CODE_INVALID_PTR; - smaWarn("vgId:%d, getTSmaDataImpl failed since pTSmaEnv is NULL", SMA_VID(pSma)); - return TSDB_CODE_FAILED; - } - - pStat = SMA_ENV_STAT(pEnv); - - tdRefSmaStat(pSma, pStat); - SSmaStatItem *pItem = taosHashGet(SMA_ENV_STAT_ITEMS(pEnv), &indexUid, sizeof(indexUid)); - if (!pItem || !(pItem = *(SSmaStatItem **)pItem)) { - // Normally pItem should not be NULL, mark all windows as expired and notify query module to fetch raw TS data if - // it's NULL. - tdUnRefSmaStat(pSma, pStat); - terrno = TSDB_CODE_TDB_INVALID_ACTION; - smaDebug("vgId:%d, getTSmaDataImpl failed since no index %" PRIi64, SMA_VID(pSma), indexUid); - return TSDB_CODE_FAILED; - } - -#if 0 - int32_t nQueryWin = taosArrayGetSize(pQuerySKey); - for (int32_t n = 0; n < nQueryWin; ++n) { - TSKEY skey = taosArrayGet(pQuerySKey, n); - if (taosHashGet(pItem->expireWindows, &skey, sizeof(TSKEY))) { - // TODO: mark this window as expired. 
- } - } -#endif - -#if 1 - int8_t smaStat = 0; - if (!tdSmaStatIsOK(pItem, &smaStat)) { // TODO: multiple check for large scale sma query - tdUnRefSmaStat(pSma, pStat); - terrno = TSDB_CODE_TSMA_INVALID_STAT; - smaWarn("vgId:%d, getTSmaDataImpl failed from index %" PRIi64 " since %s %" PRIi8, SMA_VID(pSma), indexUid, - tstrerror(terrno), smaStat); - return TSDB_CODE_FAILED; - } - - if (taosHashGet(pItem->expireWindows, &querySKey, sizeof(TSKEY))) { - // TODO: mark this window as expired. - smaDebug("vgId:%d, skey %" PRIi64 " of window exists in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, - indexUid); - } else { - smaDebug("vgId:%d, skey %" PRIi64 " of window not in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, - indexUid); - } - - STSma *pTSma = pItem->pTSma; -#endif - -#if 1 - STSmaReadH tReadH = {0}; - tdInitTSmaReadH(&tReadH, pSma, pTSma->interval, pTSma->intervalUnit); - smaCloseDBF(&tReadH.dFile); - - tdUnRefSmaStat(pSma, pStat); - - tdInitTSmaFile(&tReadH, indexUid, querySKey); - smaDebug("### vgId:%d, read from DBF %s days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi8 " queryKey:%" PRIi64, - SMA_VID(pSma), tReadH.dFile.path, tReadH.days, tReadH.interval, tReadH.storageLevel, querySKey); - if (smaOpenDBF(pEnv->dbEnv, &tReadH.dFile) != 0) { - smaWarn("vgId:%d, open DBF %s failed since %s", SMA_VID(pSma), tReadH.dFile.path, tstrerror(terrno)); - return TSDB_CODE_FAILED; - } - - char smaKey[SMA_KEY_LEN] = {0}; - void *pSmaKey = &smaKey; - int64_t queryGroupId = 0; - tdEncodeTSmaKey(queryGroupId, querySKey, (void **)&pSmaKey); - - smaDebug("vgId:%d, get sma data from %s: smaKey %" PRIx64 "-%" PRIx64 ", keyLen %d", SMA_VID(pSma), tReadH.dFile.path, - *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), SMA_KEY_LEN); - - void *result = NULL; - int32_t valueSize = 0; - if (!(result = smaGetSmaDataByKey(&tReadH.dFile, smaKey, SMA_KEY_LEN, &valueSize))) { - smaWarn("vgId:%d, get sma data failed from smaIndex %" PRIi64 ", smaKey 
%" PRIx64 "-%" PRIx64 " since %s", - SMA_VID(pSma), indexUid, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), tstrerror(terrno)); - smaCloseDBF(&tReadH.dFile); - return TSDB_CODE_FAILED; - } -#endif - -#ifdef _TEST_SMA_PRINT_DEBUG_LOG_ - for (uint32_t v = 0; v < valueSize; v += 8) { - smaWarn("vgId:%d, get sma data v[%d]=%" PRIi64, SMA_VID(pSma), v, *(int64_t *)POINTER_SHIFT(result, v)); - } -#endif - taosMemoryFreeClear(result); // TODO: fill the result to output - -#if 0 - int32_t nResult = 0; - int64_t lastKey = 0; - - while (true) { - if (nResult >= nMaxResult) { - break; - } - - // set and open the file according to the STSma param - if (tdSetAndOpenTSmaFile(&tReadH, queryWin)) { - char bTree[100] = "\0"; - while (strncmp(bTree, "has more nodes", 100) == 0) { - if (nResult >= nMaxResult) { - break; - } - // tdGetDataFromBTree(bTree, queryWin, lastKey) - // fill the pData - ++nResult; - } - } - } -#endif - // read data from file and fill the result - smaCloseDBF(&tReadH.dFile); - return TSDB_CODE_SUCCESS; -} - int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg) { SSmaCfg *pCfg = (SSmaCfg *)pMsg; @@ -712,294 +167,4 @@ int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg) { tdTSmaAdd(pSma, 1); return 0; -} - -int32_t tdDropTSma(SSma *pSma, char *pMsg) { -#if 0 - SVDropTSmaReq vDropSmaReq = {0}; - if (!tDeserializeSVDropTSmaReq(pMsg, &vDropSmaReq)) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - // TODO: send msg to stream computing to drop tsma - // if ((send msg to stream computing) < 0) { - // tDestroyTSma(&vCreateSmaReq); - // return -1; - // } - // - - if (metaDropTSma(SMA_META(pSma), vDropSmaReq.indexUid) < 0) { - // TODO: handle error - return -1; - } - - if (tdDropTSmaData(pSma, vDropSmaReq.indexUid) < 0) { - // TODO: handle error - return -1; - } - - tdTSmaSub(pSma, 1); -#endif - - // TODO: return directly or go on follow steps? 
- return TSDB_CODE_SUCCESS; -} - -static SSmaStatItem *tdNewSmaStatItem(int8_t state) { - SSmaStatItem *pItem = NULL; - - pItem = (SSmaStatItem *)taosMemoryCalloc(1, sizeof(SSmaStatItem)); - if (!pItem) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - - pItem->state = state; - pItem->expireWindows = taosHashInit(SMA_STATE_ITEM_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP), - true, HASH_ENTRY_LOCK); - if (!pItem->expireWindows) { - taosMemoryFreeClear(pItem); - return NULL; - } - - return pItem; -} - -static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version) { - SSmaStatItem *pItem = taosHashGet(pItemsHash, &indexUid, sizeof(indexUid)); - if (!pItem) { - // TODO: use TSDB_SMA_STAT_EXPIRED and update by stream computing later - pItem = tdNewSmaStatItem(TSDB_SMA_STAT_OK); // TODO use the real state - if (!pItem) { - // Response to stream computing: OOM - // For query, if the indexUid not found, the TSDB should tell query module to query raw TS data. 
- return TSDB_CODE_FAILED; - } - - // cache smaMeta - STSma *pTSma = metaGetSmaInfoByIndex(SMA_META(pSma), indexUid); - if (!pTSma) { - terrno = TSDB_CODE_TSMA_NO_INDEX_IN_META; - taosHashCleanup(pItem->expireWindows); - taosMemoryFree(pItem); - smaWarn("vgId:%d, set expire window, get tsma meta failed for smaIndex %" PRIi64 " since %s", SMA_VID(pSma), - indexUid, tstrerror(terrno)); - return TSDB_CODE_FAILED; - } - pItem->pTSma = pTSma; - - if (taosHashPut(pItemsHash, &indexUid, sizeof(indexUid), &pItem, sizeof(pItem)) != 0) { - // If error occurs during put smaStatItem, free the resources of pItem - taosHashCleanup(pItem->expireWindows); - taosMemoryFree(pItem); - return TSDB_CODE_FAILED; - } - } else if (!(pItem = *(SSmaStatItem **)pItem)) { - terrno = TSDB_CODE_INVALID_PTR; - return TSDB_CODE_FAILED; - } - - if (taosHashPut(pItem->expireWindows, &winSKey, sizeof(TSKEY), &version, sizeof(version)) != 0) { - // If error occurs during taosHashPut expire windows, remove the smaIndex from pSma->pSmaStat, thus TSDB would - // tell query module to query raw TS data. - // N.B. - // 1) It is assumed to be extemely little probability event of fail to taosHashPut. - // 2) This would solve the inconsistency to some extent, but not completely, unless we record all expired - // windows failed to put into hash table. - taosHashCleanup(pItem->expireWindows); - taosMemoryFreeClear(pItem->pTSma); - taosHashRemove(pItemsHash, &indexUid, sizeof(indexUid)); - smaWarn("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window fail", SMA_VID(pSma), indexUid, - winSKey); - return TSDB_CODE_FAILED; - } - - smaDebug("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window succeed", SMA_VID(pSma), indexUid, - winSKey); - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Update expire window according to msg from stream computing module. 
- * - * @param pSma - * @param msg SSubmitReq - * @return int32_t - */ -int32_t tdUpdateExpireWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version) { - // no time-range-sma, just return success - if (atomic_load_16(&SMA_TSMA_NUM(pSma)) <= 0) { - smaTrace("vgId:%d, not update expire window since no tsma", SMA_VID(pSma)); - return TSDB_CODE_SUCCESS; - } - - if (!SMA_META(pSma)) { - terrno = TSDB_CODE_INVALID_PTR; - smaError("vgId:%d, update expire window failed since no meta ptr", SMA_VID(pSma)); - return TSDB_CODE_FAILED; - } - - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) < 0) { - smaError("vgId:%d, init tsma env failed since %s", SMA_VID(pSma), terrstr(terrno)); - terrno = TSDB_CODE_TDB_INIT_FAILED; - return TSDB_CODE_FAILED; - } - - // Firstly, assume that tsma can only be created on super table/normal table. - // getActiveTimeWindow - - SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); - SSmaStat *pStat = SMA_ENV_STAT(pEnv); - SHashObj *pItemsHash = SMA_ENV_STAT_ITEMS(pEnv); - - TASSERT(pEnv && pStat && pItemsHash); - - // basic procedure - // TODO: optimization - tdRefSmaStat(pSma, pStat); - - SSubmitMsgIter msgIter = {0}; - SSubmitBlk *pBlock = NULL; - SInterval interval = {0}; - TSKEY lastWinSKey = INT64_MIN; - - if (tInitSubmitMsgIter(pMsg, &msgIter) < 0) { - return TSDB_CODE_FAILED; - } - - while (true) { - tGetSubmitMsgNext(&msgIter, &pBlock); - if (!pBlock) break; - - STSmaWrapper *pSW = NULL; - STSma *pTSma = NULL; - - SSubmitBlkIter blkIter = {0}; - if (tInitSubmitBlkIter(&msgIter, pBlock, &blkIter) < 0) { - pSW = tFreeTSmaWrapper(pSW, false); - break; - } - - while (true) { - STSRow *row = tGetSubmitBlkNext(&blkIter); - if (!row) { - pSW = tFreeTSmaWrapper(pSW, false); - break; - } - if (!pSW || (pTSma && (pTSma->tableUid != msgIter.suid))) { - if (pSW) { - pSW = tFreeTSmaWrapper(pSW, false); - } - if (!(pSW = metaGetSmaInfoByTable(SMA_META(pSma), msgIter.suid, false))) { - break; - } - if ((pSW->number) <= 0 || !pSW->tSma) { - pSW = 
tFreeTSmaWrapper(pSW, false); - break; - } - - pTSma = pSW->tSma; - - interval.interval = pTSma->interval; - interval.intervalUnit = pTSma->intervalUnit; - interval.offset = pTSma->offset; - interval.precision = SMA_TSDB_CFG(pSma)->precision; - interval.sliding = pTSma->sliding; - interval.slidingUnit = pTSma->slidingUnit; - } - - // TODO: process multiple tsma for one table uid - TSKEY winSKey = taosTimeTruncate(TD_ROW_KEY(row), &interval, interval.precision); - - if (lastWinSKey != winSKey) { - lastWinSKey = winSKey; - if (tdSetExpireWindow(pSma, pItemsHash, pTSma->indexUid, winSKey, version) < 0) { - pSW = tFreeTSmaWrapper(pSW, false); - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - } else { - smaDebug("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window ignore as duplicated", - SMA_VID(pSma), pTSma->indexUid, winSKey); - } - } - } - - tdUnRefSmaStat(pSma, pStat); - - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Clear skeys from tsma dstVgroups in expire window. - * - * @param pSma - * @param pMsg - * @return int32_t - */ -int32_t tdClearExpireWindowImpl(SSma *pSma, const SVClrTsmaExpWndsReq *pMsg) { - int64_t indexUid = pMsg->indexUid; - - if (atomic_load_16(&SMA_TSMA_NUM(pSma)) <= 0) { - smaWarn("vgId:%d, not clear expire window since no tsma for smaIndex %" PRIi64, SMA_VID(pSma), indexUid); - terrno = TSDB_CODE_TSMA_INVALID_ENV; - return TSDB_CODE_FAILED; - } - - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, true) < 0) { - smaWarn("vgId:%d, not clear expire window since no tsma env", SMA_VID(pSma)); - terrno = TSDB_CODE_TSMA_INVALID_ENV; - return TSDB_CODE_FAILED; - } - - // Firstly, assume that tsma can only be created on super table/normal table. 
- // getActiveTimeWindow - - SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); - SSmaStat *pStat = SMA_ENV_STAT(pEnv); - SHashObj *pItemsHash = SMA_ENV_STAT_ITEMS(pEnv); - - ASSERT(pEnv && pStat && pItemsHash); - - // basic procedure - // TODO: optimization - tdRefSmaStat(pSma, pStat); - - SSmaStatItem *pItem = taosHashGet(pItemsHash, &indexUid, sizeof(indexUid)); - if (!pItem || !(pItem = *(SSmaStatItem **)pItem)) { - smaWarn("vgId:%d, no sma item to clear expire window for smaIndex %" PRIi64, SMA_VID(pSma), indexUid); - terrno = TSDB_CODE_TSMA_NO_INDEX_IN_CACHE; - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - - for (int64_t i = 0; i < pMsg->nItems; ++i) { - const SVTsmaExpWndItem *pWndItem = &pMsg->items[i]; - int64_t winSKey = pWndItem->skey; - for (int64_t j = 0; j < pWndItem->nKeys; ++j) { - winSKey += pItem->pTSma->interval; - if (taosHashRemove(pItem->expireWindows, &winSKey, sizeof(winSKey)) != 0) { - // If error occurs during taosHashRemove expire windows, remove the smaIndex from pSma->pSmaStat, thus TSDB - // would tell query module to query raw TS data. N.B. - // 1) It is assumed to be extemely little probability event of fail to taosHashPut. - // 2) This would solve the inconsistency to some extent, but not completely, unless we record all expired - // windows failed to put into hash table. 
- taosHashCleanup(pItem->expireWindows); - taosMemoryFreeClear(pItem->pTSma); - taosHashRemove(pItemsHash, &indexUid, sizeof(indexUid)); - smaWarn("vgId:%d, rm skey %" PRIi64 " in expire window for smaIndex %" PRIi64 " fail", SMA_VID(pSma), winSKey, - indexUid); - terrno = TSDB_CODE_TSMA_RM_SKEY_IN_HASH; - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - smaDebug("vgId:%d, rm skey %" PRIi64 " in expire window for smaIndex %" PRIi64 " success", SMA_VID(pSma), winSKey, - indexUid); - } - } - - tdUnRefSmaStat(pSma, pStat); - - return TSDB_CODE_SUCCESS; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 2f26fba50a..3af8901b2b 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -238,16 +238,13 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) if (msgType == TDMT_VND_SUBMIT) { if (taosHashGetSize(pTq->pStreamTasks) == 0) return 0; - if (tdUpdateExpireWindow(pTq->pVnode->pSma, msg, ver) != 0) { - // TODO handle sma error + void* data = taosMemoryMalloc(msgLen); + if (data == NULL) { + return -1; } - // void* data = taosMemoryMalloc(msgLen); - // if (data == NULL) { - // return -1; - // } - // memcpy(data, msg, msgLen); + memcpy(data, msg, msgLen); - // tqProcessStreamTrigger(pTq, data); + tqProcessStreamTrigger(pTq, data); } return 0; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index eb489a6d81..671e181dac 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -26,7 +26,6 @@ static int32_t vnodeProcessCreateTSmaReq(SVnode *pVnode, int64_t version, void * static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterHasnRangeReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessWriteMsg(SVnode 
*pVnode, int64_t version, SRpcMsg *pMsg, SRpcMsg *pRsp); -static int32_t vnodeProcessExpWndsClrReq(SVnode *pVnode, void *pReq, int32_t len, SRpcMsg *pRsp); int32_t vnodePreprocessReq(SVnode *pVnode, SRpcMsg *pMsg) { int32_t code = 0; @@ -177,12 +176,10 @@ int32_t vnodeProcessWriteReq(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp vTrace("vgId:%d, process %s request success, index:%" PRId64, TD_VID(pVnode), TMSG_INFO(pMsg->msgType), version); -#if 0 if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) { vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); return -1; } -#endif // commit if need if (vnodeShouldCommit(pVnode)) { @@ -254,8 +251,6 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { return tqProcessTaskDispatchRsp(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_RECOVER_RSP: return tqProcessTaskRecoverRsp(pVnode->pTq, pMsg); - case TDMT_VND_CLR_TSMA_EXP_WNDS: - return vnodeProcessExpWndsClrReq(pVnode, pMsg, msgLen, NULL); default: vError("unknown msg type:%d in fetch queue", pMsg->msgType); return TSDB_CODE_VND_APP_ERROR; @@ -283,10 +278,7 @@ void smaHandleRes(void *pVnode, int64_t smaId, const SArray *data) { // TODO // blockDebugShowData(data, __func__); -#if 0 tdProcessTSmaInsert(((SVnode *)pVnode)->pSma, smaId, (const char *)data); -#endif - tdProcessTSmaInsert(((SVnode *)pVnode)->pSma, TD_DEBUG_SMA_ID, NULL); } void vnodeUpdateMetaRsp(SVnode *pVnode, STableMetaRsp *pMetaRsp) { @@ -900,45 +892,6 @@ static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void return 0; } -static int32_t vnodeProcessExpWndsClrReq(SVnode *pVnode, void *pReq, int32_t len, SRpcMsg *pRsp) { - SVClrTsmaExpWndsReq req = {0}; - SDecoder coder = {0}; - - if (pRsp) { - pRsp->msgType = TDMT_VND_CLR_TSMA_EXP_WNDS_RSP; - pRsp->code = TSDB_CODE_SUCCESS; - pRsp->pCont = NULL; - pRsp->contLen = 0; - } - - // decode and process - tDecoderInit(&coder, pReq, len); - - if 
(tDecodeSVClrTsmaExpWndsReq(&coder, &req) < 0) { - terrno = TSDB_CODE_MSG_DECODE_ERROR; - if (pRsp) pRsp->code = terrno; - goto _err; - } - - ASSERT(0); - - if (tdClearExpireWindow(pVnode->pSma, (const SVClrTsmaExpWndsReq *)&req) < 0) { - if (pRsp) pRsp->code = terrno; - goto _err; - } - - tDecoderClear(&coder); - vDebug("vgId:%d, success to process expWnds clear for tsma %" PRIi64 " version %" PRIi64, TD_VID(pVnode), - req.indexUid, req.version); - return 0; - -_err: - tDecoderClear(&coder); - vError("vgId:%d, success to process expWnds clear for tsma %" PRIi64 " version %" PRIi64 " since %s", TD_VID(pVnode), - req.indexUid, req.version, terrstr()); - return -1; -} - static int32_t vnodeProcessAlterHasnRangeReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp) { vInfo("vgId:%d, alter hashrange msg will be processed", TD_VID(pVnode)); From dffbec29c73033cec313f4ee3b199c2125b965fc Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Sat, 11 Jun 2022 15:31:49 +0800 Subject: [PATCH 11/16] refactor(sync): add last config index z --- source/libs/sync/src/syncAppendEntriesReply.c | 9 ++- source/libs/sync/src/syncMain.c | 5 +- source/libs/sync/src/syncMessage.c | 2 +- source/libs/sync/src/syncSnapshot.c | 65 ++++++++++++++----- .../libs/sync/test/syncSnapshotSendTest.cpp | 4 +- 5 files changed, 62 insertions(+), 23 deletions(-) diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index 3d9565bdaf..5caf814cc5 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -190,15 +190,18 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries if (gRaftDetailLog) { char* s = snapshotSender2Str(pSender); sInfo( - "sync event vgId:%d snapshot send to %s:%d start sender first time, lastApplyIndex:%ld lastApplyTerm:%lu lastConfigIndex:%ld" + "sync event vgId:%d snapshot send to %s:%d start sender first time, lastApplyIndex:%ld 
lastApplyTerm:%lu " + "lastConfigIndex:%ld" "sender:%s", - ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, s); + ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, + pSender->snapshot.lastConfigIndex, s); taosMemoryFree(s); } else { sInfo( "sync event vgId:%d snapshot send to %s:%d start sender first time, lastApplyIndex:%ld " "lastApplyTerm:%lu lastConfigIndex:%ld", - ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); + ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, + pSender->snapshot.lastConfigIndex); } } diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index ac96d933ed..e1fbc0bac1 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -35,7 +35,7 @@ #include "syncVoteMgr.h" #include "tref.h" -bool gRaftDetailLog = false; +bool gRaftDetailLog = true; static int32_t tsNodeRefId = -1; @@ -311,6 +311,8 @@ int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta) { assert(rid == pSyncNode->rid); sMeta->lastConfigIndex = pSyncNode->pRaftCfg->lastConfigIndex; + sTrace("sync get snapshot meta: lastConfigIndex:%ld", pSyncNode->pRaftCfg->lastConfigIndex); + taosReleaseRef(tsNodeRefId, pSyncNode->rid); return 0; } @@ -520,6 +522,7 @@ SSyncNode* syncNodeOpen(const SSyncInfo* pOldSyncInfo) { SRaftCfgMeta meta; meta.isStandBy = pSyncInfo->isStandBy; meta.snapshotEnable = pSyncInfo->snapshotEnable; + meta.lastConfigIndex = SYNC_INDEX_INVALID; ret = raftCfgCreateFile((SSyncCfg*)&(pSyncInfo->syncCfg), meta, pSyncNode->configPath); assert(ret == 0); diff --git a/source/libs/sync/src/syncMessage.c b/source/libs/sync/src/syncMessage.c index 57d62d298e..23165f6790 100644 --- a/source/libs/sync/src/syncMessage.c +++ b/source/libs/sync/src/syncMessage.c @@ -14,9 +14,9 @@ */ #include 
"syncMessage.h" +#include "syncRaftCfg.h" #include "syncUtil.h" #include "tcoding.h" -#include "syncRaftCfg.h" // --------------------------------------------- cJSON* syncRpcMsg2Json(SRpcMsg* pRpcMsg) { diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 39f2a83c7c..1eff9c98f4 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -15,11 +15,11 @@ #include "syncSnapshot.h" #include "syncIndexMgr.h" +#include "syncRaftCfg.h" #include "syncRaftLog.h" #include "syncRaftStore.h" #include "syncUtil.h" #include "wal.h" -#include "syncRaftCfg.h" static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SRaftId fromId); @@ -85,10 +85,17 @@ void snapshotSenderStart(SSyncSnapshotSender *pSender) { // get current snapshot info pSender->pSyncNode->pFsm->FpGetSnapshot(pSender->pSyncNode->pFsm, &(pSender->snapshot)); if (pSender->snapshot.lastConfigIndex != SYNC_INDEX_INVALID) { + /* SSyncRaftEntry *pEntry = NULL; - int32_t code = pSender->pSyncNode->pLogStore->syncLogGetEntry(pSender->pSyncNode->pLogStore, pSender->snapshot.lastConfigIndex, &pEntry); + int32_t code = pSender->pSyncNode->pLogStore->syncLogGetEntry(pSender->pSyncNode->pLogStore, + pSender->snapshot.lastConfigIndex, &pEntry); ASSERT(code == 0); - ASSERT(pEntry == NULL); + ASSERT(pEntry != NULL); + */ + + SSyncRaftEntry *pEntry = + pSender->pSyncNode->pLogStore->getEntry(pSender->pSyncNode->pLogStore, pSender->snapshot.lastConfigIndex); + ASSERT(pEntry != NULL); SRpcMsg rpcMsg; syncEntry2OriginalRpc(pEntry, &rpcMsg); @@ -103,7 +110,6 @@ void snapshotSenderStart(SSyncSnapshotSender *pSender) { } else { memset(&(pSender->lastConfig), 0, sizeof(SSyncCfg)); } - pSender->sendingMS = SYNC_SNAPSHOT_RETRY_MS; pSender->term = pSender->pSyncNode->pRaftStore->currentTerm; @@ -135,15 +141,18 @@ void snapshotSenderStart(SSyncSnapshotSender *pSender) { if (gRaftDetailLog) { char *msgStr = syncSnapshotSend2Str(pMsg); 
sTrace( - "sync event vgId:%d snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu lastConfigIndex:%ld send " + "sync event vgId:%d snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " + "lastConfigIndex:%ld send " "msg:%s", pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, msgStr); taosMemoryFree(msgStr); } else { - sTrace("sync event vgId:%d snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu lastConfigIndex:%ld", - pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); + sTrace( + "sync event vgId:%d snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " + "lastConfigIndex:%ld", + pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); } syncSnapshotSendDestroy(pMsg); @@ -270,20 +279,25 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { if (gRaftDetailLog) { char *msgStr = syncSnapshotSend2Str(pMsg); sTrace( - "sync event vgId:%d snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu lastConfigIndex:%ld send " + "sync event vgId:%d snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " + "lastConfigIndex:%ld send " "msg:%s", pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, msgStr); taosMemoryFree(msgStr); } else { - sTrace("sync event vgId:%d snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu lastConfigIndex:%ld", - pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, 
pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); + sTrace( + "sync event vgId:%d snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " + "lastConfigIndex:%ld", + pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); } } else { - sTrace("sync event vgId:%d snapshot send to %s:%d sending seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu lastConfigIndex:%ld", - pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); + sTrace( + "sync event vgId:%d snapshot send to %s:%d sending seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " + "lastConfigIndex:%ld", + pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); } syncSnapshotSendDestroy(pMsg); @@ -569,8 +583,27 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { // maybe update lastconfig if (pMsg->lastConfigIndex >= SYNC_INDEX_BEGIN) { + // update new config myIndex + bool IamInNew = false; + SSyncCfg newSyncCfg = pMsg->lastConfig; + for (int i = 0; i < newSyncCfg.replicaNum; ++i) { + if (strcmp(pSyncNode->myNodeInfo.nodeFqdn, (newSyncCfg.nodeInfo)[i].nodeFqdn) == 0 && + pSyncNode->myNodeInfo.nodePort == (newSyncCfg.nodeInfo)[i].nodePort) { + newSyncCfg.myIndex = i; + IamInNew = true; + break; + } + } + bool isDrop; - syncNodeUpdateConfig(pSyncNode, &(pMsg->lastConfig), pMsg->lastConfigIndex, &isDrop); + if (IamInNew) { + sTrace("sync event update config by snapshot, lastIndex:%ld, lastTerm:%lu, lastConfigIndex:%ld ", + pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex); + syncNodeUpdateConfig(pSyncNode, &newSyncCfg, pMsg->lastConfigIndex, &isDrop); + } 
else { + sTrace("sync event do not update config by snapshot, I am not in newCfg, lastIndex:%ld, lastTerm:%lu, lastConfigIndex:%ld ", + pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex); + } } SSnapshot snapshot; diff --git a/source/libs/sync/test/syncSnapshotSendTest.cpp b/source/libs/sync/test/syncSnapshotSendTest.cpp index d4ae4af654..ca7916359e 100644 --- a/source/libs/sync/test/syncSnapshotSendTest.cpp +++ b/source/libs/sync/test/syncSnapshotSendTest.cpp @@ -30,7 +30,8 @@ SyncSnapshotSend *createMsg() { pMsg->lastConfig.myIndex = 1; for (int i = 0; i < pMsg->lastConfig.replicaNum; ++i) { ((pMsg->lastConfig.nodeInfo)[i]).nodePort = i * 100; - snprintf(((pMsg->lastConfig.nodeInfo)[i]).nodeFqdn, sizeof(((pMsg->lastConfig.nodeInfo)[i]).nodeFqdn), "100.200.300.%d", i); + snprintf(((pMsg->lastConfig.nodeInfo)[i]).nodeFqdn, sizeof(((pMsg->lastConfig.nodeInfo)[i]).nodeFqdn), + "100.200.300.%d", i); } pMsg->seq = 44; @@ -96,7 +97,6 @@ void test5() { } int main() { - gRaftDetailLog = true; tsAsyncLog = 0; From e17396be9cf4c1b4fed067b78d669a4cc5bd3769 Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Sat, 11 Jun 2022 15:44:49 +0800 Subject: [PATCH 12/16] fix: some problems of parser --- include/libs/function/functionMgt.h | 3 +- include/libs/nodes/cmdnodes.h | 2 +- include/util/taoserror.h | 1 + source/dnode/mnode/impl/src/mndDb.c | 5 +- source/libs/function/inc/functionMgtInt.h | 1 + source/libs/function/src/builtins.c | 19 +++-- source/libs/function/src/functionMgt.c | 2 + source/libs/parser/src/parAstCreater.c | 29 ++++--- source/libs/parser/src/parTranslater.c | 83 +++++++++++++++++---- source/libs/parser/src/parUtil.c | 4 +- source/libs/parser/test/parInitialATest.cpp | 20 ++++- source/libs/parser/test/parInitialCTest.cpp | 24 +++++- source/libs/parser/test/parSelectTest.cpp | 10 ++- source/libs/parser/test/parTestUtil.h | 2 +- 14 files changed, 152 insertions(+), 53 deletions(-) diff --git a/include/libs/function/functionMgt.h 
b/include/libs/function/functionMgt.h index c8e803c811..8888f6ca8e 100644 --- a/include/libs/function/functionMgt.h +++ b/include/libs/function/functionMgt.h @@ -121,7 +121,7 @@ typedef enum EFunctionType { // internal function FUNCTION_TYPE_SELECT_VALUE, - FUNCTION_TYPE_BLOCK_DIST, // block distribution aggregate function + FUNCTION_TYPE_BLOCK_DIST, // block distribution aggregate function // distributed splitting functions FUNCTION_TYPE_APERCENTILE_PARTIAL, @@ -170,6 +170,7 @@ bool fmIsMultiResFunc(int32_t funcId); bool fmIsRepeatScanFunc(int32_t funcId); bool fmIsUserDefinedFunc(int32_t funcId); bool fmIsDistExecFunc(int32_t funcId); +bool fmIsForbidFillFunc(int32_t funcId); int32_t fmGetDistMethod(const SFunctionNode* pFunc, SFunctionNode** pPartialFunc, SFunctionNode** pMergeFunc); diff --git a/include/libs/nodes/cmdnodes.h b/include/libs/nodes/cmdnodes.h index c267c89384..25369f2342 100644 --- a/include/libs/nodes/cmdnodes.h +++ b/include/libs/nodes/cmdnodes.h @@ -47,7 +47,7 @@ typedef struct SDatabaseOptions { int32_t maxRowsPerBlock; int32_t minRowsPerBlock; SNodeList* pKeep; - int32_t keep[3]; + int64_t keep[3]; int32_t pages; int32_t pagesize; char precisionStr[3]; diff --git a/include/util/taoserror.h b/include/util/taoserror.h index ce6a3f2ce7..ecddd0e0c5 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -655,6 +655,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_PAR_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x2654) #define TSDB_CODE_PAR_INVALID_DELETE_WHERE TAOS_DEF_ERROR_CODE(0, 0x2655) #define TSDB_CODE_PAR_INVALID_REDISTRIBUTE_VG TAOS_DEF_ERROR_CODE(0, 0x2656) +#define TSDB_CODE_PAR_FILL_NOT_ALLOWED_FUNC TAOS_DEF_ERROR_CODE(0, 0x2657) //planner #define TSDB_CODE_PLAN_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x2700) diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index e6c93a9bfd..c20459829e 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -1424,10 
+1424,10 @@ static void dumpDbInfoData(SSDataBlock *pBlock, SDbObj *pDb, SShowObj *pShow, in char tmp[128] = {0}; int32_t len = 0; if (pDb->cfg.daysToKeep0 > pDb->cfg.daysToKeep1 || pDb->cfg.daysToKeep0 > pDb->cfg.daysToKeep2) { - len = sprintf(&tmp[VARSTR_HEADER_SIZE], "%d,%d,%d", pDb->cfg.daysToKeep1, pDb->cfg.daysToKeep2, + len = sprintf(&tmp[VARSTR_HEADER_SIZE], "%dm,%dm,%dm", pDb->cfg.daysToKeep1, pDb->cfg.daysToKeep2, pDb->cfg.daysToKeep0); } else { - len = sprintf(&tmp[VARSTR_HEADER_SIZE], "%d,%d,%d", pDb->cfg.daysToKeep0, pDb->cfg.daysToKeep1, + len = sprintf(&tmp[VARSTR_HEADER_SIZE], "%dm,%dm,%dm", pDb->cfg.daysToKeep0, pDb->cfg.daysToKeep1, pDb->cfg.daysToKeep2); } @@ -1592,4 +1592,3 @@ static void mndCancelGetNextDb(SMnode *pMnode, void *pIter) { SSdb *pSdb = pMnode->pSdb; sdbCancelFetch(pSdb, pIter); } - diff --git a/source/libs/function/inc/functionMgtInt.h b/source/libs/function/inc/functionMgtInt.h index 29dd0bcd90..d1af6b6051 100644 --- a/source/libs/function/inc/functionMgtInt.h +++ b/source/libs/function/inc/functionMgtInt.h @@ -41,6 +41,7 @@ extern "C" { #define FUNC_MGT_SCAN_PC_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(12) #define FUNC_MGT_SELECT_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(13) #define FUNC_MGT_REPEAT_SCAN_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(14) +#define FUNC_MGT_FORBID_FILL_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(15) #define FUNC_MGT_TEST_MASK(val, mask) (((val) & (mask)) != 0) diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index 23ec649919..2d6c95c5f2 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -294,7 +294,8 @@ static int32_t translateApercentileImpl(SFunctionNode* pFunc, char* pErrBuf, int pValue->notReserved = true; } - pFunc->node.resType = (SDataType){.bytes = getApercentileMaxSize() + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY}; + pFunc->node.resType = + (SDataType){.bytes = getApercentileMaxSize() + VARSTR_HEADER_SIZE, .type = 
TSDB_DATA_TYPE_BINARY}; } else { if (1 != numOfParams) { return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); @@ -479,7 +480,8 @@ static int32_t translateElapsedImpl(SFunctionNode* pFunc, char* pErrBuf, int32_t } } - pFunc->node.resType = (SDataType){.bytes = getElapsedInfoSize() + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY}; + pFunc->node.resType = + (SDataType){.bytes = getElapsedInfoSize() + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY}; } else { if (1 != numOfParams) { return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); @@ -593,7 +595,8 @@ static int32_t translateHistogramImpl(SFunctionNode* pFunc, char* pErrBuf, int32 return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); } - pFunc->node.resType = (SDataType){.bytes = getHistogramInfoSize() + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY}; + pFunc->node.resType = + (SDataType){.bytes = getHistogramInfoSize() + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY}; } else { if (1 != numOfParams) { return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); @@ -631,7 +634,8 @@ static int32_t translateHLLImpl(SFunctionNode* pFunc, char* pErrBuf, int32_t len } if (isPartial) { - pFunc->node.resType = (SDataType){.bytes = getHistogramInfoSize() + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY}; + pFunc->node.resType = + (SDataType){.bytes = getHistogramInfoSize() + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY}; } else { pFunc->node.resType = (SDataType){.bytes = tDataTypes[TSDB_DATA_TYPE_BIGINT].bytes, .type = TSDB_DATA_TYPE_BIGINT}; } @@ -1127,7 +1131,7 @@ static bool validateTimezoneFormat(const SValueNode* pVal) { char* tz = varDataVal(pVal->datum.p); int32_t len = varDataLen(pVal->datum.p); - char buf[3] = {0}; + char buf[3] = {0}; int8_t hour = -1, minute = -1; if (len == 0) { return false; @@ -1320,7 +1324,7 @@ static int32_t translateSelectValue(SFunctionNode* pFunc, char* pErrBuf, int32_t } static int32_t 
translateBlockDistFunc(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { - pFunc->node.resType = (SDataType) {.bytes = 128, .type = TSDB_DATA_TYPE_VARCHAR}; + pFunc->node.resType = (SDataType){.bytes = 128, .type = TSDB_DATA_TYPE_VARCHAR}; return TSDB_CODE_SUCCESS; } @@ -1329,7 +1333,6 @@ static bool getBlockDistFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv return true; } - // clang-format off const SBuiltinFuncDefinition funcMgtBuiltins[] = { { @@ -1608,7 +1611,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "histogram", .type = FUNCTION_TYPE_HISTOGRAM, - .classification = FUNC_MGT_AGG_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_FORBID_FILL_FUNC, .translateFunc = translateHistogram, .getEnvFunc = getHistogramFuncEnv, .initFunc = histogramFunctionSetup, diff --git a/source/libs/function/src/functionMgt.c b/source/libs/function/src/functionMgt.c index f2514f54f1..df09d3e529 100644 --- a/source/libs/function/src/functionMgt.c +++ b/source/libs/function/src/functionMgt.c @@ -159,6 +159,8 @@ bool fmIsRepeatScanFunc(int32_t funcId) { return isSpecificClassifyFunc(funcId, bool fmIsUserDefinedFunc(int32_t funcId) { return funcId > FUNC_UDF_ID_START; } +bool fmIsForbidFillFunc(int32_t funcId) { return isSpecificClassifyFunc(funcId, FUNC_MGT_FORBID_FILL_FUNC); } + void fmFuncMgtDestroy() { void* m = gFunMgtService.pFuncNameHashTable; if (m != NULL && atomic_val_compare_exchange_ptr((void**)&gFunMgtService.pFuncNameHashTable, m, 0) == m) { diff --git a/source/libs/parser/src/parAstCreater.c b/source/libs/parser/src/parAstCreater.c index 613a2d867d..054912d540 100644 --- a/source/libs/parser/src/parAstCreater.c +++ b/source/libs/parser/src/parAstCreater.c @@ -346,25 +346,30 @@ SNode* createPlaceholderValueNode(SAstCreateContext* pCxt, const SToken* pLitera return (SNode*)val; } +static int32_t addParamToLogicConditionNode(SLogicConditionNode* pCond, SNode* pParam) { + if (QUERY_NODE_LOGIC_CONDITION == nodeType(pParam) && 
pCond->condType == ((SLogicConditionNode*)pParam)->condType) { + int32_t code = nodesListAppendList(pCond->pParameterList, ((SLogicConditionNode*)pParam)->pParameterList); + ((SLogicConditionNode*)pParam)->pParameterList = NULL; + nodesDestroyNode(pParam); + return code; + } else { + return nodesListAppend(pCond->pParameterList, pParam); + } +} + SNode* createLogicConditionNode(SAstCreateContext* pCxt, ELogicConditionType type, SNode* pParam1, SNode* pParam2) { CHECK_PARSER_STATUS(pCxt); SLogicConditionNode* cond = (SLogicConditionNode*)nodesMakeNode(QUERY_NODE_LOGIC_CONDITION); CHECK_OUT_OF_MEM(cond); cond->condType = type; cond->pParameterList = nodesMakeList(); - if (QUERY_NODE_LOGIC_CONDITION == nodeType(pParam1) && type == ((SLogicConditionNode*)pParam1)->condType) { - nodesListAppendList(cond->pParameterList, ((SLogicConditionNode*)pParam1)->pParameterList); - ((SLogicConditionNode*)pParam1)->pParameterList = NULL; - nodesDestroyNode(pParam1); - } else { - nodesListAppend(cond->pParameterList, pParam1); + int32_t code = addParamToLogicConditionNode(cond, pParam1); + if (TSDB_CODE_SUCCESS == code && NULL != pParam2) { + code = addParamToLogicConditionNode(cond, pParam2); } - if (QUERY_NODE_LOGIC_CONDITION == nodeType(pParam2) && type == ((SLogicConditionNode*)pParam2)->condType) { - nodesListAppendList(cond->pParameterList, ((SLogicConditionNode*)pParam2)->pParameterList); - ((SLogicConditionNode*)pParam2)->pParameterList = NULL; - nodesDestroyNode(pParam2); - } else { - nodesListAppend(cond->pParameterList, pParam2); + if (TSDB_CODE_SUCCESS != code) { + nodesDestroyNode(cond); + return NULL; } return (SNode*)cond; } diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 178cc2595a..8ca6332a8d 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -733,7 +733,7 @@ static EDealRes translateValueImpl(STranslateContext* pCxt, SValueNode* pVal, SD } int32_t len = 0; - if 
(!taosMbsToUcs4(pVal->literal, pVal->node.resType.bytes, (TdUcs4*)varDataVal(pVal->datum.p), + if (!taosMbsToUcs4(pVal->literal, strlen(pVal->literal), (TdUcs4*)varDataVal(pVal->datum.p), targetDt.bytes - VARSTR_HEADER_SIZE, &len)) { return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, pVal->literal); } @@ -974,6 +974,9 @@ static int32_t getFuncInfo(STranslateContext* pCxt, SFunctionNode* pFunc) { } static int32_t translateAggFunc(STranslateContext* pCxt, SFunctionNode* pFunc) { + if (!fmIsAggFunc(pFunc->funcId)) { + return TSDB_CODE_SUCCESS; + } if (beforeHaving(pCxt->currClause)) { return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_ILLEGAL_USE_AGG_FUNCTION); } @@ -991,6 +994,9 @@ static int32_t translateAggFunc(STranslateContext* pCxt, SFunctionNode* pFunc) { } static int32_t translateScanPseudoColumnFunc(STranslateContext* pCxt, SFunctionNode* pFunc) { + if (!fmIsScanPseudoColumnFunc(pFunc->funcId)) { + return TSDB_CODE_SUCCESS; + } if (0 == LIST_LENGTH(pFunc->pParameterList)) { if (QUERY_NODE_REAL_TABLE != nodeType(pCxt->pCurrSelectStmt->pFromTable)) { return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_TBNAME); @@ -1007,6 +1013,9 @@ static int32_t translateScanPseudoColumnFunc(STranslateContext* pCxt, SFunctionN } static int32_t translateIndefiniteRowsFunc(STranslateContext* pCxt, SFunctionNode* pFunc) { + if (!fmIsIndefiniteRowsFunc(pFunc->funcId)) { + return TSDB_CODE_SUCCESS; + } if (SQL_CLAUSE_SELECT != pCxt->currClause || pCxt->pCurrSelectStmt->hasIndefiniteRowsFunc || pCxt->pCurrSelectStmt->hasAggFuncs) { return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_NOT_ALLOWED_FUNC); @@ -1017,6 +1026,18 @@ static int32_t translateIndefiniteRowsFunc(STranslateContext* pCxt, SFunctionNod return TSDB_CODE_SUCCESS; } +static int32_t translateForbidFillFunc(STranslateContext* pCxt, SFunctionNode* pFunc) { + if (!fmIsForbidFillFunc(pFunc->funcId)) { + return TSDB_CODE_SUCCESS; + } + if (NULL != pCxt->pCurrSelectStmt->pWindow && + 
QUERY_NODE_INTERVAL_WINDOW == nodeType(pCxt->pCurrSelectStmt->pWindow) && + NULL != ((SIntervalWindowNode*)pCxt->pCurrSelectStmt->pWindow)->pFill) { + return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_FILL_NOT_ALLOWED_FUNC, pFunc->functionName); + } + return TSDB_CODE_SUCCESS; +} + static void setFuncClassification(SSelectStmt* pSelect, SFunctionNode* pFunc) { if (NULL != pSelect) { pSelect->hasAggFuncs = pSelect->hasAggFuncs ? true : fmIsAggFunc(pFunc->funcId); @@ -1034,15 +1055,18 @@ static EDealRes translateFunction(STranslateContext* pCxt, SFunctionNode* pFunc) } pCxt->errCode = getFuncInfo(pCxt, pFunc); - if (TSDB_CODE_SUCCESS == pCxt->errCode && fmIsAggFunc(pFunc->funcId)) { + if (TSDB_CODE_SUCCESS == pCxt->errCode) { pCxt->errCode = translateAggFunc(pCxt, pFunc); } - if (TSDB_CODE_SUCCESS == pCxt->errCode && fmIsScanPseudoColumnFunc(pFunc->funcId)) { + if (TSDB_CODE_SUCCESS == pCxt->errCode) { pCxt->errCode = translateScanPseudoColumnFunc(pCxt, pFunc); } - if (TSDB_CODE_SUCCESS == pCxt->errCode && fmIsIndefiniteRowsFunc(pFunc->funcId)) { + if (TSDB_CODE_SUCCESS == pCxt->errCode) { pCxt->errCode = translateIndefiniteRowsFunc(pCxt, pFunc); } + if (TSDB_CODE_SUCCESS == pCxt->errCode) { + pCxt->errCode = translateForbidFillFunc(pCxt, pFunc); + } if (TSDB_CODE_SUCCESS == pCxt->errCode) { setFuncClassification(pCxt->pCurrSelectStmt, pFunc); } @@ -2365,7 +2389,9 @@ static int32_t checkDbRetentionsOption(STranslateContext* pCxt, SNodeList* pRete return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_RETENTIONS_OPTION); } - SNode* pRetention = NULL; + SValueNode* pPrevFreq = NULL; + SValueNode* pPrevKeep = NULL; + SNode* pRetention = NULL; FOREACH(pRetention, pRetentions) { SNode* pNode = NULL; FOREACH(pNode, ((SNodeListNode*)pRetention)->pNodeList) { @@ -2374,6 +2400,16 @@ static int32_t checkDbRetentionsOption(STranslateContext* pCxt, SNodeList* pRete return pCxt->errCode; } } + + SValueNode* pFreq = 
(SValueNode*)nodesListGetNode(((SNodeListNode*)pRetention)->pNodeList, 0); + SValueNode* pKeep = (SValueNode*)nodesListGetNode(((SNodeListNode*)pRetention)->pNodeList, 1); + if (pFreq->datum.i <= 0 || 'n' == pFreq->unit || 'y' == pFreq->unit || pFreq->datum.i >= pKeep->datum.i || + (NULL != pPrevFreq && pPrevFreq->datum.i >= pFreq->datum.i) || + (NULL != pPrevKeep && pPrevKeep->datum.i > pKeep->datum.i)) { + return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_RETENTIONS_OPTION); + } + pPrevFreq = pFreq; + pPrevKeep = pKeep; } return TSDB_CODE_SUCCESS; @@ -2593,12 +2629,23 @@ static int32_t checkTableSmaOption(STranslateContext* pCxt, SCreateTableStmt* pS return TSDB_CODE_SUCCESS; } +static bool validRollupFunc(const char* pFunc) { + static const char* rollupFuncs[] = {"avg", "sum", "min", "max", "last", "first"}; + static const int32_t numOfRollupFuncs = (sizeof(rollupFuncs) / sizeof(char*)); + for (int i = 0; i < numOfRollupFuncs; ++i) { + if (0 == strcmp(rollupFuncs[i], pFunc)) { + return true; + } + } + return false; +} + static int32_t checkTableRollupOption(STranslateContext* pCxt, SNodeList* pFuncs) { if (NULL == pFuncs) { return TSDB_CODE_SUCCESS; } - if (1 != LIST_LENGTH(pFuncs)) { + if (1 != LIST_LENGTH(pFuncs) || !validRollupFunc(((SFunctionNode*)nodesListGetNode(pFuncs, 0))->functionName)) { return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_ROLLUP_OPTION); } return TSDB_CODE_SUCCESS; @@ -3083,15 +3130,14 @@ static int32_t translateAlterTable(STranslateContext* pCxt, SAlterTableStmt* pSt SName tableName; tNameExtractFullName(toName(pCxt->pParseCxt->acctId, pStmt->dbName, pStmt->tableName, &tableName), alterReq.name); alterReq.alterType = pStmt->alterType; - if (TSDB_ALTER_TABLE_UPDATE_TAG_VAL == pStmt->alterType) { - return TSDB_CODE_FAILED; - } else { - if (TSDB_CODE_SUCCESS != setAlterTableField(pStmt, &alterReq)) { - return TSDB_CODE_OUT_OF_MEMORY; - } + if (TSDB_ALTER_TABLE_UPDATE_TAG_VAL == pStmt->alterType || 
TSDB_ALTER_TABLE_UPDATE_COLUMN_NAME == pStmt->alterType) { + return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_ALTER_TABLE); } - - return buildCmdMsg(pCxt, TDMT_MND_ALTER_STB, (FSerializeFunc)tSerializeSMAlterStbReq, &alterReq); + int32_t code = setAlterTableField(pStmt, &alterReq); + if (TSDB_CODE_SUCCESS == code) { + code = buildCmdMsg(pCxt, TDMT_MND_ALTER_STB, (FSerializeFunc)tSerializeSMAlterStbReq, &alterReq); + } + return code; } static int32_t translateUseDatabase(STranslateContext* pCxt, SUseDatabaseStmt* pStmt) { @@ -3171,7 +3217,7 @@ static int32_t nodeTypeToShowType(ENodeType nt) { case QUERY_NODE_SHOW_QUERIES_STMT: return TSDB_MGMT_TABLE_QUERIES; case QUERY_NODE_SHOW_VARIABLE_STMT: - return 0; // todo + return TSDB_MGMT_TABLE_CONFIGS; default: break; } @@ -3778,6 +3824,7 @@ static int32_t translateQuery(STranslateContext* pCxt, SNode* pNode) { case QUERY_NODE_SHOW_CONNECTIONS_STMT: case QUERY_NODE_SHOW_QUERIES_STMT: case QUERY_NODE_SHOW_TOPICS_STMT: + case QUERY_NODE_SHOW_VARIABLE_STMT: code = translateShow(pCxt, (SShowStmt*)pNode); break; case QUERY_NODE_CREATE_INDEX_STMT: @@ -4932,7 +4979,11 @@ static int32_t buildAlterTbReq(STranslateContext* pCxt, SAlterTableStmt* pStmt, case TSDB_ALTER_TABLE_UPDATE_OPTIONS: return buildUpdateOptionsReq(pCxt, pStmt, pReq); case TSDB_ALTER_TABLE_UPDATE_COLUMN_NAME: - return buildRenameColReq(pCxt, pStmt, pTableMeta, pReq); + if (TSDB_CHILD_TABLE == pTableMeta->tableType) { + return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_ALTER_TABLE); + } else { + return buildRenameColReq(pCxt, pStmt, pTableMeta, pReq); + } default: break; } diff --git a/source/libs/parser/src/parUtil.c b/source/libs/parser/src/parUtil.c index 1e5a6681ee..716b120af5 100644 --- a/source/libs/parser/src/parUtil.c +++ b/source/libs/parser/src/parUtil.c @@ -76,7 +76,7 @@ static char* getSyntaxErrFormat(int32_t errCode) { case TSDB_CODE_PAR_INVALID_KEEP_ORDER: return "Invalid keep value, should be keep0 <= keep1 <= keep2"; 
case TSDB_CODE_PAR_INVALID_KEEP_VALUE: - return "Invalid option keep: %d, %d, %d valid range: [%d, %d]"; + return "Invalid option keep: %" PRId64 ", %" PRId64 ", %" PRId64 " valid range: [%dm, %dm]"; case TSDB_CODE_PAR_INVALID_COMMENT_OPTION: return "Invalid option comment, length cannot exceed %d"; case TSDB_CODE_PAR_INVALID_F_RANGE_OPTION: @@ -182,6 +182,8 @@ static char* getSyntaxErrFormat(int32_t errCode) { return "The DELETE statement must have a definite time window range"; case TSDB_CODE_PAR_INVALID_REDISTRIBUTE_VG: return "The REDISTRIBUTE VGROUP statement only support 1 to 3 dnodes"; + case TSDB_CODE_PAR_FILL_NOT_ALLOWED_FUNC: + return "%s function not allowed in fill query"; case TSDB_CODE_OUT_OF_MEMORY: return "Out of memory"; default: diff --git a/source/libs/parser/test/parInitialATest.cpp b/source/libs/parser/test/parInitialATest.cpp index 22b244145b..f554651b90 100644 --- a/source/libs/parser/test/parInitialATest.cpp +++ b/source/libs/parser/test/parInitialATest.cpp @@ -24,7 +24,7 @@ class ParserInitialATest : public ParserDdlTest {}; TEST_F(ParserInitialATest, alterAccount) { useDb("root", "test"); - run("ALTER ACCOUNT ac_wxy PASS '123456'", TSDB_CODE_PAR_EXPRIE_STATEMENT); + run("ALTER ACCOUNT ac_wxy PASS '123456'", TSDB_CODE_PAR_EXPRIE_STATEMENT, PARSER_STAGE_PARSE); } TEST_F(ParserInitialATest, alterDnode) { @@ -157,8 +157,8 @@ TEST_F(ParserInitialATest, alterSTable) { 20 + VARSTR_HEADER_SIZE); run("ALTER TABLE st1 MODIFY COLUMN c1 VARCHAR(20)"); - setAlterStbReqFunc("st1", TSDB_ALTER_TABLE_UPDATE_COLUMN_NAME, 2, "c1", 0, 0, "cc1"); - run("ALTER TABLE st1 RENAME COLUMN c1 cc1"); + // setAlterStbReqFunc("st1", TSDB_ALTER_TABLE_UPDATE_COLUMN_NAME, 2, "c1", 0, 0, "cc1"); + // run("ALTER TABLE st1 RENAME COLUMN c1 cc1"); setAlterStbReqFunc("st1", TSDB_ALTER_TABLE_ADD_TAG, 1, "tag11", TSDB_DATA_TYPE_BIGINT); run("ALTER TABLE st1 ADD TAG tag11 BIGINT"); @@ -177,6 +177,12 @@ TEST_F(ParserInitialATest, alterSTable) { // ADD {FULLTEXT | SMA} INDEX 
index_name (col_name [, col_name] ...) [index_option] } +TEST_F(ParserInitialATest, alterSTableSemanticCheck) { + useDb("root", "test"); + + run("ALTER TABLE st1 RENAME COLUMN c1 cc1", TSDB_CODE_PAR_INVALID_ALTER_TABLE); +} + TEST_F(ParserInitialATest, alterTable) { useDb("root", "test"); @@ -299,6 +305,12 @@ TEST_F(ParserInitialATest, alterTable) { // ADD {FULLTEXT | SMA} INDEX index_name (col_name [, col_name] ...) [index_option] } +TEST_F(ParserInitialATest, alterTableSemanticCheck) { + useDb("root", "test"); + + run("ALTER TABLE st1s1 RENAME COLUMN c1 cc1", TSDB_CODE_PAR_INVALID_ALTER_TABLE); +} + TEST_F(ParserInitialATest, alterUser) { useDb("root", "test"); @@ -323,7 +335,7 @@ TEST_F(ParserInitialATest, balanceVgroup) { TEST_F(ParserInitialATest, bug001) { useDb("root", "test"); - run("ALTER DATABASE db WAL 0 # td-14436", TSDB_CODE_PAR_SYNTAX_ERROR); + run("ALTER DATABASE db WAL 0 # td-14436", TSDB_CODE_PAR_SYNTAX_ERROR, PARSER_STAGE_PARSE); } } // namespace ParserTest \ No newline at end of file diff --git a/source/libs/parser/test/parInitialCTest.cpp b/source/libs/parser/test/parInitialCTest.cpp index d996ca196a..f306947f76 100644 --- a/source/libs/parser/test/parInitialCTest.cpp +++ b/source/libs/parser/test/parInitialCTest.cpp @@ -27,7 +27,7 @@ class ParserInitialCTest : public ParserDdlTest {}; TEST_F(ParserInitialCTest, createAccount) { useDb("root", "test"); - run("CREATE ACCOUNT ac_wxy PASS '123456'", TSDB_CODE_PAR_EXPRIE_STATEMENT); + run("CREATE ACCOUNT ac_wxy PASS '123456'", TSDB_CODE_PAR_EXPRIE_STATEMENT, PARSER_STAGE_PARSE); } TEST_F(ParserInitialCTest, createBnode) { @@ -186,7 +186,7 @@ TEST_F(ParserInitialCTest, createDatabase) { setDbReplicaFunc(3); addDbRetentionFunc(15 * MILLISECOND_PER_SECOND, 7 * MILLISECOND_PER_DAY, TIME_UNIT_SECOND, TIME_UNIT_DAY); addDbRetentionFunc(1 * MILLISECOND_PER_MINUTE, 21 * MILLISECOND_PER_DAY, TIME_UNIT_MINUTE, TIME_UNIT_DAY); - addDbRetentionFunc(15 * MILLISECOND_PER_MINUTE, 5, TIME_UNIT_MINUTE, 
TIME_UNIT_YEAR); + addDbRetentionFunc(15 * MILLISECOND_PER_MINUTE, 500 * MILLISECOND_PER_DAY, TIME_UNIT_MINUTE, TIME_UNIT_DAY); setDbStrictaFunc(1); setDbWalLevelFunc(2); setDbVgroupsFunc(100); @@ -205,7 +205,7 @@ TEST_F(ParserInitialCTest, createDatabase) { "PAGESIZE 8 " "PRECISION 'ns' " "REPLICA 3 " - "RETENTIONS 15s:7d,1m:21d,15m:5y " + "RETENTIONS 15s:7d,1m:21d,15m:500d " "STRICT 1 " "WAL 2 " "VGROUPS 100 " @@ -220,6 +220,17 @@ TEST_F(ParserInitialCTest, createDatabase) { "KEEP 1440m,300h,400d "); } +TEST_F(ParserInitialCTest, createDatabaseSemanticCheck) { + useDb("root", "test"); + + run("create database db2 retentions 0s:1d", TSDB_CODE_PAR_INVALID_RETENTIONS_OPTION); + run("create database db2 retentions 10s:0d", TSDB_CODE_PAR_INVALID_RETENTIONS_OPTION); + run("create database db2 retentions 1w:1d", TSDB_CODE_PAR_INVALID_RETENTIONS_OPTION); + run("create database db2 retentions 1w:1n", TSDB_CODE_PAR_INVALID_RETENTIONS_OPTION); + run("create database db2 retentions 15s:7d,15m:21d,10m:500d", TSDB_CODE_PAR_INVALID_RETENTIONS_OPTION); + run("create database db2 retentions 15s:7d,5m:21d,10m:10d", TSDB_CODE_PAR_INVALID_RETENTIONS_OPTION); +} + TEST_F(ParserInitialCTest, createDnode) { useDb("root", "test"); @@ -434,6 +445,13 @@ TEST_F(ParserInitialCTest, createStable) { "TTL 100 COMMENT 'test create table' SMA(c1, c2, c3) ROLLUP (MIN) FILE_FACTOR 0.1"); } +TEST_F(ParserInitialCTest, createStableSemanticCheck) { + useDb("root", "test"); + + run("CREATE STABLE stb2 (ts TIMESTAMP, c1 INT) TAGS (tag1 INT) ROLLUP(CEIL) FILE_FACTOR 0.1", + TSDB_CODE_PAR_INVALID_ROLLUP_OPTION, PARSER_STAGE_TRANSLATE); +} + TEST_F(ParserInitialCTest, createStream) { useDb("root", "test"); diff --git a/source/libs/parser/test/parSelectTest.cpp b/source/libs/parser/test/parSelectTest.cpp index 154e28a02c..51d302fe12 100644 --- a/source/libs/parser/test/parSelectTest.cpp +++ b/source/libs/parser/test/parSelectTest.cpp @@ -65,6 +65,8 @@ TEST_F(ParserSelectTest, condition) { run("SELECT c1 
FROM t1 WHERE ts in (true, false)"); + run("SELECT c1 FROM t1 WHERE NOT ts in (true, false)"); + run("SELECT * FROM t1 WHERE c1 > 10 and c1 is not null"); } @@ -212,9 +214,11 @@ TEST_F(ParserSelectTest, interval) { TEST_F(ParserSelectTest, intervalSemanticCheck) { useDb("root", "test"); - run("SELECT c1 FROM t1 INTERVAL(10s)", TSDB_CODE_PAR_NOT_SINGLE_GROUP, PARSER_STAGE_TRANSLATE); - run("SELECT DISTINCT c1, c2 FROM t1 WHERE c1 > 3 INTERVAL(1d) FILL(NEXT)", TSDB_CODE_PAR_INVALID_FILL_TIME_RANGE, - PARSER_STAGE_TRANSLATE); + run("SELECT c1 FROM t1 INTERVAL(10s)", TSDB_CODE_PAR_NOT_SINGLE_GROUP); + run("SELECT DISTINCT c1, c2 FROM t1 WHERE c1 > 3 INTERVAL(1d) FILL(NEXT)", TSDB_CODE_PAR_INVALID_FILL_TIME_RANGE); + run("SELECT HISTOGRAM(c1, 'log_bin', '{\"start\": -33,\"factor\": 55,\"count\": 5,\"infinity\": false}', 1) FROM t1 " + "WHERE ts > TIMESTAMP '2022-04-01 00:00:00' and ts < TIMESTAMP '2022-04-30 23:59:59' INTERVAL(10s) FILL(NULL)", + TSDB_CODE_PAR_FILL_NOT_ALLOWED_FUNC); } TEST_F(ParserSelectTest, subquery) { diff --git a/source/libs/parser/test/parTestUtil.h b/source/libs/parser/test/parTestUtil.h index 07f3d3cece..ad21252c2b 100644 --- a/source/libs/parser/test/parTestUtil.h +++ b/source/libs/parser/test/parTestUtil.h @@ -36,7 +36,7 @@ class ParserTestBase : public testing::Test { void login(const std::string& user); void useDb(const std::string& acctId, const std::string& db); - void run(const std::string& sql, int32_t expect = TSDB_CODE_SUCCESS, ParserStage checkStage = PARSER_STAGE_ALL); + void run(const std::string& sql, int32_t expect = TSDB_CODE_SUCCESS, ParserStage checkStage = PARSER_STAGE_TRANSLATE); virtual void checkDdl(const SQuery* pQuery, ParserStage stage); From 104a1bb59bb2e149f3684cfbafe633b17b206cc3 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Sat, 11 Jun 2022 16:11:18 +0800 Subject: [PATCH 13/16] fix(sync): snapshot overwrite config change --- source/libs/sync/src/syncMain.c | 8 ++++---- source/libs/sync/src/syncSnapshot.c | 19 
+++++++++++++++---- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index e1fbc0bac1..9899f36607 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1189,7 +1189,7 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex l for (int i = 0; i < oldConfig.replicaNum; ++i) { if (strcmp((oldConfig.nodeInfo)[i].nodeFqdn, pSyncNode->myNodeInfo.nodeFqdn) == 0 && (oldConfig.nodeInfo)[i].nodePort == pSyncNode->myNodeInfo.nodePort) { - IamInOld = false; + IamInOld = true; break; } } @@ -1197,7 +1197,7 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex l for (int i = 0; i < newConfig->replicaNum; ++i) { if (strcmp((newConfig->nodeInfo)[i].nodeFqdn, pSyncNode->myNodeInfo.nodeFqdn) == 0 && (newConfig->nodeInfo)[i].nodePort == pSyncNode->myNodeInfo.nodePort) { - IamInNew = false; + IamInNew = true; break; } } @@ -1240,7 +1240,7 @@ void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term) { } void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) { - sInfo("sync event vgId:%d become follower, %s", pSyncNode->vgId, debugStr); + sInfo("sync event vgId:%d become follower, isStandBy:%d, %s", pSyncNode->vgId, pSyncNode->pRaftCfg->isStandBy, debugStr); // maybe clear leader cache if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { @@ -1274,7 +1274,7 @@ void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) { // /\ UNCHANGED <> // void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { - sInfo("sync event vgId:%d become leader, %s", pSyncNode->vgId, debugStr); + sInfo("sync event vgId:%d become leader, isStandBy:%d, %s", pSyncNode->vgId, pSyncNode->pRaftCfg->isStandBy, debugStr); // state change pSyncNode->state = TAOS_SYNC_STATE_LEADER; diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 1eff9c98f4..36598cc2bd 100644 --- 
a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -597,12 +597,23 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { bool isDrop; if (IamInNew) { - sTrace("sync event update config by snapshot, lastIndex:%ld, lastTerm:%lu, lastConfigIndex:%ld ", - pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex); + sTrace("sync event vgId:%d update config by snapshot, lastIndex:%ld, lastTerm:%lu, lastConfigIndex:%ld ", + pSyncNode->vgId, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex); syncNodeUpdateConfig(pSyncNode, &newSyncCfg, pMsg->lastConfigIndex, &isDrop); } else { - sTrace("sync event do not update config by snapshot, I am not in newCfg, lastIndex:%ld, lastTerm:%lu, lastConfigIndex:%ld ", - pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex); + sTrace( + "sync event vgId:%d do not update config by snapshot, I am not in newCfg, lastIndex:%ld, lastTerm:%lu, " + "lastConfigIndex:%ld ", + pSyncNode->vgId, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex); + } + + // change isStandBy to normal + if (!isDrop) { + if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { + syncNodeBecomeLeader(pSyncNode, "config change"); + } else { + syncNodeBecomeFollower(pSyncNode, "config change"); + } } } From f92f1bbcc07538f59322639d686a84980f90929c Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Sat, 11 Jun 2022 16:20:15 +0800 Subject: [PATCH 14/16] fix(sync): snapshot overwrite config change --- source/libs/sync/src/syncMain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 9899f36607..c480df0ec0 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -35,7 +35,7 @@ #include "syncVoteMgr.h" #include "tref.h" -bool gRaftDetailLog = true; +bool gRaftDetailLog = false; static int32_t tsNodeRefId = -1; From c8af8daa611b88962a99926e1d282ac4200ca4ad Mon Sep 17 00:00:00 2001 From: 54liuyao 
<54liuyao@163.com> Date: Sat, 11 Jun 2022 16:03:37 +0800 Subject: [PATCH 15/16] feat(stream): state\session max delay --- source/libs/executor/src/timewindowoperator.c | 56 +++--- tests/script/jenkins/basic.txt | 1 + .../tsim/stream/distributeInterval0.sim | 176 ++++++++++++++++++ 3 files changed, 207 insertions(+), 26 deletions(-) create mode 100644 tests/script/tsim/stream/distributeInterval0.sim diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index ab595a3e34..e70a4c413c 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -2649,8 +2649,8 @@ typedef SResultWindowInfo* (*__get_win_info_)(void*); SResultWindowInfo* getSessionWinInfo(void* pData) { return (SResultWindowInfo*)pData; } SResultWindowInfo* getStateWinInfo(void* pData) { return &((SStateWindowInfo*)pData)->winInfo; } -int32_t closeSessionWindow(SArray* pWins, STimeWindowAggSupp* pTwSup, SArray* pClosed, int8_t calTrigger, - __get_win_info_ fn) { +int32_t closeSessionWindow(SArray* pWins, STimeWindowAggSupp* pTwSup, SArray* pClosed, + __get_win_info_ fn) { // Todo(liuyao) save window to tdb int32_t size = taosArrayGetSize(pWins); for (int32_t i = 0; i < size; i++) { @@ -2658,19 +2658,9 @@ int32_t closeSessionWindow(SArray* pWins, STimeWindowAggSupp* pTwSup, SArray* pC SResultWindowInfo* pSeWin = fn(pWin); if (pSeWin->win.ekey < pTwSup->maxTs - pTwSup->waterMark) { if (!pSeWin->isClosed) { - SResKeyPos* pos = taosMemoryMalloc(sizeof(SResKeyPos) + sizeof(uint64_t)); - if (pos == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - pos->groupId = 0; - pos->pos = pSeWin->pos; - *(int64_t*)pos->key = pSeWin->win.ekey; - if (!taosArrayPush(pClosed, &pos)) { - taosMemoryFree(pos); - return TSDB_CODE_OUT_OF_MEMORY; - } pSeWin->isClosed = true; - if (calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { + if (pTwSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { + int32_t code = saveResult(pSeWin->win.skey, 
pSeWin->pos.pageId, pSeWin->pos.offset, 0, pClosed); pSeWin->isOutput = true; } } @@ -2681,6 +2671,19 @@ int32_t closeSessionWindow(SArray* pWins, STimeWindowAggSupp* pTwSup, SArray* pC return TSDB_CODE_SUCCESS; } +int32_t getAllSessionWindow(SArray* pWins, SArray* pClosed, __get_win_info_ fn) { + int32_t size = taosArrayGetSize(pWins); + for (int32_t i = 0; i < size; i++) { + void* pWin = taosArrayGet(pWins, i); + SResultWindowInfo* pSeWin = fn(pWin); + if (!pSeWin->isClosed) { + int32_t code = saveResult(pSeWin->win.skey, pSeWin->pos.pageId, pSeWin->pos.offset, 0, pClosed); + pSeWin->isOutput = true; + } + } + return TSDB_CODE_SUCCESS; +} + static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { if (pOperator->status == OP_EXEC_DONE) { return NULL; @@ -2703,6 +2706,7 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); SHashObj* pStUpdated = taosHashInit(64, hashFn, true, HASH_NO_LOCK); SOperatorInfo* downstream = pOperator->pDownstream[0]; + SArray* pUpdated = taosArrayInit(16, POINTER_BYTES); while (1) { SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); if (pBlock == NULL) { @@ -2723,7 +2727,12 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { } taosArrayDestroy(pWins); continue; + } else if (pBlock->info.type == STREAM_GET_ALL && + pInfo->twAggSup.calTrigger == STREAM_TRIGGER_MAX_DELAY) { + getAllSessionWindow(pInfo->streamAggSup.pResultRows, pUpdated, getSessionWinInfo); + continue; } + if (isFinalSession(pInfo)) { int32_t childIndex = 0; // Todo(liuyao) get child id from SSDataBlock SOptrBasicInfo* pChildOp = taosArrayGetP(pInfo->pChildren, childIndex); @@ -2735,15 +2744,10 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { // restore the value pOperator->status = OP_RES_TO_RETURN; - SArray* pClosed = taosArrayInit(16, POINTER_BYTES); - closeSessionWindow(pInfo->streamAggSup.pResultRows, &pInfo->twAggSup, 
pClosed, pInfo->twAggSup.calTrigger, + closeSessionWindow(pInfo->streamAggSup.pResultRows, &pInfo->twAggSup, pUpdated, getSessionWinInfo); - SArray* pUpdated = taosArrayInit(16, POINTER_BYTES); copyUpdateResult(pStUpdated, pUpdated, pBInfo->pRes->info.groupId); taosHashCleanup(pStUpdated); - if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { - taosArrayAddAll(pUpdated, pClosed); - } finalizeUpdatedResult(pOperator->numOfExprs, pInfo->streamAggSup.pResultBuf, pUpdated, pInfo->binfo.rowCellInfoOffset); @@ -3067,6 +3071,7 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); SHashObj* pSeUpdated = taosHashInit(64, hashFn, true, HASH_NO_LOCK); SOperatorInfo* downstream = pOperator->pDownstream[0]; + SArray* pUpdated = taosArrayInit(16, POINTER_BYTES); while (1) { SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); if (pBlock == NULL) { @@ -3078,6 +3083,10 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { doClearStateWindows(&pInfo->streamAggSup, pBlock, pInfo->primaryTsIndex, &pInfo->stateCol, pInfo->stateCol.slotId, pSeUpdated, pInfo->pSeDeleted); continue; + } else if (pBlock->info.type == STREAM_GET_ALL && + pInfo->twAggSup.calTrigger == STREAM_TRIGGER_MAX_DELAY) { + getAllSessionWindow(pInfo->streamAggSup.pResultRows, pUpdated, getStateWinInfo); + continue; } doStreamStateAggImpl(pOperator, pBlock, pSeUpdated, pInfo->pSeDeleted); pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); @@ -3085,15 +3094,10 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { // restore the value pOperator->status = OP_RES_TO_RETURN; - SArray* pClosed = taosArrayInit(16, POINTER_BYTES); - closeSessionWindow(pInfo->streamAggSup.pResultRows, &pInfo->twAggSup, pClosed, pInfo->twAggSup.calTrigger, + closeSessionWindow(pInfo->streamAggSup.pResultRows, &pInfo->twAggSup, pUpdated, getStateWinInfo); - SArray* pUpdated = 
taosArrayInit(16, POINTER_BYTES); copyUpdateResult(pSeUpdated, pUpdated, pBInfo->pRes->info.groupId); taosHashCleanup(pSeUpdated); - if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { - taosArrayAddAll(pUpdated, pClosed); - } finalizeUpdatedResult(pOperator->numOfExprs, pInfo->streamAggSup.pResultBuf, pUpdated, pInfo->binfo.rowCellInfoOffset); diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index 5a8cf562a0..c179763f9a 100644 --- a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -71,6 +71,7 @@ ./test.sh -f tsim/stream/basic0.sim ./test.sh -f tsim/stream/basic1.sim ./test.sh -f tsim/stream/basic2.sim +# ./test.sh -f tsim/stream/distributeInterval0.sim # ./test.sh -f tsim/stream/session0.sim # ./test.sh -f tsim/stream/session1.sim # ./test.sh -f tsim/stream/state0.sim diff --git a/tests/script/tsim/stream/distributeInterval0.sim b/tests/script/tsim/stream/distributeInterval0.sim new file mode 100644 index 0000000000..f4f3e04f0a --- /dev/null +++ b/tests/script/tsim/stream/distributeInterval0.sim @@ -0,0 +1,176 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/deploy.sh -n dnode2 -i 2 + +system sh/exec.sh -n dnode1 -s start +sleep 50 +sql connect + +sql create dnode $hostname2 port 7200 + +system sh/exec.sh -n dnode2 -s start + +sql create database test vgroups 4; +sql use test; +sql create stable st(ts timestamp, a int, b int , c int, d double) tags(ta int,tb int,tc int); +sql create table ts1 using st tags(1,1,1); +sql create table ts2 using st tags(2,2,2); +sql create table ts3 using st tags(3,2,2); +sql create table ts4 using st tags(4,2,2); +sql create stream stream_t1 trigger at_once into streamtST1 as select _wstartts, count(*) c1, count(d) c2 , sum(a) c3 , max(b) c4, min(c) c5 from st interval(10s); + +sleep 1000 + +sql insert into ts1 values(1648791213001,1,12,3,1.0); +sql insert into ts2 values(1648791213001,1,12,3,1.0); + +sql insert into ts3 
values(1648791213001,1,12,3,1.0); +sql insert into ts4 values(1648791213001,1,12,3,1.0); + +sql insert into ts1 values(1648791213002,NULL,NULL,NULL,NULL); +sql insert into ts2 values(1648791213002,NULL,NULL,NULL,NULL); + +sql insert into ts3 values(1648791213002,NULL,NULL,NULL,NULL); +sql insert into ts4 values(1648791213002,NULL,NULL,NULL,NULL); + +sql insert into ts1 values(1648791223002,2,2,3,1.1); +sql insert into ts1 values(1648791233003,3,2,3,2.1); +sql insert into ts2 values(1648791243004,4,2,43,73.1); +sql insert into ts1 values(1648791213002,24,22,23,4.1); +sql insert into ts1 values(1648791243005,4,20,3,3.1); +sql insert into ts2 values(1648791243006,4,2,3,3.1) (1648791243007,4,2,3,3.1) ; +sql insert into ts1 values(1648791243008,4,2,30,3.1) (1648791243009,4,2,3,3.1) (1648791243010,4,2,3,3.1) ; +sql insert into ts2 values(1648791243011,4,2,3,3.1) (1648791243012,34,32,33,3.1) (1648791243013,4,2,3,3.1) (1648791243014,4,2,13,3.1); +sql insert into ts1 values(1648791243005,4,42,3,3.1) (1648791243003,4,2,33,3.1) (1648791243006,4,2,3,3.1) (1648791213001,1,52,13,1.0) (1648791223001,22,22,83,1.1) ; +sql insert into ts2 values(1648791243005,4,42,3,3.1) (1648791243003,4,2,33,3.1) (1648791243006,4,2,3,3.1) (1648791213001,1,52,13,1.0) (1648791223001,22,22,83,1.1) (1648791233004,13,12,13,2.1) ; +sql insert into ts1 values(1648791243006,4,2,3,3.1) (1648791213001,1,52,13,1.0) (1648791223001,22,22,83,1.1) ; + +sql insert into ts3 values(1648791223002,2,2,3,1.1); +sql insert into ts4 values(1648791233003,3,2,3,2.1); +sql insert into ts3 values(1648791243004,4,2,43,73.1); +sql insert into ts4 values(1648791213002,24,22,23,4.1); +sql insert into ts3 values(1648791243005,4,20,3,3.1); +sql insert into ts4 values(1648791243006,4,2,3,3.1) (1648791243007,4,2,3,3.1) ; +sql insert into ts3 values(1648791243008,4,2,30,3.1) (1648791243009,4,2,3,3.1) (1648791243010,4,2,3,3.1) ; +sql insert into ts4 values(1648791243011,4,2,3,3.1) (1648791243012,34,32,33,3.1) (1648791243013,4,2,3,3.1) 
(1648791243014,4,2,13,3.1); +sql insert into ts3 values(1648791243005,4,42,3,3.1) (1648791243003,4,2,33,3.1) (1648791243006,4,2,3,3.1) (1648791213001,1,52,13,1.0) (1648791223001,22,22,83,1.1) ; +sql insert into ts4 values(1648791243005,4,42,3,3.1) (1648791243003,4,2,33,3.1) (1648791243006,4,2,3,3.1) (1648791213001,1,52,13,1.0) (1648791223001,22,22,83,1.1) (1648791233004,13,12,13,2.1) ; +sql insert into ts3 values(1648791243006,4,2,3,3.1) (1648791213001,1,52,13,1.0) (1648791223001,22,22,83,1.1) ; + +$loop_count = 0 +loop1: +sql select * from streamtST1; + +sleep 300 +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +# row 0 +if $data01 != 8 then + print =====data01=$data01 + goto loop1 +endi + +if $data02 != 4 then + print =====data02=$data02 + goto loop1 +endi + +if $data03 != 4 then + print ======$data03 + return -1 +endi + +if $data04 != 52 then + print ======$data04 + return -1 +endi + +if $data05 != 13 then + print ======$data05 + return -1 +endi + +# row 1 +if $data11 != 6 then + print =====data11=$data11 + goto loop1 +endi + +if $data12 != 6 then + print =====data12=$data12 + goto loop1 +endi + +if $data13 != 92 then + print ======$data13 + return -1 +endi + +if $data14 != 22 then + print ======$data14 + return -1 +endi + +if $data15 != 3 then + print ======$data15 + return -1 +endi + +# row 2 +if $data21 != 4 then + print =====data21=$data21 + goto loop1 +endi + +if $data22 != 4 then + print =====data22=$data22 + goto loop1 +endi + +if $data23 != 32 then + print ======$data23 + return -1 +endi + +if $data24 != 12 then + print ======$data24 + return -1 +endi + +if $data25 != 3 then + print ======$data25 + return -1 +endi + +# row 3 +if $data31 != 30 then + print =====data31=$data31 + goto loop1 +endi + +if $data32 != 30 then + print =====data32=$data32 + goto loop1 +endi + +if $data33 != 180 then + print ======$data33 + return -1 +endi + +if $data34 != 42 then + print ======$data34 + return -1 +endi + +if $data35 != 3 then + print 
======$data35 + return -1 +endi + +sql select _wstartts, count(*) c1, count(d) c2 , sum(a) c3 , max(b) c4, min(c) c5, avg(d) from st interval(10s); + +system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file From 5aeced162cce580553e16b20a076da8b4e479822 Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Sat, 11 Jun 2022 16:44:27 +0800 Subject: [PATCH 16/16] fix: some problems of parser --- tests/script/tsim/db/alter_option.sim | 4 ++-- tests/script/tsim/db/basic6.sim | 2 +- tests/script/tsim/db/create_all_options.sim | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/script/tsim/db/alter_option.sim b/tests/script/tsim/db/alter_option.sim index 7444511504..12babea097 100644 --- a/tests/script/tsim/db/alter_option.sim +++ b/tests/script/tsim/db/alter_option.sim @@ -95,7 +95,7 @@ endi if $data6_db != 345600 then # days return -1 endi -if $data7_db != 1440000,1440000,1440000 then # keep +if $data7_db != 1440000m,1440000m,1440000m then # keep return -1 endi if $data8_db != 96 then # buffer @@ -232,7 +232,7 @@ print ============== modify keep sql alter database db keep 2400 sql show databases print keep $data7_db -if $data7_db != 3456000,3456000,3456000 then +if $data7_db != 3456000m,3456000m,3456000m then return -1 endi diff --git a/tests/script/tsim/db/basic6.sim b/tests/script/tsim/db/basic6.sim index 9075ebb2e8..142460f214 100644 --- a/tests/script/tsim/db/basic6.sim +++ b/tests/script/tsim/db/basic6.sim @@ -37,7 +37,7 @@ endi if $data26 != 2880 then return -1 endi -if $data27 != 14400,14400,14400 then +if $data27 != 14400m,14400m,14400m then return -1 endi #if $data28 != 32 then diff --git a/tests/script/tsim/db/create_all_options.sim b/tests/script/tsim/db/create_all_options.sim index 88f0378d61..fac385a9a6 100644 --- a/tests/script/tsim/db/create_all_options.sim +++ b/tests/script/tsim/db/create_all_options.sim @@ -116,7 +116,7 @@ endi if $data6_db != 14400 then # days return -1 endi -if $data7_db != 5256000,5256000,5256000 
then # keep +if $data7_db != 5256000m,5256000m,5256000m then # keep return -1 endi if $data8_db != 96 then # buffer