From ae7550eb8639ac616d86ff5dcee5a7d23d20ad5b Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Fri, 12 Aug 2022 21:00:48 +0800 Subject: [PATCH 01/18] enh: rsma exec in async mode --- include/common/tmsg.h | 4 + source/dnode/vnode/inc/vnode.h | 1 + source/dnode/vnode/src/inc/sma.h | 12 +- source/dnode/vnode/src/inc/vnodeInt.h | 2 +- source/dnode/vnode/src/sma/smaEnv.c | 5 +- source/dnode/vnode/src/sma/smaRollup.c | 80 +++++++++-- source/dnode/vnode/src/tsdb/tsdbRead.c | 26 ---- source/dnode/vnode/src/vnd/vnodeQuery.c | 19 +++ source/dnode/vnode/src/vnd/vnodeSvr.c | 4 +- source/libs/executor/inc/executil.h | 3 +- source/libs/executor/inc/executorimpl.h | 2 +- source/libs/executor/inc/tsimplehash.h | 42 ++++-- source/libs/executor/src/executil.c | 9 +- source/libs/executor/src/executorimpl.c | 22 +-- source/libs/executor/src/scanoperator.c | 4 +- source/libs/executor/src/timewindowoperator.c | 54 ++++---- source/libs/executor/src/tsimplehash.c | 126 ++++++++++++------ .../libs/executor/test/tSimpleHashTests.cpp | 14 +- 18 files changed, 273 insertions(+), 156 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index df127af256..24092c7e44 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2658,6 +2658,10 @@ typedef struct { SEpSet epSet; } SVgEpSet; +typedef struct { + int32_t padding; +} SRSmaExecMsg; + typedef struct { int64_t suid; int8_t level; diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 18a7583f4c..e93a62b27b 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -63,6 +63,7 @@ void vnodeGetInfo(SVnode *pVnode, const char **dbname, int32_t *vgId); int32_t vnodeProcessCreateTSma(SVnode *pVnode, void *pCont, uint32_t contLen); int32_t vnodeGetAllTableList(SVnode *pVnode, uint64_t uid, SArray *list); int32_t vnodeGetCtbIdList(SVnode *pVnode, int64_t suid, SArray *list); +int32_t vnodeGetStbIdList(SVnode *pVnode, int64_t suid, SArray* list); void *vnodeGetIdx(SVnode *pVnode); void *vnodeGetIvtIdx(SVnode *pVnode); diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 944d7759b2..fb8352d543 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -122,17 +122,19 @@ struct SRSmaInfoItem { }; struct SRSmaInfo { - STSchema *pTSchema; - int64_t suid; - int64_t refId; // refId of SRSmaStat - int8_t delFlag; + STSchema *pTSchema; + STaosQueue *queue; // buffer queue of SubmitReq + STaosQall *qall; + int64_t suid; + int64_t refId; // refId of SRSmaStat + int8_t delFlag; T_REF_DECLARE() SRSmaInfoItem items[TSDB_RETENTION_L2]; void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t void *iTaskInfo[TSDB_RETENTION_L2]; // immutable }; -#define RSMA_INFO_HEAD_LEN 32 +#define RSMA_INFO_HEAD_LEN offsetof(SRSmaInfo, items) #define RSMA_INFO_IS_DEL(r) ((r)->delFlag == 1) #define RSMA_INFO_SET_DEL(r) ((r)->delFlag = 1) #define RSMA_INFO_QTASK(r, i) ((r)->taskInfo[i]) diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 43bb92ec23..513f4da33c 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -169,7 +169,7 @@ int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRecoverRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg); -int32_t tsdbGetStbIdList(SMeta* pMeta, int64_t suid, SArray* list); + SSubmitReq* tdBlockToSubmit(SVnode* pVnode, const SArray* pBlocks, const STSchema* pSchema, bool createTb, int64_t suid, const char* stbFullName, int32_t vgId, SBatchDeleteReq* pDeleteReq); diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index ccb6ad3a72..50db1123ae 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -264,8 +264,6 @@ static void tdDestroyRSmaStat(void *pRSmaStat) { atomic_store_8(RSMA_TRIGGER_STAT(pStat), TASK_TRIGGER_STAT_CANCELLED); // step 2: destroy the rsma info and associated fetch tasks - // TODO: use taosHashSetFreeFp when taosHashSetFreeFp is ready. -#if 1 if (taosHashGetSize(RSMA_INFO_HASH(pStat)) > 0) { void *infoHash = taosHashIterate(RSMA_INFO_HASH(pStat), NULL); while (infoHash) { @@ -274,7 +272,6 @@ static void tdDestroyRSmaStat(void *pRSmaStat) { infoHash = taosHashIterate(RSMA_INFO_HASH(pStat), infoHash); } } -#endif taosHashCleanup(RSMA_INFO_HASH(pStat)); // step 3: wait all triggered fetch tasks finished @@ -292,7 +289,7 @@ static void tdDestroyRSmaStat(void *pRSmaStat) { nLoops = 0; } } - + // step 4: free pStat taosMemoryFreeClear(pStat); } diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index b7a2efd489..1660223d77 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -139,6 +139,14 @@ void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree) { if (isDeepFree) { taosMemoryFreeClear(pInfo->pTSchema); } + + if (isDeepFree) { + if (pInfo->queue) taosCloseQueue(pInfo->queue); + if (pInfo->qall) taosFreeQall(pInfo->qall); + pInfo->queue = NULL; + pInfo->qall = NULL; + } + taosMemoryFree(pInfo); } @@ -179,7 +187,7 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids) for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pRSmaInfo->taskInfo[i]) { - if ((qUpdateQualifiedTableId(pRSmaInfo->taskInfo[i], tbUids, true) < 0)) { + if (((terrno = qUpdateQualifiedTableId(pRSmaInfo->taskInfo[i], tbUids, true)) < 0)) { tdReleaseRSmaInfo(pSma, pRSmaInfo); smaError("vgId:%d, update tbUidList failed for uid:%" PRIi64 " level %d since %s", SMA_VID(pSma), *suid, i, terrstr()); @@ -351,6 +359,12 @@ int32_t tdProcessRSmaCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con goto _err; } pRSmaInfo->pTSchema = pTSchema; + if (!(pRSmaInfo->queue = taosOpenQueue())) { + goto _err; + } + if (!(pRSmaInfo->qall = taosAllocateQall())) { + goto _err; + } pRSmaInfo->suid = suid; pRSmaInfo->refId = RSMA_REF_ID(pStat); T_REF_INIT_VAL(pRSmaInfo, 1); @@ -615,7 +629,7 @@ static int32_t tdRSmaFetchAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSm while (1) { uint64_t ts; int32_t code = qExecTaskOpt(taskInfo, pResList, &ts); - if (code < 0) { + if (code < 0) { if (code == TSDB_CODE_QRY_IN_EXEC) { break; } else { @@ -662,10 +676,9 @@ static int32_t tdRSmaFetchAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSm goto _err; } taosMemoryFreeClear(pReq); - + smaDebug("vgId:%d, process submit req for rsma table %" PRIi64 " level %" PRIi8 " version:%" PRIi64, SMA_VID(pSma), suid, pItem->level, output->info.version); - } } @@ -841,30 +854,45 @@ int32_t tdProcessRSmaSubmit(SSma *pSma, void *pMsg, int32_t inputType) { return TSDB_CODE_SUCCESS; } +/** + * @brief retrieve rsma meta and init + * + * @param pSma + * @param nTables number of tables of rsma + * @return int32_t + */ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) { - SVnode *pVnode = pSma->pVnode; + SVnode *pVnode = pSma->pVnode; + SArray *suidList = NULL; + STbUidStore uidStore = {0}; + SMetaReader mr = {0}; - SArray *suidList = taosArrayInit(1, sizeof(tb_uid_t)); - if (tsdbGetStbIdList(SMA_META(pSma), 0, suidList) < 0) { - taosArrayDestroy(suidList); + if (!(suidList = taosArrayInit(1, sizeof(tb_uid_t)))) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + if (vnodeGetStbIdList(pSma->pVnode, 0, suidList) < 0) { smaError("vgId:%d, failed to restore rsma env since get stb id list error: %s", TD_VID(pVnode), terrstr()); - return TSDB_CODE_FAILED; + goto _err; } int64_t arrSize = taosArrayGetSize(suidList); - if (nTables) { - *nTables = arrSize; - } - if (arrSize == 0) { + if (nTables) { + *nTables = 0; + } taosArrayDestroy(suidList); smaDebug("vgId:%d, no need to restore rsma env since empty stb id list", TD_VID(pVnode)); return TSDB_CODE_SUCCESS; } - SMetaReader mr = {0}; + int64_t nRsmaTables = 0; metaReaderInit(&mr, SMA_META(pSma), 0); + if (!(uidStore.tbUids = taosArrayInit(1024, sizeof(tb_uid_t)))) { + goto _err; + } for (int64_t i = 0; i < arrSize; ++i) { tb_uid_t suid = *(tb_uid_t *)taosArrayGet(suidList, i); smaDebug("vgId:%d, rsma restore, suid is %" PRIi64, TD_VID(pVnode), suid); @@ -877,6 +905,7 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) { ASSERT(mr.me.type == TSDB_SUPER_TABLE); ASSERT(mr.me.uid == suid); if (TABLE_IS_ROLLUP(mr.me.flags)) { + ++nRsmaTables; SRSmaParam *param = &mr.me.stbEntry.rsmaParam; for (int i = 0; i < TSDB_RETENTION_L2; ++i) { smaDebug("vgId:%d, rsma restore, table:%" PRIi64 " level:%d, maxdelay:%" PRIi64 " watermark:%" PRIi64 @@ -887,17 +916,40 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) { smaError("vgId:%d, rsma restore env failed for %" PRIi64 " since %s", TD_VID(pVnode), suid, terrstr()); goto _err; } + + // reload all ctbUids for suid + uidStore.suid = suid; + if (vnodeGetCtbIdList(pVnode, suid, uidStore.tbUids) < 0) { + smaError("vgId:%d, rsma restore, get ctb idlist failed for %" PRIi64 " since %s", TD_VID(pVnode), suid, + terrstr()); + goto _err; + } + + if (tdUpdateTbUidList(pVnode->pSma, &uidStore) < 0) { + smaError("vgId:%d, rsma restore, update tb uid list failed for %" PRIi64 " since %s", TD_VID(pVnode), suid, + terrstr()); + goto _err; + } + + taosArrayClear(uidStore.tbUids); + smaDebug("vgId:%d, rsma restore env success for %" PRIi64, TD_VID(pVnode), suid); } } metaReaderClear(&mr); taosArrayDestroy(suidList); + tdUidStoreDestory(&uidStore); + + if (nTables) { + *nTables = nRsmaTables; + } return TSDB_CODE_SUCCESS; _err: metaReaderClear(&mr); taosArrayDestroy(suidList); + tdUidStoreDestory(&uidStore); return TSDB_CODE_FAILED; } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 0831f3d75a..335b311d00 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -2585,32 +2585,6 @@ void* tsdbGetIvtIdx(SMeta* pMeta) { uint64_t getReaderMaxVersion(STsdbReader* pReader) { return pReader->verRange.maxVer; } -/** - * @brief Get all suids since suid - * - * @param pMeta - * @param suid return all suids in one vnode if suid is 0 - * @param list - * @return int32_t - */ -int32_t tsdbGetStbIdList(SMeta* pMeta, int64_t suid, SArray* list) { - SMStbCursor* pCur = metaOpenStbCursor(pMeta, suid); - if (!pCur) { - return TSDB_CODE_FAILED; - } - - while (1) { - tb_uid_t id = metaStbCursorNext(pCur); - if (id == 0) { - break; - } - - taosArrayPush(list, &id); - } - - metaCloseStbCursor(pCur); - return TSDB_CODE_SUCCESS; -} // ====================================== EXPOSED APIs ====================================== int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTableList, STsdbReader** ppReader, diff --git a/source/dnode/vnode/src/vnd/vnodeQuery.c b/source/dnode/vnode/src/vnd/vnodeQuery.c index d55f1796ad..8d799e919d 100644 --- a/source/dnode/vnode/src/vnd/vnodeQuery.c +++ b/source/dnode/vnode/src/vnd/vnodeQuery.c @@ -424,6 +424,25 @@ int32_t vnodeGetCtbIdList(SVnode *pVnode, int64_t suid, SArray *list) { return TSDB_CODE_SUCCESS; } +int32_t vnodeGetStbIdList(SVnode* pVnode, int64_t suid, SArray* list) { + SMStbCursor* pCur = metaOpenStbCursor(pVnode->pMeta, suid); + if (!pCur) { + return TSDB_CODE_FAILED; + } + + while (1) { + tb_uid_t id = metaStbCursorNext(pCur); + if (id == 0) { + break; + } + + taosArrayPush(list, &id); + } + + metaCloseStbCursor(pCur); + return TSDB_CODE_SUCCESS; +} + int32_t vnodeGetCtbNum(SVnode *pVnode, int64_t suid, int64_t *num) { SMCtbCursor *pCur = metaOpenCtbCursor(pVnode->pMeta, suid); if (!pCur) { diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index d5c5e18668..3a25933ec4 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -522,7 +522,9 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t version, void *pR } tqUpdateTbUidList(pVnode->pTq, tbUids, true); - tdUpdateTbUidList(pVnode->pSma, pStore); + if (tdUpdateTbUidList(pVnode->pSma, pStore) < 0) { + goto _exit; + } tdUidStoreFree(pStore); // prepare rsp diff --git a/source/libs/executor/inc/executil.h b/source/libs/executor/inc/executil.h index 52c73f85f5..58b2c1b095 100644 --- a/source/libs/executor/inc/executil.h +++ b/source/libs/executor/inc/executil.h @@ -22,6 +22,7 @@ #include "tbuffer.h" #include "tcommon.h" #include "tpagedbuf.h" +#include "tsimplehash.h" #define SET_RES_WINDOW_KEY(_k, _ori, _len, _uid) \ do { \ @@ -102,7 +103,7 @@ static FORCE_INLINE void setResultBufPageDirty(SDiskbasedBuf* pBuf, SResultRowPo setBufPageDirty(pPage, true); } -void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SHashObj* pHashmap, int32_t order); +void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SSHashObj* pHashmap, int32_t order); void cleanupGroupResInfo(SGroupResInfo* pGroupResInfo); void initMultiResInfoFromArrayList(SGroupResInfo* pGroupResInfo, SArray* pArrayList); diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 410bab341c..b7b0d5123e 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -296,7 +296,7 @@ enum { }; typedef struct SAggSupporter { - SHashObj* pResultRowHashTable; // quick locate the window object for each result + SSHashObj* pResultRowHashTable; // quick locate the window object for each result char* keyBuf; // window key buffer SDiskbasedBuf* pResultBuf; // query result buffer based on blocked-wised disk file int32_t resultRowSize; // the result buffer size for each result row, with the meta data size for each row diff --git a/source/libs/executor/inc/tsimplehash.h b/source/libs/executor/inc/tsimplehash.h index a56f8e8c04..4c5a80e2f1 100644 --- a/source/libs/executor/inc/tsimplehash.h +++ b/source/libs/executor/inc/tsimplehash.h @@ -17,7 +17,6 @@ #define TDENGINE_TSIMPLEHASH_H #include "tarray.h" -#include "tlockfree.h" #ifdef __cplusplus extern "C" { @@ -27,6 +26,10 @@ typedef uint32_t (*_hash_fn_t)(const char *, uint32_t); typedef int32_t (*_equal_fn_t)(const void *, const void *, size_t len); typedef void (*_hash_free_fn_t)(void *); +/** + * @brief single thread hash + * + */ typedef struct SSHashObj SSHashObj; /** @@ -36,7 +39,7 @@ typedef struct SSHashObj SSHashObj; * @param fn hash function to generate the hash value * @return */ -SSHashObj *tSimpleHashInit(size_t capacity, _hash_fn_t fn, size_t keyLen, size_t dataLen); +SSHashObj *tSimpleHashInit(size_t capacity, _hash_fn_t fn); /** * return the size of hash table @@ -48,22 +51,26 @@ int32_t tSimpleHashGetSize(const SSHashObj *pHashObj); int32_t tSimpleHashPrint(const SSHashObj *pHashObj); /** - * put element into hash table, if the element with the same key exists, update it - * @param pHashObj - * @param key - * @param data - * @return + * @brief put element into hash table, if the element with the same key exists, update it + * + * @param pHashObj + * @param key + * @param keyLen + * @param data + * @param dataLen + * @return int32_t */ -int32_t tSimpleHashPut(SSHashObj *pHashObj, const void *key, const void *data); +int32_t tSimpleHashPut(SSHashObj *pHashObj, const void *key, size_t keyLen, const void *data, size_t dataLen); /** * return the payload data with the specified key * * @param pHashObj * @param key + * @param keyLen * @return */ -void *tSimpleHashGet(SSHashObj *pHashObj, const void *key); +void *tSimpleHashGet(SSHashObj *pHashObj, const void *key, size_t keyLen); /** * remove item with the specified key @@ -71,7 +78,7 @@ void *tSimpleHashGet(SSHashObj *pHashObj, const void *key); * @param key * @param keyLen */ -int32_t tSimpleHashRemove(SSHashObj *pHashObj, const void *key); +int32_t tSimpleHashRemove(SSHashObj *pHashObj, const void *key, size_t keyLen); /** * Clear the hash table. @@ -98,7 +105,7 @@ size_t tSimpleHashGetMemSize(const SSHashObj *pHashObj); * @param keyLen * @return */ -void *tSimpleHashGetKey(const SSHashObj* pHashObj, void *data, size_t* keyLen); +void *tSimpleHashGetKey(void *data, size_t* keyLen); /** * Create the hash table iterator @@ -109,7 +116,18 @@ void *tSimpleHashGetKey(const SSHashObj* pHashObj, void *data, size_t* keyLen); */ void *tSimpleHashIterate(const SSHashObj *pHashObj, void *data, int32_t *iter); +/** + * Create the hash table iterator + * + * @param pHashObj + * @param data + * @param key + * @param iter + * @return void* + */ +void *tSimpleHashIterateKV(const SSHashObj *pHashObj, void *data, void **key, int32_t *iter); + #ifdef __cplusplus } #endif -#endif // TDENGINE_TSIMPLEHASH_H +#endif // TDENGINE_TSIMPLEHASH_H \ No newline at end of file diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 34247d3b47..2e6bd312f3 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -97,7 +97,7 @@ int32_t resultrowComparAsc(const void* p1, const void* p2) { static int32_t resultrowComparDesc(const void* p1, const void* p2) { return resultrowComparAsc(p2, p1); } -void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SHashObj* pHashmap, int32_t order) { +void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SSHashObj* pHashmap, int32_t order) { if (pGroupResInfo->pRows != NULL) { taosArrayDestroy(pGroupResInfo->pRows); } @@ -106,9 +106,10 @@ void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SHashObj* pHashmap, int void* pData = NULL; pGroupResInfo->pRows = taosArrayInit(10, POINTER_BYTES); - size_t keyLen = 0; - while ((pData = taosHashIterate(pHashmap, pData)) != NULL) { - void* key = taosHashGetKey(pData, &keyLen); + size_t keyLen = 0; + int32_t iter = 0; + while ((pData = tSimpleHashIterate(pHashmap, pData, &iter)) != NULL) { + void* key = tSimpleHashGetKey(pData, &keyLen); SResKeyPos* p = taosMemoryMalloc(keyLen + sizeof(SResultRowPosition)); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index be129cb6b4..4709d1818c 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -250,7 +250,7 @@ SResultRow* doSetResultOutBufByKey(SDiskbasedBuf* pResultBuf, SResultRowInfo* pR SET_RES_WINDOW_KEY(pSup->keyBuf, pData, bytes, groupId); SResultRowPosition* p1 = - (SResultRowPosition*)taosHashGet(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); + (SResultRowPosition*)tSimpleHashGet(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); SResultRow* pResult = NULL; @@ -292,7 +292,7 @@ SResultRow* doSetResultOutBufByKey(SDiskbasedBuf* pResultBuf, SResultRowInfo* pR // add a new result set for a new group SResultRowPosition pos = {.pageId = pResult->pageId, .offset = pResult->offset}; - taosHashPut(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes), &pos, + tSimpleHashPut(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes), &pos, sizeof(SResultRowPosition)); } @@ -301,7 +301,7 @@ SResultRow* doSetResultOutBufByKey(SDiskbasedBuf* pResultBuf, SResultRowInfo* pR // too many time window in query if (pTaskInfo->execModel == OPTR_EXEC_MODEL_BATCH && - taosHashGetSize(pSup->pResultRowHashTable) > MAX_INTERVAL_TIME_WINDOW) { + tSimpleHashGetSize(pSup->pResultRowHashTable) > MAX_INTERVAL_TIME_WINDOW) { longjmp(pTaskInfo->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW); } @@ -3013,7 +3013,7 @@ int32_t aggEncodeResultRow(SOperatorInfo* pOperator, char** result, int32_t* len } SOptrBasicInfo* pInfo = (SOptrBasicInfo*)(pOperator->info); SAggSupporter* pSup = (SAggSupporter*)POINTER_SHIFT(pOperator->info, sizeof(SOptrBasicInfo)); - int32_t size = taosHashGetSize(pSup->pResultRowHashTable); + int32_t size = tSimpleHashGetSize(pSup->pResultRowHashTable); size_t keyLen = sizeof(uint64_t) * 2; // estimate the key length int32_t totalSize = sizeof(int32_t) + sizeof(int32_t) + size * (sizeof(int32_t) + keyLen + sizeof(int32_t) + pSup->resultRowSize); @@ -3041,9 +3041,10 @@ int32_t aggEncodeResultRow(SOperatorInfo* pOperator, char** result, int32_t* len setBufPageDirty(pPage, true); releaseBufPage(pSup->pResultBuf, pPage); - void* pIter = taosHashIterate(pSup->pResultRowHashTable, NULL); + int32_t iter = 0; + void* pIter = tSimpleHashIterate(pSup->pResultRowHashTable, NULL, &iter); while (pIter) { - void* key = taosHashGetKey(pIter, &keyLen); + void* key = tSimpleHashGetKey(pIter, &keyLen); SResultRowPosition* p1 = (SResultRowPosition*)pIter; pPage = (SFilePage*)getBufPage(pSup->pResultBuf, p1->pageId); @@ -3075,7 +3076,7 @@ int32_t aggEncodeResultRow(SOperatorInfo* pOperator, char** result, int32_t* len memcpy(*result + offset, pRow, pSup->resultRowSize); offset += pSup->resultRowSize; - pIter = taosHashIterate(pSup->pResultRowHashTable, pIter); + pIter = tSimpleHashIterate(pSup->pResultRowHashTable, pIter, &iter); } *(int32_t*)(*result) = offset; @@ -3110,7 +3111,7 @@ int32_t aggDecodeResultRow(SOperatorInfo* pOperator, char* result) { // add a new result set for a new group SResultRowPosition pos = {.pageId = resultRow->pageId, .offset = resultRow->offset}; - taosHashPut(pSup->pResultRowHashTable, result + offset, keyLen, &pos, sizeof(SResultRowPosition)); + tSimpleHashPut(pSup->pResultRowHashTable, result + offset, keyLen, &pos, sizeof(SResultRowPosition)); offset += keyLen; int32_t valueLen = *(int32_t*)(result + offset); @@ -3407,7 +3408,8 @@ int32_t doInitAggInfoSup(SAggSupporter* pAggSup, SqlFunctionCtx* pCtx, int32_t n pAggSup->resultRowSize = getResultRowSize(pCtx, numOfOutput); pAggSup->keyBuf = taosMemoryCalloc(1, keyBufSize + POINTER_BYTES + sizeof(int64_t)); - pAggSup->pResultRowHashTable = taosHashInit(10, hashFn, true, HASH_NO_LOCK); + // pAggSup->pResultRowHashTable = taosHashInit(10, hashFn, true, HASH_NO_LOCK); + pAggSup->pResultRowHashTable = tSimpleHashInit(100000, hashFn); if (pAggSup->keyBuf == NULL || pAggSup->pResultRowHashTable == NULL) { return TSDB_CODE_OUT_OF_MEMORY; @@ -3433,7 +3435,7 @@ int32_t doInitAggInfoSup(SAggSupporter* pAggSup, SqlFunctionCtx* pCtx, int32_t n void cleanupAggSup(SAggSupporter* pAggSup) { taosMemoryFreeClear(pAggSup->keyBuf); - taosHashCleanup(pAggSup->pResultRowHashTable); + tSimpleHashCleanup(pAggSup->pResultRowHashTable); destroyDiskbasedBuf(pAggSup->pResultBuf); } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index d8de8df163..c404fca597 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -178,8 +178,8 @@ static SResultRow* getTableGroupOutputBuf(SOperatorInfo* pOperator, uint64_t gro STableScanInfo* pTableScanInfo = pOperator->info; - SResultRowPosition* p1 = - (SResultRowPosition*)taosHashGet(pTableScanInfo->pdInfo.pAggSup->pResultRowHashTable, buf, GET_RES_WINDOW_KEY_LEN(sizeof(groupId))); + SResultRowPosition* p1 = (SResultRowPosition*)tSimpleHashGet(pTableScanInfo->pdInfo.pAggSup->pResultRowHashTable, buf, + GET_RES_WINDOW_KEY_LEN(sizeof(groupId))); if (p1 == NULL) { return NULL; diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 6778e97d7a..abc1a76d74 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -1383,7 +1383,7 @@ bool doClearWindow(SAggSupporter* pAggSup, SExprSupp* pSup, char* pData, int16_t int32_t numOfOutput) { SET_RES_WINDOW_KEY(pAggSup->keyBuf, pData, bytes, groupId); SResultRowPosition* p1 = - (SResultRowPosition*)taosHashGet(pAggSup->pResultRowHashTable, pAggSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); + (SResultRowPosition*)tSimpleHashGet(pAggSup->pResultRowHashTable, pAggSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); if (!p1) { // window has been closed return false; @@ -1396,14 +1396,14 @@ bool doDeleteIntervalWindow(SAggSupporter* pAggSup, TSKEY ts, uint64_t groupId) size_t bytes = sizeof(TSKEY); SET_RES_WINDOW_KEY(pAggSup->keyBuf, &ts, bytes, groupId); SResultRowPosition* p1 = - (SResultRowPosition*)taosHashGet(pAggSup->pResultRowHashTable, pAggSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); + (SResultRowPosition*)tSimpleHashGet(pAggSup->pResultRowHashTable, pAggSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); if (!p1) { // window has been closed return false; } // SFilePage* bufPage = getBufPage(pAggSup->pResultBuf, p1->pageId); // dBufSetBufPageRecycled(pAggSup->pResultBuf, bufPage); - taosHashRemove(pAggSup->pResultRowHashTable, pAggSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); + tSimpleHashRemove(pAggSup->pResultRowHashTable, pAggSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); return true; } @@ -1453,11 +1453,12 @@ static void doClearWindows(SAggSupporter* pAggSup, SExprSupp* pSup1, SInterval* } } -static int32_t getAllIntervalWindow(SHashObj* pHashMap, SHashObj* resWins) { +static int32_t getAllIntervalWindow(SSHashObj* pHashMap, SHashObj* resWins) { void* pIte = NULL; size_t keyLen = 0; - while ((pIte = taosHashIterate(pHashMap, pIte)) != NULL) { - void* key = taosHashGetKey(pIte, &keyLen); + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { + void* key = tSimpleHashGetKey(pIte, &keyLen); uint64_t groupId = *(uint64_t*)key; ASSERT(keyLen == GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY))); TSKEY ts = *(int64_t*)((char*)key + sizeof(uint64_t)); @@ -1470,16 +1471,18 @@ static int32_t getAllIntervalWindow(SHashObj* pHashMap, SHashObj* resWins) { return TSDB_CODE_SUCCESS; } -static int32_t closeIntervalWindow(SHashObj* pHashMap, STimeWindowAggSupp* pSup, SInterval* pInterval, +static int32_t closeIntervalWindow(SSHashObj* pHashMap, STimeWindowAggSupp* pSup, SInterval* pInterval, SHashObj* pPullDataMap, SHashObj* closeWins, SArray* pRecyPages, SDiskbasedBuf* pDiscBuf) { qDebug("===stream===close interval window"); void* pIte = NULL; - size_t keyLen = 0; - while ((pIte = taosHashIterate(pHashMap, pIte)) != NULL) { - void* key = taosHashGetKey(pIte, &keyLen); + void* key = NULL; + size_t keyLen = GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY)); + int32_t iter = 0; + while ((pIte = tSimpleHashIterateKV(pHashMap, pIte, &key, &iter)) != NULL) { + // void* key = tSimpleHashGetKey(pIte, &keyLen); uint64_t groupId = *(uint64_t*)key; - ASSERT(keyLen == GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY))); + // ASSERT(keyLen == GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY))); TSKEY ts = *(int64_t*)((char*)key + sizeof(uint64_t)); STimeWindow win; win.skey = ts; @@ -1515,7 +1518,7 @@ static int32_t closeIntervalWindow(SHashObj* pHashMap, STimeWindowAggSupp* pSup, } char keyBuf[GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY))]; SET_RES_WINDOW_KEY(keyBuf, &ts, sizeof(TSKEY), groupId); - taosHashRemove(pHashMap, keyBuf, keyLen); + tSimpleHashRemove(pHashMap, keyBuf, keyLen); } } return TSDB_CODE_SUCCESS; @@ -2853,7 +2856,7 @@ bool hasIntervalWindow(SAggSupporter* pSup, TSKEY ts, uint64_t groupId) { int32_t bytes = sizeof(TSKEY); SET_RES_WINDOW_KEY(pSup->keyBuf, &ts, bytes, groupId); SResultRowPosition* p1 = - (SResultRowPosition*)taosHashGet(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); + (SResultRowPosition*)tSimpleHashGet(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); return p1 != NULL; } @@ -2894,8 +2897,9 @@ static void rebuildIntervalWindow(SStreamFinalIntervalOperatorInfo* pInfo, SExpr bool isDeletedWindow(STimeWindow* pWin, uint64_t groupId, SAggSupporter* pSup) { SET_RES_WINDOW_KEY(pSup->keyBuf, &pWin->skey, sizeof(int64_t), groupId); - SResultRowPosition* p1 = (SResultRowPosition*)taosHashGet(pSup->pResultRowHashTable, pSup->keyBuf, - GET_RES_WINDOW_KEY_LEN(sizeof(int64_t))); + SResultRowPosition* p1 = (SResultRowPosition*)tSimpleHashGet(pSup->pResultRowHashTable, pSup->keyBuf, + GET_RES_WINDOW_KEY_LEN(sizeof(int64_t))); + return p1 == NULL; } @@ -3023,7 +3027,7 @@ static void doHashInterval(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBloc } static void clearStreamIntervalOperator(SStreamFinalIntervalOperatorInfo* pInfo) { - taosHashClear(pInfo->aggSup.pResultRowHashTable); + tSimpleHashClear(pInfo->aggSup.pResultRowHashTable); clearDiskbasedBuf(pInfo->aggSup.pResultBuf); cleanupResultRowInfo(&pInfo->binfo.resultRowInfo); initResultRowInfo(&pInfo->binfo.resultRowInfo); @@ -4938,14 +4942,14 @@ static int32_t outputMergeAlignedIntervalResult(SOperatorInfo* pOperatorInfo, ui SExprSupp* pSup = &pOperatorInfo->exprSupp; SET_RES_WINDOW_KEY(iaInfo->aggSup.keyBuf, &wstartTs, TSDB_KEYSIZE, tableGroupId); - SResultRowPosition* p1 = (SResultRowPosition*)taosHashGet(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, - GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); + SResultRowPosition* p1 = (SResultRowPosition*)tSimpleHashGet( + iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); ASSERT(p1 != NULL); finalizeResultRowIntoResultDataBlock(iaInfo->aggSup.pResultBuf, p1, pSup->pCtx, pSup->pExprInfo, pSup->numOfExprs, pSup->rowEntryInfoOffset, pResultBlock, pTaskInfo); - taosHashRemove(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); - ASSERT(taosHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 0); + tSimpleHashRemove(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); + ASSERT(tSimpleHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 0); return TSDB_CODE_SUCCESS; } @@ -4968,7 +4972,7 @@ static void doMergeAlignedIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultR // there is an result exists if (miaInfo->curTs != INT64_MIN) { - ASSERT(taosHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 1); + ASSERT(tSimpleHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 1); if (ts != miaInfo->curTs) { outputMergeAlignedIntervalResult(pOperatorInfo, tableGroupId, pResultBlock, miaInfo->curTs); @@ -4976,7 +4980,7 @@ static void doMergeAlignedIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultR } } else { miaInfo->curTs = ts; - ASSERT(taosHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 0); + ASSERT(tSimpleHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 0); } STimeWindow win = {0}; @@ -5053,7 +5057,7 @@ static void doMergeAlignedIntervalAgg(SOperatorInfo* pOperator) { if (pBlock == NULL) { // close last unfinalized time window if (miaInfo->curTs != INT64_MIN) { - ASSERT(taosHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 1); + ASSERT(tSimpleHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 1); outputMergeAlignedIntervalResult(pOperator, miaInfo->groupId, pRes, miaInfo->curTs); miaInfo->curTs = INT64_MIN; } @@ -5231,12 +5235,12 @@ static int32_t finalizeWindowResult(SOperatorInfo* pOperatorInfo, uint64_t table SExprSupp* pExprSup = &pOperatorInfo->exprSupp; SET_RES_WINDOW_KEY(iaInfo->aggSup.keyBuf, &win->skey, TSDB_KEYSIZE, tableGroupId); - SResultRowPosition* p1 = (SResultRowPosition*)taosHashGet(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, + SResultRowPosition* p1 = (SResultRowPosition*)tSimpleHashGet(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); ASSERT(p1 != NULL); finalizeResultRowIntoResultDataBlock(iaInfo->aggSup.pResultBuf, p1, pExprSup->pCtx, pExprSup->pExprInfo, pExprSup->numOfExprs, pExprSup->rowEntryInfoOffset, pResultBlock, pTaskInfo); - taosHashRemove(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); + tSimpleHashRemove(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/executor/src/tsimplehash.c b/source/libs/executor/src/tsimplehash.c index 7989ad2b5a..dbb50f958d 100644 --- a/source/libs/executor/src/tsimplehash.c +++ b/source/libs/executor/src/tsimplehash.c @@ -14,7 +14,6 @@ */ #include "tsimplehash.h" -#include "os.h" #include "taoserror.h" #define SHASH_DEFAULT_LOAD_FACTOR 0.75 @@ -31,19 +30,21 @@ taosMemoryFreeClear(_n); \ } while (0); +#pragma pack(push, 4) typedef struct SHNode { struct SHNode *next; + uint32_t keyLen : 20; + uint32_t dataLen : 12; char data[]; } SHNode; +#pragma pack(pop) struct SSHashObj { SHNode **hashList; size_t capacity; // number of slots - int64_t size; // number of elements in hash table - _hash_fn_t hashFp; // hash function - _equal_fn_t equalFp; // equal function - int32_t keyLen; - int32_t dataLen; + int64_t size; // number of elements in hash table + _hash_fn_t hashFp; // hash function + _equal_fn_t equalFp; // equal function }; static FORCE_INLINE int32_t taosHashCapacity(int32_t length) { @@ -54,7 +55,7 @@ static FORCE_INLINE int32_t taosHashCapacity(int32_t length) { return i; } -SSHashObj *tSimpleHashInit(size_t capacity, _hash_fn_t fn, size_t keyLen, size_t dataLen) { +SSHashObj *tSimpleHashInit(size_t capacity, _hash_fn_t fn) { ASSERT(fn != NULL); if (capacity == 0) { @@ -74,8 +75,6 @@ SSHashObj *tSimpleHashInit(size_t capacity, _hash_fn_t fn, size_t keyLen, size_t pHashObj->hashFp = fn; ASSERT((pHashObj->capacity & (pHashObj->capacity - 1)) == 0); - pHashObj->keyLen = keyLen; - pHashObj->dataLen = dataLen; pHashObj->hashList = (SHNode **)taosMemoryCalloc(pHashObj->capacity, sizeof(void *)); if (!pHashObj->hashList) { @@ -93,16 +92,17 @@ int32_t tSimpleHashGetSize(const SSHashObj *pHashObj) { return (int32_t)atomic_load_64((int64_t *)&pHashObj->size); } -static SHNode *doCreateHashNode(const void *key, size_t keyLen, const void *pData, size_t dsize, uint32_t hashVal) { - SHNode *pNewNode = taosMemoryMalloc(sizeof(SHNode) + keyLen + dsize); +static SHNode *doCreateHashNode(const void *key, size_t keyLen, const void *data, size_t dataLen, uint32_t hashVal) { + SHNode *pNewNode = taosMemoryMalloc(sizeof(SHNode) + keyLen + dataLen); if (!pNewNode) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } - + pNewNode->keyLen = keyLen; + pNewNode->dataLen = dataLen; pNewNode->next = NULL; - memcpy(GET_SHASH_NODE_DATA(pNewNode), pData, dsize); - memcpy(GET_SHASH_NODE_KEY(pNewNode, dsize), key, keyLen); + memcpy(GET_SHASH_NODE_DATA(pNewNode), data, dataLen); + memcpy(GET_SHASH_NODE_KEY(pNewNode, dataLen), key, keyLen); return pNewNode; } @@ -141,8 +141,8 @@ static void taosHashTableResize(SSHashObj *pHashObj) { SHNode *pPrev = NULL; while (pNode != NULL) { - void *key = GET_SHASH_NODE_KEY(pNode, pHashObj->dataLen); - uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)pHashObj->keyLen); + void *key = GET_SHASH_NODE_KEY(pNode, pNode->dataLen); + uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)pNode->keyLen); int32_t newIdx = HASH_INDEX(hashVal, pHashObj->capacity); pNext = pNode->next; @@ -170,12 +170,12 @@ static void taosHashTableResize(SSHashObj *pHashObj) { // ((double)pHashObj->size) / pHashObj->capacity, (et - st) / 1000.0); } -int32_t tSimpleHashPut(SSHashObj *pHashObj, const void *key, const void *data) { +int32_t tSimpleHashPut(SSHashObj *pHashObj, const void *key, size_t keyLen, const void *data, size_t dataLen) { if (!pHashObj || !key) { return -1; } - uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)pHashObj->keyLen); + uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)keyLen); // need the resize process, write lock applied if (SHASH_NEED_RESIZE(pHashObj)) { @@ -186,7 +186,7 @@ int32_t tSimpleHashPut(SSHashObj *pHashObj, const void *key, const void *data) { SHNode *pNode = pHashObj->hashList[slot]; if (!pNode) { - SHNode *pNewNode = doCreateHashNode(key, pHashObj->keyLen, data, pHashObj->dataLen, hashVal); + SHNode *pNewNode = doCreateHashNode(key, keyLen, data, dataLen, hashVal); if (!pNewNode) { return -1; } @@ -197,14 +197,14 @@ int32_t tSimpleHashPut(SSHashObj *pHashObj, const void *key, const void *data) { } while (pNode) { - if ((*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pNode, pHashObj->dataLen), key, pHashObj->keyLen) == 0) { + if ((*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pNode, pNode->dataLen), key, keyLen) == 0) { break; } pNode = pNode->next; } if (!pNode) { - SHNode *pNewNode = doCreateHashNode(key, pHashObj->keyLen, data, pHashObj->dataLen, hashVal); + SHNode *pNewNode = doCreateHashNode(key, keyLen, data, dataLen, hashVal); if (!pNewNode) { return -1; } @@ -212,16 +212,16 @@ int32_t tSimpleHashPut(SSHashObj *pHashObj, const void *key, const void *data) { pHashObj->hashList[slot] = pNewNode; atomic_add_fetch_64(&pHashObj->size, 1); } else { // update data - memcpy(GET_SHASH_NODE_DATA(pNode), data, pHashObj->dataLen); + memcpy(GET_SHASH_NODE_DATA(pNode), data, dataLen); } return 0; } -static FORCE_INLINE SHNode *doSearchInEntryList(SSHashObj *pHashObj, const void *key, int32_t index) { +static FORCE_INLINE SHNode *doSearchInEntryList(SSHashObj *pHashObj, const void *key, size_t keyLen, int32_t index) { SHNode *pNode = pHashObj->hashList[index]; while (pNode) { - if ((*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pNode, pHashObj->dataLen), key, pHashObj->keyLen) == 0) { + if ((*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pNode, pNode->dataLen), key, keyLen) == 0) { break; } @@ -233,12 +233,12 @@ static FORCE_INLINE SHNode *doSearchInEntryList(SSHashObj *pHashObj, const void static FORCE_INLINE bool taosHashTableEmpty(const SSHashObj *pHashObj) { return tSimpleHashGetSize(pHashObj) == 0; } -void *tSimpleHashGet(SSHashObj *pHashObj, const void *key) { +void *tSimpleHashGet(SSHashObj *pHashObj, const void *key, size_t keyLen) { if (!pHashObj || taosHashTableEmpty(pHashObj) || !key) { return NULL; } - uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)pHashObj->keyLen); + uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)keyLen); int32_t slot = HASH_INDEX(hashVal, pHashObj->capacity); SHNode *pNode = pHashObj->hashList[slot]; @@ -247,7 +247,7 @@ void *tSimpleHashGet(SSHashObj *pHashObj, const void *key) { } char *data = NULL; - pNode = doSearchInEntryList(pHashObj, key, slot); + pNode = doSearchInEntryList(pHashObj, key, keyLen, slot); if (pNode != NULL) { data = GET_SHASH_NODE_DATA(pNode); } @@ -255,19 +255,19 @@ void *tSimpleHashGet(SSHashObj *pHashObj, const void *key) { return data; } -int32_t tSimpleHashRemove(SSHashObj *pHashObj, const void *key) { +int32_t tSimpleHashRemove(SSHashObj *pHashObj, const void *key, size_t keyLen) { if (!pHashObj || !key) { return TSDB_CODE_FAILED; } - uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)pHashObj->keyLen); + uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)keyLen); int32_t slot = HASH_INDEX(hashVal, pHashObj->capacity); SHNode *pNode = pHashObj->hashList[slot]; SHNode *pPrev = NULL; while (pNode) { - if ((*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pNode, pHashObj->dataLen), key, pHashObj->keyLen) == 0) { + if ((*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pNode, pNode->dataLen), key, keyLen) == 0) { if (!pPrev) { pHashObj->hashList[slot] = pNode->next; } else { @@ -312,6 +312,7 @@ void tSimpleHashCleanup(SSHashObj *pHashObj) { tSimpleHashClear(pHashObj); taosMemoryFreeClear(pHashObj->hashList); + taosMemoryFree(pHashObj); } size_t tSimpleHashGetMemSize(const SSHashObj *pHashObj) { @@ -322,23 +323,13 @@ size_t tSimpleHashGetMemSize(const SSHashObj *pHashObj) { return (pHashObj->capacity * sizeof(void *)) + sizeof(SHNode) * tSimpleHashGetSize(pHashObj) + sizeof(SSHashObj); } -void *tSimpleHashGetKey(const SSHashObj *pHashObj, void *data, size_t *keyLen) { -#if 0 - int32_t offset = offsetof(SHNode, data); - SHNode *node = ((SHNode *)(char *)data - offset); +void *tSimpleHashGetKey(void *data, size_t *keyLen) { + SHNode *node = (SHNode *)((char *)data - offsetof(SHNode, data)); if (keyLen) { - *keyLen = pHashObj->keyLen; + *keyLen = node->keyLen; } - return POINTER_SHIFT(data, pHashObj->dataLen); - - return GET_SHASH_NODE_KEY(node, pHashObj->dataLen); -#endif - if (keyLen) { - *keyLen = pHashObj->keyLen; - } - - return POINTER_SHIFT(data, pHashObj->dataLen); + return POINTER_SHIFT(data, node->dataLen); } void *tSimpleHashIterate(const SSHashObj *pHashObj, void *data, int32_t *iter) { @@ -376,5 +367,52 @@ void *tSimpleHashIterate(const SSHashObj *pHashObj, void *data, int32_t *iter) { return GET_SHASH_NODE_DATA(pNode); } + return NULL; +} + +void *tSimpleHashIterateKV(const SSHashObj *pHashObj, void *data, void **key, int32_t *iter) { + if (!pHashObj) { + return NULL; + } + + SHNode *pNode = NULL; + + if (!data) { + for (int32_t i = 0; i < pHashObj->capacity; ++i) { + pNode = pHashObj->hashList[i]; + if (!pNode) { + continue; + } + *iter = i; + if (key) { + *key = GET_SHASH_NODE_KEY(pNode, pNode->dataLen); + } + return GET_SHASH_NODE_DATA(pNode); + } + return NULL; + } + + pNode = (SHNode *)((char *)data - offsetof(SHNode, data)); + + if (pNode->next) { + if (key) { + *key = GET_SHASH_NODE_KEY(pNode->next, pNode->next->dataLen); + } + return GET_SHASH_NODE_DATA(pNode->next); + } + + ++(*iter); + for (int32_t i = *iter; i < pHashObj->capacity; ++i) { + pNode = pHashObj->hashList[i]; + if (!pNode) { + continue; + } + *iter = i; + if (key) { + *key = GET_SHASH_NODE_KEY(pNode, pNode->dataLen); + } + return GET_SHASH_NODE_DATA(pNode); + } + return NULL; } \ No newline at end of file diff --git a/source/libs/executor/test/tSimpleHashTests.cpp b/source/libs/executor/test/tSimpleHashTests.cpp index a17a7146ea..acb6d434b4 100644 --- a/source/libs/executor/test/tSimpleHashTests.cpp +++ b/source/libs/executor/test/tSimpleHashTests.cpp @@ -32,31 +32,33 @@ TEST(testCase, tSimpleHashTest) { SSHashObj *pHashObj = - tSimpleHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), sizeof(int64_t), sizeof(int64_t)); + tSimpleHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); assert(pHashObj != nullptr); ASSERT_EQ(0, tSimpleHashGetSize(pHashObj)); + size_t keyLen = sizeof(int64_t); + size_t dataLen = sizeof(int64_t); + int64_t originKeySum = 0; for (int64_t i = 1; i <= 100; ++i) { originKeySum += i; - tSimpleHashPut(pHashObj, (const void *)&i, (const void *)&i); + tSimpleHashPut(pHashObj, (const void *)&i, keyLen, (const void *)&i, dataLen); ASSERT_EQ(i, tSimpleHashGetSize(pHashObj)); } for (int64_t i = 1; i <= 100; ++i) { - void *data = tSimpleHashGet(pHashObj, (const void *)&i); + void *data = tSimpleHashGet(pHashObj, (const void *)&i, keyLen); ASSERT_EQ(i, *(int64_t *)data); } - void *data = NULL; int32_t iter = 0; int64_t keySum = 0; int64_t dataSum = 0; while ((data = tSimpleHashIterate(pHashObj, data, &iter))) { - void *key = tSimpleHashGetKey(pHashObj, data, NULL); + void *key = tSimpleHashGetKey(data, NULL); keySum += *(int64_t *)key; dataSum += *(int64_t *)data; } @@ -65,7 +67,7 @@ TEST(testCase, tSimpleHashTest) { ASSERT_EQ(keySum, originKeySum); for (int64_t i = 1; i <= 100; ++i) { - tSimpleHashRemove(pHashObj, (const void *)&i); + tSimpleHashRemove(pHashObj, (const void *)&i, keyLen); ASSERT_EQ(100 - i, tSimpleHashGetSize(pHashObj)); } From dab6c81769d5292f9b149d6d0cc0d443e925a7d1 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Mon, 15 Aug 2022 00:16:44 +0800 Subject: [PATCH 02/18] enh: rsma batch process --- include/common/tmsg.h | 2 +- include/common/tmsgdef.h | 1 + include/util/taoserror.h | 1 + source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 1 + source/dnode/vnode/src/inc/sma.h | 53 +-- source/dnode/vnode/src/inc/vnodeInt.h | 1 + source/dnode/vnode/src/sma/smaCommit.c | 145 +++++---- source/dnode/vnode/src/sma/smaEnv.c | 4 +- source/dnode/vnode/src/sma/smaRollup.c | 339 +++++++++++++++++--- source/dnode/vnode/src/sma/smaTimeRange.c | 2 +- source/dnode/vnode/src/sma/smaUtil.c | 32 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 2 + source/libs/executor/src/executor.c | 2 +- source/libs/executor/src/tsimplehash.c | 13 +- source/util/src/terror.c | 1 + 15 files changed, 425 insertions(+), 174 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 4454c061ae..a283b7c9c1 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2660,7 +2660,7 @@ typedef struct { } SVgEpSet; typedef struct { - int32_t padding; + // padding } SRSmaExecMsg; typedef struct { diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 6462c7afbf..2bf840fd01 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -201,6 +201,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_DROP_SMA, "vnode-drop-sma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_SUBMIT_RSMA, "vnode-submit-rsma", SSubmitReq, SSubmitRsp) TD_DEF_MSG_TYPE(TDMT_VND_FETCH_RSMA, "vnode-fetch-rsma", SRSmaFetchMsg, NULL) + TD_DEF_MSG_TYPE(TDMT_VND_EXEC_RSMA, "vnode-exec-rsma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_DELETE, "delete-data", SVDeleteReq, SVDeleteRsp) TD_DEF_MSG_TYPE(TDMT_VND_BATCH_DEL, "batch-delete", SBatchDeleteReq, NULL) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 3ca6978156..2d41874912 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -614,6 +614,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RSMA_REMOVE_EXISTS TAOS_DEF_ERROR_CODE(0, 0x3154) #define TSDB_CODE_RSMA_FETCH_MSG_MSSED_UP TAOS_DEF_ERROR_CODE(0, 0x3155) #define TSDB_CODE_RSMA_EMPTY_INFO TAOS_DEF_ERROR_CODE(0, 0x3156) +#define TSDB_CODE_RSMA_INVALID_SCHEMA TAOS_DEF_ERROR_CODE(0, 0x3157) //index #define TSDB_CODE_INDEX_REBUILDING TAOS_DEF_ERROR_CODE(0, 0x3200) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 7c6807ab87..1b4efeca7a 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -338,6 +338,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_SCH_MERGE_QUERY, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_CONTINUE, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_FETCH_RSMA, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_EXEC_RSMA, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_FETCH, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_MERGE_FETCH, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_TABLE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index fb8352d543..bc204e032d 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -57,9 +57,10 @@ typedef struct { void *tmrHandle; // shared by all fetch tasks } SSmaMgmt; -#define SMA_ENV_LOCK(env) (&(env)->lock) -#define SMA_ENV_TYPE(env) ((env)->type) -#define SMA_ENV_STAT(env) ((env)->pStat) +#define SMA_ENV_LOCK(env) (&(env)->lock) +#define SMA_ENV_TYPE(env) ((env)->type) +#define SMA_ENV_STAT(env) ((env)->pStat) +#define SMA_RSMA_STAT(sma) ((SRSmaStat *)SMA_ENV_STAT((SSmaEnv *)(sma)->pRSmaEnv)) struct STSmaStat { int8_t state; // ETsdbSmaStat @@ -86,17 +87,19 @@ struct SQTaskFWriter { }; struct SRSmaStat { - SSma *pSma; - int64_t commitAppliedVer; // vnode applied version for async commit - int64_t refId; // shared by fetch tasks - SRWLatch lock; // r/w lock for rsma fs(e.g. qtaskinfo) - int8_t triggerStat; // shared by fetch tasks - int8_t commitStat; // 0 not in committing, 1 in committing - SArray *aTaskFile; // qTaskFiles committed recently(for recovery/snapshot r/w) - SHashObj *rsmaInfoHash; // key: stbUid, value: SRSmaInfo; - SHashObj *iRsmaInfoHash; // key: stbUid, value: SRSmaInfo; immutable rsmaInfoHash + SSma *pSma; + int64_t commitAppliedVer; // vnode applied version for async commit + int64_t refId; // shared by fetch tasks + volatile int64_t qBufSize; // queue buffer size + SRWLatch lock; // r/w lock for rsma fs(e.g. qtaskinfo) + int8_t triggerStat; // shared by fetch tasks + int8_t commitStat; // 0 not in committing, 1 in committing + int8_t execStat; // 0 not in exec , 1 in exec + SArray *aTaskFile; // qTaskFiles committed recently(for recovery/snapshot r/w) + SHashObj *rsmaInfoHash; // key: stbUid, value: SRSmaInfo; }; + struct SSmaStat { union { STSmaStat tsmaStat; // time-range-wise sma @@ -105,10 +108,9 @@ struct SSmaStat { T_REF_DECLARE() }; -#define SMA_TSMA_STAT(s) (&(s)->tsmaStat) -#define SMA_RSMA_STAT(s) (&(s)->rsmaStat) +#define SMA_STAT_TSMA(s) (&(s)->tsmaStat) +#define SMA_STAT_RSMA(s) (&(s)->rsmaStat) #define RSMA_INFO_HASH(r) ((r)->rsmaInfoHash) -#define RSMA_IMU_INFO_HASH(r) ((r)->iRsmaInfoHash) #define RSMA_TRIGGER_STAT(r) (&(r)->triggerStat) #define RSMA_COMMIT_STAT(r) (&(r)->commitStat) #define RSMA_REF_ID(r) ((r)->refId) @@ -122,23 +124,25 @@ struct SRSmaInfoItem { }; struct SRSmaInfo { - STSchema *pTSchema; - STaosQueue *queue; // buffer queue of SubmitReq - STaosQall *qall; - int64_t suid; - int64_t refId; // refId of SRSmaStat - int8_t delFlag; + STSchema *pTSchema; + int64_t suid; + int64_t refId; // refId of SRSmaStat + int8_t delFlag; T_REF_DECLARE() SRSmaInfoItem items[TSDB_RETENTION_L2]; void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t - void *iTaskInfo[TSDB_RETENTION_L2]; // immutable + STaosQueue *queue; // buffer queue of SubmitReq + STaosQall *qall; // buffer qall of SubmitReq + void *iTaskInfo[TSDB_RETENTION_L2]; // immutable qTaskInfo_t + STaosQueue *iQueue; // immutable buffer queue of SubmitReq + STaosQall *iQall; // immutable buffer qall of SubmitReq }; #define RSMA_INFO_HEAD_LEN offsetof(SRSmaInfo, items) #define RSMA_INFO_IS_DEL(r) ((r)->delFlag == 1) #define RSMA_INFO_SET_DEL(r) ((r)->delFlag = 1) #define RSMA_INFO_QTASK(r, i) ((r)->taskInfo[i]) -#define RSMA_INFO_IQTASK(r, i) ((r)->iTaskInfo[i]) +#define RSMA_INFO_IQTASK(r, i) ((r)->iTaskInfo[i]) #define RSMA_INFO_ITEM(r, i) (&(r)->items[i]) enum { @@ -230,12 +234,13 @@ static FORCE_INLINE void tdSmaStatSetDropped(STSmaStat *pTStat) { void tdRSmaQTaskInfoGetFileName(int32_t vid, int64_t version, char *outputName); void tdRSmaQTaskInfoGetFullName(int32_t vid, int64_t version, const char *path, char *outputName); -int32_t tdCloneRSmaInfo(SSma *pSma, SRSmaInfo **pDest, SRSmaInfo *pSrc); +int32_t tdCloneRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); void tdFreeQTaskInfo(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level); static int32_t tdDestroySmaState(SSmaStat *pSmaStat, int8_t smaType); void *tdFreeSmaState(SSmaStat *pSmaStat, int8_t smaType); void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree); int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash); +int32_t tdRSmaProcessExecImpl(SSma *pSma); int32_t tdProcessRSmaCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName); int32_t tdProcessRSmaRestoreImpl(SSma *pSma, int8_t type, int64_t qtaskFileVer); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 1cc8e1bb27..f85a3d8361 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -188,6 +188,7 @@ int32_t smaAsyncCommit(SSma* pSma); int32_t smaAsyncPostCommit(SSma* pSma); int32_t smaDoRetention(SSma* pSma, int64_t now); int32_t smaProcessFetch(SSma* pSma, void* pMsg); +int32_t smaProcessExec(SSma* pSma, void* pMsg); int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg); int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg); diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 373cfdfb47..807c033489 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -83,8 +83,7 @@ int32_t smaBegin(SSma *pSma) { return TSDB_CODE_SUCCESS; } - SSmaStat *pStat = SMA_ENV_STAT(pSmaEnv); - SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pStat); + SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pSmaEnv); int8_t rsmaTriggerStat = atomic_val_compare_exchange_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_PAUSED, TASK_TRIGGER_STAT_ACTIVE); @@ -122,8 +121,8 @@ static int32_t tdProcessRSmaSyncPreCommitImpl(SSma *pSma) { return TSDB_CODE_SUCCESS; } - SSmaStat *pStat = SMA_ENV_STAT(pSmaEnv); - SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pStat); + SSmaStat *pStat = SMA_ENV_STAT(pSmaEnv); + SRSmaStat *pRSmaStat = SMA_STAT_RSMA(pStat); // step 1: set rsma stat paused atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_PAUSED); @@ -289,8 +288,7 @@ static int32_t tdProcessRSmaSyncPostCommitImpl(SSma *pSma) { return TSDB_CODE_SUCCESS; } - SSmaEnv *pSmaEnv = SMA_RSMA_ENV(pSma); - SRSmaStat *pRSmaStat = SMA_RSMA_STAT(SMA_ENV_STAT(pSmaEnv)); + SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pSma); // cleanup outdated qtaskinfo files tdCleanupQTaskInfoFiles(pSma, pRSmaStat); @@ -314,7 +312,7 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { } SSmaStat *pStat = SMA_ENV_STAT(pEnv); - SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pStat); + SRSmaStat *pRSmaStat = SMA_STAT_RSMA(pStat); // step 1: set rsma stat atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_PAUSED); @@ -336,24 +334,30 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { } } - // step 3: swap rsmaInfoHash and iRsmaInfoHash + // step 3: consume the SubmitReq in buffer + if (tdRSmaProcessExecImpl(pSma) < 0) { + return TSDB_CODE_FAILED; + } + + // step 4: swap rsmaInfoHash and iRsmaInfoHash // lock taosWLockLatch(SMA_ENV_LOCK(pEnv)); ASSERT(RSMA_INFO_HASH(pRSmaStat)); - ASSERT(!RSMA_IMU_INFO_HASH(pRSmaStat)); - RSMA_IMU_INFO_HASH(pRSmaStat) = RSMA_INFO_HASH(pRSmaStat); - RSMA_INFO_HASH(pRSmaStat) = - taosHashInit(RSMA_TASK_INFO_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_ENTRY_LOCK); + void *pIter = taosHashIterate(RSMA_INFO_HASH(pRSmaStat), NULL); - if (!RSMA_INFO_HASH(pRSmaStat)) { - // unlock - taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); - smaError("vgId:%d, rsma async commit failed since %s", SMA_VID(pSma), terrstr()); - return TSDB_CODE_FAILED; + while (pIter) { + SRSmaInfo *pInfo = *(SRSmaInfo **)pIter; + TSWAP(pInfo->iQall, pInfo->qall); + TSWAP(pInfo->iQueue, pInfo->queue); + TSWAP(pInfo->iTaskInfo[0], pInfo->taskInfo[0]); + TSWAP(pInfo->iTaskInfo[1], pInfo->taskInfo[1]); + pIter = taosHashIterate(RSMA_INFO_HASH(pRSmaStat), pIter); } + atomic_store_64(&pRSmaStat->qBufSize, 0); + // unlock taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); @@ -375,11 +379,9 @@ static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma) { return TSDB_CODE_SUCCESS; } - SSmaStat *pStat = SMA_ENV_STAT(pSmaEnv); - SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pStat); - // perform persist task for qTaskInfo - tdRSmaPersistExecImpl(pRSmaStat, RSMA_IMU_INFO_HASH(pRSmaStat)); + SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pSmaEnv); + tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); return TSDB_CODE_SUCCESS; } @@ -396,65 +398,68 @@ static int32_t tdProcessRSmaAsyncPostCommitImpl(SSma *pSma) { return TSDB_CODE_SUCCESS; } - SSmaStat *pStat = SMA_ENV_STAT(pEnv); - SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pStat); + SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); + SArray *rsmaDeleted = NULL; - // step 1: merge rsmaInfoHash and iRsmaInfoHash + // step 1: merge qTaskInfo and iQTaskInfo // lock taosWLockLatch(SMA_ENV_LOCK(pEnv)); -#if 0 - if (taosHashGetSize(RSMA_INFO_HASH(pRSmaStat)) <= 0) { - // just switch the hash pointer if rsmaInfoHash is empty - if (taosHashGetSize(RSMA_IMU_INFO_HASH(pRSmaStat)) > 0) { - SHashObj *infoHash = RSMA_INFO_HASH(pRSmaStat); - RSMA_INFO_HASH(pRSmaStat) = RSMA_IMU_INFO_HASH(pRSmaStat); - RSMA_IMU_INFO_HASH(pRSmaStat) = infoHash; - } - } else { -#endif -#if 1 - void *pIter = taosHashIterate(RSMA_IMU_INFO_HASH(pRSmaStat), NULL); + + void *pIter = taosHashIterate(RSMA_INFO_HASH(pRSmaStat), NULL); while (pIter) { tb_uid_t *pSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL); - - if (!taosHashGet(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(tb_uid_t))) { - SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)pIter; - if (RSMA_INFO_IS_DEL(pRSmaInfo)) { - int32_t refVal = T_REF_VAL_GET(pRSmaInfo); - if (refVal == 0) { - tdFreeRSmaInfo(pSma, pRSmaInfo, true); - smaDebug( - "vgId:%d, rsma async post commit, free rsma info since already deleted and ref is 0 for " - "table:%" PRIi64, - SMA_VID(pSma), *pSuid); - } else { - smaDebug( - "vgId:%d, rsma async post commit, not free rsma info since ref is %d although already deleted for " - "table:%" PRIi64, - SMA_VID(pSma), refVal, *pSuid); + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)pIter; + if (RSMA_INFO_IS_DEL(pRSmaInfo)) { + int32_t refVal = T_REF_VAL_GET(pRSmaInfo); + if (refVal == 0) { + if(!rsmaDeleted) { + if((rsmaDeleted = taosArrayInit(1, sizeof(tb_uid_t)))){ + taosArrayPush(rsmaDeleted, pSuid); + } } - - pIter = taosHashIterate(RSMA_IMU_INFO_HASH(pRSmaStat), pIter); - continue; + } else { + smaDebug( + "vgId:%d, rsma async post commit, not free rsma info since ref is %d although already deleted for " + "table:%" PRIi64, + SMA_VID(pSma), refVal, *pSuid); } - taosHashPut(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(tb_uid_t), pIter, sizeof(pIter)); - smaDebug("vgId:%d, rsma async post commit, migrated from iRsmaInfoHash for table:%" PRIi64, SMA_VID(pSma), - *pSuid); - } else { - // free the resources - SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)pIter; - tdFreeRSmaInfo(pSma, pRSmaInfo, false); - smaDebug("vgId:%d, rsma async post commit, free rsma info since already COW for table:%" PRIi64, SMA_VID(pSma), - *pSuid); + + pIter = taosHashIterate(RSMA_INFO_HASH(pRSmaStat), pIter); + continue; } - pIter = taosHashIterate(RSMA_IMU_INFO_HASH(pRSmaStat), pIter); - } -#endif - // } + if (pRSmaInfo->taskInfo[0]) { + if (pRSmaInfo->iTaskInfo[0]) { + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)pRSmaInfo->iTaskInfo[0]; + tdFreeRSmaInfo(pSma, pRSmaInfo, true); + pRSmaInfo->iTaskInfo[0] = NULL; + } + } else { + TSWAP(pRSmaInfo->taskInfo[0], pRSmaInfo->iTaskInfo[0]); + } - taosHashCleanup(RSMA_IMU_INFO_HASH(pRSmaStat)); - RSMA_IMU_INFO_HASH(pRSmaStat) = NULL; + taosHashPut(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(tb_uid_t), pIter, sizeof(pIter)); + smaDebug("vgId:%d, rsma async post commit, migrated from iRsmaInfoHash for table:%" PRIi64, SMA_VID(pSma), *pSuid); + + pIter = taosHashIterate(RSMA_INFO_HASH(pRSmaStat), pIter); + } + + if (taosArrayGetSize(rsmaDeleted) > 0) { + for (int32_t i = 0; i < taosArrayGetSize(rsmaDeleted); ++i) { + tb_uid_t *pSuid = taosArrayGet(rsmaDeleted, i); + void *pRSmaInfo = taosHashGet(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(tb_uid_t)); + if ((pRSmaInfo = *(SRSmaInfo **)pRSmaInfo)) { + tdFreeRSmaInfo(pSma, pRSmaInfo, true); + smaDebug( + "vgId:%d, rsma async post commit, free rsma info since already deleted and ref is 0 for " + "table:%" PRIi64, + SMA_VID(pSma), *pSuid); + } + taosHashRemove(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(tb_uid_t)); + } + // remove suid in files + taosArrayDestroy(rsmaDeleted); + } // unlock taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index 50db1123ae..73f8060559 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -315,9 +315,9 @@ void *tdFreeSmaState(SSmaStat *pSmaStat, int8_t smaType) { int32_t tdDestroySmaState(SSmaStat *pSmaStat, int8_t smaType) { if (pSmaStat) { if (smaType == TSDB_SMA_TYPE_TIME_RANGE) { - tdDestroyTSmaStat(SMA_TSMA_STAT(pSmaStat)); + tdDestroyTSmaStat(SMA_STAT_TSMA(pSmaStat)); } else if (smaType == TSDB_SMA_TYPE_ROLLUP) { - SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pSmaStat); + SRSmaStat *pRSmaStat = &pSmaStat->rsmaStat; int32_t vid = SMA_VID(pRSmaStat->pSma); int64_t refId = RSMA_REF_ID(pRSmaStat); if (taosRemoveRef(smaMgmt.rsetId, RSMA_REF_ID(pRSmaStat)) < 0) { diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 1660223d77..41393eb52f 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -17,6 +17,7 @@ #define RSMA_QTASKINFO_BUFSIZE 32768 #define RSMA_QTASKINFO_HEAD_LEN (sizeof(int32_t) + sizeof(int8_t) + sizeof(int64_t)) // len + type + suid +#define RSMA_QTASKEXEC_BUFSIZ 1 // * 1048576 // 8 MB SSmaMgmt smaMgmt = { .inited = 0, @@ -27,17 +28,18 @@ SSmaMgmt smaMgmt = { #define TD_RSMAINFO_DEL_FILE "rsmainfo.del" typedef struct SRSmaQTaskInfoItem SRSmaQTaskInfoItem; typedef struct SRSmaQTaskInfoIter SRSmaQTaskInfoIter; +typedef struct SRSmaExecQItem SRSmaExecQItem; static int32_t tdUidStorePut(STbUidStore *pStore, tb_uid_t suid, tb_uid_t *uid); static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids); static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo, int8_t idx); -static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t inputType, SRSmaInfo *pInfo, tb_uid_t suid, - int8_t level); +static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo, + tb_uid_t suid, int8_t level); static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid); static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); static int32_t tdRSmaFetchAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, - int64_t suid, int8_t blkType); + int64_t suid); static void tdRSmaFetchTrigger(void *param, void *tmrId); static int32_t tdRSmaFetchSend(SSma *pSma, SRSmaInfo *pInfo, int8_t level); static int32_t tdRSmaQTaskInfoIterInit(SRSmaQTaskInfoIter *pIter, STFile *pTFile); @@ -76,6 +78,11 @@ struct SRSmaQTaskInfoIter { int32_t nBufPos; }; +struct SRSmaExecQItem { + void *pRSmaInfo; + void *qall; +}; + void tdRSmaQTaskInfoGetFileName(int32_t vgId, int64_t version, char *outputName) { tdGetVndFileName(vgId, NULL, VNODE_RSMA_DIR, TD_QTASKINFO_FNAME_PREFIX, version, outputName); } @@ -143,8 +150,12 @@ void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree) { if (isDeepFree) { if (pInfo->queue) taosCloseQueue(pInfo->queue); if (pInfo->qall) taosFreeQall(pInfo->qall); + if (pInfo->iQueue) taosCloseQueue(pInfo->iQueue); + if (pInfo->iQall) taosFreeQall(pInfo->iQall); pInfo->queue = NULL; pInfo->qall = NULL; + pInfo->iQueue = NULL; + pInfo->iQall = NULL; } taosMemoryFree(pInfo); @@ -362,9 +373,18 @@ int32_t tdProcessRSmaCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con if (!(pRSmaInfo->queue = taosOpenQueue())) { goto _err; } + smaError("vgId:%d init bufSize:%" PRIi64 ", qMemSize:%" PRIi64, SMA_VID(pSma), atomic_load_64(&pStat->qBufSize), + taosQueueMemorySize(pRSmaInfo->queue)); + if (!(pRSmaInfo->qall = taosAllocateQall())) { goto _err; } + if (!(pRSmaInfo->iQueue = taosOpenQueue())) { + goto _err; + } + if (!(pRSmaInfo->iQall = taosAllocateQall())) { + goto _err; + } pRSmaInfo->suid = suid; pRSmaInfo->refId = RSMA_REF_ID(pStat); T_REF_INIT_VAL(pRSmaInfo, 1); @@ -433,8 +453,7 @@ int32_t tdProcessRSmaDrop(SSma *pSma, SVDropStbReq *pReq) { return TSDB_CODE_SUCCESS; } - SSmaStat *pStat = SMA_ENV_STAT(pSmaEnv); - SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pStat); + SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pSmaEnv); SRSmaInfo *pRSmaInfo = tdAcquireRSmaInfoBySuid(pSma, pReq->suid); @@ -619,7 +638,7 @@ _end: } static int32_t tdRSmaFetchAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, - int64_t suid, int8_t blkType) { + int64_t suid) { SArray *pResList = taosArrayInit(1, POINTER_BYTES); if (pResList == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -690,8 +709,50 @@ _err: return TSDB_CODE_FAILED; } -static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t inputType, SRSmaInfo *pInfo, tb_uid_t suid, - int8_t level) { +/** + * @brief Copy msg to rsmaQueueBuffer + * + * @param pSma + * @param pMsg + * @param inputType + * @param pInfo + * @param suid + * @return int32_t + */ +static int32_t tdExecuteRSmaImplAsync(SSma *pSma, const void *pMsg, int32_t inputType, SRSmaInfo *pInfo, + tb_uid_t suid) { + const SSubmitReq *pReq = (const SSubmitReq *)pMsg; + + void *qItem = taosAllocateQitem(pReq->length, DEF_QITEM); + if (!qItem) { + return TSDB_CODE_FAILED; + } + + memcpy(qItem, pMsg, pReq->header.contLen); + + taosWriteQitem(pInfo->queue, qItem); + + SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pSma); + int64_t size = atomic_fetch_add_64(&pRSmaStat->qBufSize, taosQueueMemorySize(pInfo->queue)); + smaError("vgId:%d originSize:%" PRIi64 ", after push size is:%" PRIi64, SMA_VID(pSma), size, + atomic_load_64(&pRSmaStat->qBufSize)); + return TSDB_CODE_SUCCESS; +} + +/** + * @brief sync mode + * + * @param pSma + * @param pMsg + * @param msgSize + * @param inputType + * @param pInfo + * @param suid + * @param level + * @return int32_t + */ +static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo, + tb_uid_t suid, int8_t level) { int32_t idx = level - 1; if (!pInfo || !RSMA_INFO_QTASK(pInfo, idx)) { smaDebug("vgId:%d, no qTaskInfo to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, suid); @@ -705,14 +766,13 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t inputType smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p suid:%" PRIu64, SMA_VID(pSma), level, RSMA_INFO_QTASK(pInfo, idx), suid); - if (qSetMultiStreamInput(RSMA_INFO_QTASK(pInfo, idx), pMsg, 1, inputType) < 0) { // INPUT__DATA_SUBMIT + if (qSetMultiStreamInput(RSMA_INFO_QTASK(pInfo, idx), pMsg, msgSize, inputType) < 0) { // INPUT__DATA_SUBMIT smaError("vgId:%d, rsma %" PRIi8 " qSetStreamInput failed since %s", SMA_VID(pSma), level, tstrerror(terrno)); return TSDB_CODE_FAILED; } SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); - tdRSmaFetchAndSubmitResult(pSma, RSMA_INFO_QTASK(pInfo, idx), pItem, pInfo->pTSchema, suid, - STREAM_INPUT__DATA_SUBMIT); + tdRSmaFetchAndSubmitResult(pSma, RSMA_INFO_QTASK(pInfo, idx), pItem, pInfo->pTSchema, suid); atomic_store_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE); if (smaMgmt.tmrHandle) { @@ -752,8 +812,15 @@ static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid) { taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); return NULL; } + if (!pRSmaInfo->taskInfo[0]) { + if (tdCloneRSmaInfo(pSma, pRSmaInfo) < 0) { + taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); + return NULL; + } + } tdRefRSmaInfo(pSma, pRSmaInfo); taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); + ASSERT(pRSmaInfo->suid == suid); return pRSmaInfo; } taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); @@ -762,41 +829,9 @@ static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid) { return NULL; } - // clone the SRSmaInfo from iRsmaInfoHash to rsmaInfoHash if in committing stat - SRSmaInfo *pCowRSmaInfo = NULL; - // lock - taosWLockLatch(SMA_ENV_LOCK(pEnv)); - if (!(pCowRSmaInfo = taosHashGet(RSMA_INFO_HASH(pStat), &suid, sizeof(tb_uid_t)))) { // 2-phase lock - void *iRSmaInfo = taosHashGet(RSMA_IMU_INFO_HASH(pStat), &suid, sizeof(tb_uid_t)); - if (iRSmaInfo) { - SRSmaInfo *pIRSmaInfo = *(SRSmaInfo **)iRSmaInfo; - if (pIRSmaInfo && !RSMA_INFO_IS_DEL(pIRSmaInfo)) { - if (tdCloneRSmaInfo(pSma, &pCowRSmaInfo, pIRSmaInfo) < 0) { - // unlock - taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); - smaError("vgId:%d, clone rsma info failed for suid:%" PRIu64 " since %s", SMA_VID(pSma), suid, terrstr()); - return NULL; - } - smaDebug("vgId:%d, clone rsma info succeed for suid:%" PRIu64, SMA_VID(pSma), suid); - if (taosHashPut(RSMA_INFO_HASH(pStat), &suid, sizeof(tb_uid_t), &pCowRSmaInfo, sizeof(pCowRSmaInfo)) < 0) { - // unlock - taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); - smaError("vgId:%d, clone rsma info failed for suid:%" PRIu64 " since %s", SMA_VID(pSma), suid, terrstr()); - return NULL; - } - } - } - } else { - pCowRSmaInfo = *(SRSmaInfo **)pCowRSmaInfo; - ASSERT(!pCowRSmaInfo); - } - - if (pCowRSmaInfo) { - tdRefRSmaInfo(pSma, pCowRSmaInfo); - } // unlock taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); - return pCowRSmaInfo; + return pRSmaInfo; } static FORCE_INLINE void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo) { @@ -805,10 +840,47 @@ static FORCE_INLINE void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo) { } } +/** + * @brief async mode + * + * @param pSma + * @param pMsg + * @param inputType + * @param suid + * @return int32_t + */ +static int32_t tdExecuteRSmaAsync(SSma *pSma, const void *pMsg, int32_t inputType, tb_uid_t suid) { + SRSmaInfo *pRSmaInfo = tdAcquireRSmaInfoBySuid(pSma, suid); + if (!pRSmaInfo) { + smaDebug("vgId:%d, execute rsma, no rsma info for suid:%" PRIu64, SMA_VID(pSma), suid); + return TSDB_CODE_SUCCESS; + } + + if (inputType == STREAM_INPUT__DATA_SUBMIT) { + if (tdExecuteRSmaImplAsync(pSma, pMsg, inputType, pRSmaInfo, suid) < 0) { + tdReleaseRSmaInfo(pSma, pRSmaInfo); + return TSDB_CODE_FAILED; + } + } + + tdReleaseRSmaInfo(pSma, pRSmaInfo); + return TSDB_CODE_SUCCESS; +} + +#if 0 +/** + * @brief sync mode + * + * @param pSma + * @param pMsg + * @param inputType + * @param suid + * @return int32_t + */ static int32_t tdExecuteRSma(SSma *pSma, const void *pMsg, int32_t inputType, tb_uid_t suid) { SRSmaInfo *pRSmaInfo = tdAcquireRSmaInfoBySuid(pSma, suid); if (!pRSmaInfo) { - smaError("vgId:%d, execute rsma, no rsma info for suid:%" PRIu64, SMA_VID(pSma), suid); + smaDebug("vgId:%d, execute rsma, no rsma info for suid:%" PRIu64, SMA_VID(pSma), suid); return TSDB_CODE_SUCCESS; } @@ -820,6 +892,47 @@ static int32_t tdExecuteRSma(SSma *pSma, const void *pMsg, int32_t inputType, tb tdReleaseRSmaInfo(pSma, pRSmaInfo); return TSDB_CODE_SUCCESS; } +#endif + +static int32_t tdRSmaExecCheck(SSma *pSma) { + SRSmaStat *pRsmaStat = SMA_RSMA_STAT(pSma); + int64_t bufSize = atomic_load_64(&pRsmaStat->qBufSize); + + if ((pRsmaStat->execStat == 1) || (bufSize < RSMA_QTASKEXEC_BUFSIZ)) { + smaError("vgId:%d, return directly as execStat:%" PRIi8 ", bufSize:%" PRIi64, SMA_VID(pSma), pRsmaStat->execStat, + bufSize); + return TSDB_CODE_SUCCESS; + } + smaError("vgId:%d, go on exec as execStat:%" PRIi8 ", bufSize:%" PRIi64, SMA_VID(pSma), pRsmaStat->execStat, bufSize); + + pRsmaStat->execStat = 1; + + SRSmaExecMsg fetchMsg; + int32_t contLen = sizeof(SMsgHead); + void *pBuf = rpcMallocCont(0 + contLen); + + ((SMsgHead *)pBuf)->vgId = SMA_VID(pSma); + ((SMsgHead *)pBuf)->contLen = sizeof(SMsgHead); + + SRpcMsg rpcMsg = { + .code = 0, + .msgType = TDMT_VND_EXEC_RSMA, + .pCont = pBuf, + .contLen = contLen, + }; + + if ((terrno = tmsgPutToQueue(&pSma->pVnode->msgCb, QUERY_QUEUE, &rpcMsg)) != 0) { + smaError("vgId:%d, failed to put rsma exec msg into query-queue since %s", SMA_VID(pSma), terrstr()); + goto _err; + } + + smaDebug("vgId:%d, success to put rsma fetch msg into query-queue", SMA_VID(pSma)); + + return TSDB_CODE_SUCCESS; +_err: + pRsmaStat->execStat = 0; + return TSDB_CODE_FAILED; +} int32_t tdProcessRSmaSubmit(SSma *pSma, void *pMsg, int32_t inputType) { SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); @@ -839,16 +952,18 @@ int32_t tdProcessRSmaSubmit(SSma *pSma, void *pMsg, int32_t inputType) { tdFetchSubmitReqSuids(pMsg, &uidStore); if (uidStore.suid != 0) { - tdExecuteRSma(pSma, pMsg, inputType, uidStore.suid); + tdExecuteRSmaAsync(pSma, pMsg, inputType, uidStore.suid); void *pIter = taosHashIterate(uidStore.uidHash, NULL); while (pIter) { tb_uid_t *pTbSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL); - tdExecuteRSma(pSma, pMsg, inputType, *pTbSuid); + tdExecuteRSmaAsync(pSma, pMsg, inputType, *pTbSuid); pIter = taosHashIterate(uidStore.uidHash, pIter); } tdUidStoreDestory(&uidStore); + + tdRSmaExecCheck(pSma); } } return TSDB_CODE_SUCCESS; @@ -1282,7 +1397,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { } for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - qTaskInfo_t taskInfo = RSMA_INFO_QTASK(pRSmaInfo, i); + qTaskInfo_t taskInfo = RSMA_INFO_IQTASK(pRSmaInfo, i); if (!taskInfo) { smaDebug("vgId:%d, rsma, table %" PRIi64 " level %d qTaskInfo is NULL", vid, pRSmaInfo->suid, i + 1); continue; @@ -1452,7 +1567,7 @@ _end: * @param level * @return int32_t */ -int32_t tdRSmaFetchSend(SSma *pSma, SRSmaInfo *pInfo, int8_t level) { +static int32_t tdRSmaFetchSend(SSma *pSma, SRSmaInfo *pInfo, int8_t level) { SRSmaFetchMsg fetchMsg = {.suid = pInfo->suid, .level = level}; int32_t ret = 0; int32_t contLen = 0; @@ -1479,7 +1594,7 @@ int32_t tdRSmaFetchSend(SSma *pSma, SRSmaInfo *pInfo, int8_t level) { .code = 0, .msgType = TDMT_VND_FETCH_RSMA, .pCont = pBuf, - .contLen = contLen, + .contLen = contLen + sizeof(SMsgHead), }; if ((terrno = tmsgPutToQueue(&pSma->pVnode->msgCb, QUERY_QUEUE, &rpcMsg)) != 0) { @@ -1541,7 +1656,7 @@ int32_t smaProcessFetch(SSma *pSma, void *pMsg) { if ((terrno = qSetMultiStreamInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK)) < 0) { goto _err; } - if (tdRSmaFetchAndSubmitResult(pSma, taskInfo, pItem, pInfo->pTSchema, pInfo->suid, STREAM_INPUT__DATA_BLOCK) < 0) { + if (tdRSmaFetchAndSubmitResult(pSma, taskInfo, pItem, pInfo->pTSchema, pInfo->suid) < 0) { goto _err; } @@ -1558,3 +1673,125 @@ _err: smaError("vgId:%d, failed to process rsma fetch msg since %s", SMA_VID(pSma), terrstr()); return TSDB_CODE_FAILED; } + +static void tdFreeRSmaSubmitItems(SArray *pItems) { + for (int32_t i = 0; i < taosArrayGetSize(pItems); ++i) { + taosFreeQitem(*(void **)taosArrayGet(pItems, i)); + } +} + +int32_t tdRSmaProcessExecImpl(SSma *pSma) { + SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); + SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); + SHashObj *infoHash = NULL; + SArray *pSubmitQArr = NULL; + SArray *pSubmitArr = NULL; + + if (!pRSmaStat || !(infoHash = RSMA_INFO_HASH(pRSmaStat))) { + terrno = TSDB_CODE_RSMA_INVALID_STAT; + goto _err; + } + + taosRLockLatch(SMA_ENV_LOCK(pEnv)); + if (atomic_load_64(&pRSmaStat->qBufSize) < RSMA_QTASKEXEC_BUFSIZ) { + taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); + return TSDB_CODE_SUCCESS; + } + taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); + + if (!(pSubmitQArr = taosArrayInit(taosHashGetSize(infoHash), sizeof(SRSmaExecQItem)))) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + if (!(pSubmitArr = taosArrayInit(1024, POINTER_BYTES))) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + SRSmaExecQItem qItem = {0}; + taosWLockLatch(SMA_ENV_LOCK(pEnv)); + void *pIter = taosHashIterate(infoHash, NULL); + while (pIter) { + SRSmaInfo *pInfo = *(SRSmaInfo **)pIter; + if (taosQueueItemSize(pInfo->queue)) { + taosReadAllQitems(pInfo->queue, pInfo->qall); + qItem.qall = &pInfo->qall; + qItem.pRSmaInfo = pIter; + taosArrayPush(pSubmitQArr, &qItem); + } + ASSERT(taosQueueItemSize(pInfo->queue) == 0); + pIter = taosHashIterate(infoHash, pIter); + } + + atomic_store_64(&pRSmaStat->qBufSize, 0); + taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); + smaError("vgId:%d after exec qBufSize is:%" PRIi64, SMA_VID(pSma), atomic_load_64(&pRSmaStat->qBufSize)); + + int32_t qSize = taosArrayGetSize(pSubmitQArr); + for (int32_t i = 0; i < qSize; ++i) { + SRSmaExecQItem *pItem = taosArrayGet(pSubmitQArr, i); + while (1) { + void *msg = NULL; + taosGetQitem(*(STaosQall **)pItem->qall, (void **)&msg); + if (msg) { + if (taosArrayPush(pSubmitArr, &msg) < 0) { + tdFreeRSmaSubmitItems(pSubmitArr); + goto _err; + } + } else { + break; + } + } + + int32_t size = taosArrayGetSize(pSubmitArr); + if (size > 0) { + SRSmaInfo *pInfo = *(SRSmaInfo **)pItem->pRSmaInfo; + for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { + if (tdExecuteRSmaImpl(pSma, *(SSubmitReq**)pSubmitArr->pData, size, STREAM_INPUT__DATA_SUBMIT, pInfo, pInfo->suid, i) < 0) { + tdFreeRSmaSubmitItems(pSubmitArr); + goto _err; + } + } + tdFreeRSmaSubmitItems(pSubmitArr); + taosArrayClear(pSubmitArr); + } + } + + taosArrayDestroy(pSubmitArr); + taosArrayDestroy(pSubmitQArr); + return TSDB_CODE_SUCCESS; +_err: + taosArrayDestroy(pSubmitArr); + taosArrayDestroy(pSubmitQArr); + return TSDB_CODE_FAILED; +} + +/** + * @brief exec rsma level 1data, fetch result of level 2/3 and submit + * + * @param pSma + * @param pMsg + * @return int32_t + */ +int32_t smaProcessExec(SSma *pSma, void *pMsg) { + SRpcMsg *pRpcMsg = (SRpcMsg *)pMsg; + SRSmaStat *pRsmaStat = SMA_RSMA_STAT(pSma); + + if (!pRpcMsg || pRpcMsg->contLen < sizeof(SMsgHead)) { + terrno = TSDB_CODE_RSMA_FETCH_MSG_MSSED_UP; + goto _err; + } + + if (tdRSmaProcessExecImpl(pSma) < 0) { + goto _err; + } + + pRsmaStat->execStat = 0; + smaWarn("vgId:%d, success to process rsma exec msg", SMA_VID(pSma)); + return TSDB_CODE_SUCCESS; +_err: + pRsmaStat->execStat = 0; + smaError("vgId:%d, failed to process rsma fetch msg since %s", SMA_VID(pSma), terrstr()); + return TSDB_CODE_FAILED; +} diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c index f46d9dc29c..a6302b9235 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ b/source/dnode/vnode/src/sma/smaTimeRange.c @@ -175,7 +175,7 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { } tdRefSmaStat(pSma, pStat); - pTsmaStat = SMA_TSMA_STAT(pStat); + pTsmaStat = SMA_STAT_TSMA(pStat); if (!pTsmaStat->pTSma) { STSma *pTSma = metaGetSmaInfoByIndex(SMA_META(pSma), indexUid); diff --git a/source/dnode/vnode/src/sma/smaUtil.c b/source/dnode/vnode/src/sma/smaUtil.c index d9f38ffd09..da70222485 100644 --- a/source/dnode/vnode/src/sma/smaUtil.c +++ b/source/dnode/vnode/src/sma/smaUtil.c @@ -350,49 +350,45 @@ _err: } /** - * @brief pTSchema is shared + * @brief Clone qTaskInfo of SRSmaInfo * * @param pSma - * @param pDest - * @param pSrc + * @param pInfo * @return int32_t */ -int32_t tdCloneRSmaInfo(SSma *pSma, SRSmaInfo **pDest, SRSmaInfo *pSrc) { - SVnode *pVnode = pSma->pVnode; +int32_t tdCloneRSmaInfo(SSma *pSma, SRSmaInfo *pInfo) { SRSmaParam *param = NULL; - if (!pSrc) { - *pDest = NULL; + if (!pInfo) { return TSDB_CODE_SUCCESS; } SMetaReader mr = {0}; metaReaderInit(&mr, SMA_META(pSma), 0); - smaDebug("vgId:%d, rsma clone, suid is %" PRIi64, TD_VID(pVnode), pSrc->suid); - if (metaGetTableEntryByUid(&mr, pSrc->suid) < 0) { - smaError("vgId:%d, rsma clone, failed to get table meta for %" PRIi64 " since %s", TD_VID(pVnode), pSrc->suid, + smaDebug("vgId:%d, rsma clone qTaskInfo for suid:%" PRIi64, SMA_VID(pSma), pInfo->suid); + if (metaGetTableEntryByUid(&mr, pInfo->suid) < 0) { + smaError("vgId:%d, rsma clone, failed to get table meta for %" PRIi64 " since %s", SMA_VID(pSma), pInfo->suid, terrstr()); goto _err; } ASSERT(mr.me.type == TSDB_SUPER_TABLE); - ASSERT(mr.me.uid == pSrc->suid); + ASSERT(mr.me.uid == pInfo->suid); if (TABLE_IS_ROLLUP(mr.me.flags)) { param = &mr.me.stbEntry.rsmaParam; for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - if (tdCloneQTaskInfo(pSma, pSrc->iTaskInfo[i], pSrc->taskInfo[i], param, pSrc->suid, i) < 0) { + if (tdCloneQTaskInfo(pSma, pInfo->taskInfo[i], pInfo->iTaskInfo[i], param, pInfo->suid, i) < 0) { goto _err; } } - smaDebug("vgId:%d, rsma clone env success for %" PRIi64, TD_VID(pVnode), pSrc->suid); + smaDebug("vgId:%d, rsma clone env success for %" PRIi64, SMA_VID(pSma), pInfo->suid); + } else { + terrno = TSDB_CODE_RSMA_INVALID_SCHEMA; + goto _err; } metaReaderClear(&mr); - - *pDest = pSrc; // pointer copy - return TSDB_CODE_SUCCESS; _err: - *pDest = NULL; metaReaderClear(&mr); - smaError("vgId:%d, rsma clone env failed for %" PRIi64 " since %s", TD_VID(pVnode), pSrc->suid, terrstr()); + smaError("vgId:%d, rsma clone env failed for %" PRIi64 " since %s", SMA_VID(pSma), pInfo->suid, terrstr()); return TSDB_CODE_FAILED; } \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 3a25933ec4..751cb21d08 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -295,6 +295,8 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { return qWorkerProcessCQueryMsg(&handle, pVnode->pQuery, pMsg, 0); case TDMT_VND_FETCH_RSMA: return smaProcessFetch(pVnode->pSma, pMsg); + case TDMT_VND_EXEC_RSMA: + return smaProcessExec(pVnode->pSma, pMsg); default: vError("unknown msg type:%d in query queue", pMsg->msgType); return TSDB_CODE_VND_APP_ERROR; diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 7115ad85a5..4c3d5cf7af 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -55,7 +55,7 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu taosArrayClear(pInfo->pBlockLists); if (type == STREAM_INPUT__MERGED_SUBMIT) { - ASSERT(numOfBlocks > 1); + // ASSERT(numOfBlocks > 1); for (int32_t i = 0; i < numOfBlocks; i++) { SSubmitReq* pReq = *(void**)POINTER_SHIFT(input, i * sizeof(void*)); taosArrayPush(pInfo->pBlockLists, &pReq); diff --git a/source/libs/executor/src/tsimplehash.c b/source/libs/executor/src/tsimplehash.c index dbb50f958d..6b2edf0d5e 100644 --- a/source/libs/executor/src/tsimplehash.c +++ b/source/libs/executor/src/tsimplehash.c @@ -15,6 +15,7 @@ #include "tsimplehash.h" #include "taoserror.h" +#include "tlog.h" #define SHASH_DEFAULT_LOAD_FACTOR 0.75 #define HASH_MAX_CAPACITY (1024 * 1024 * 16) @@ -106,27 +107,27 @@ static SHNode *doCreateHashNode(const void *key, size_t keyLen, const void *data return pNewNode; } -static void taosHashTableResize(SSHashObj *pHashObj) { +static void tSimpleHashTableResize(SSHashObj *pHashObj) { if (!SHASH_NEED_RESIZE(pHashObj)) { return; } int32_t newCapacity = (int32_t)(pHashObj->capacity << 1u); if (newCapacity > HASH_MAX_CAPACITY) { - // uDebug("current capacity:%zu, maximum capacity:%d, no resize applied due to limitation is reached", - // pHashObj->capacity, HASH_MAX_CAPACITY); + uDebug("current capacity:%zu, maximum capacity:%" PRIu64 ", no resize applied due to limitation is reached", + pHashObj->capacity, HASH_MAX_CAPACITY); return; } int64_t st = taosGetTimestampUs(); void *pNewEntryList = taosMemoryRealloc(pHashObj->hashList, sizeof(void *) * newCapacity); if (!pNewEntryList) { - // qWarn("hash resize failed due to out of memory, capacity remain:%zu", pHashObj->capacity); + uWarn("hash resize failed due to out of memory, capacity remain:%zu", pHashObj->capacity); return; } size_t inc = newCapacity - pHashObj->capacity; - memset((char *)pNewEntryList + pHashObj->capacity * sizeof(void *), 0, inc); + memset((char *)pNewEntryList + pHashObj->capacity * sizeof(void *), 0, inc * sizeof(void *)); pHashObj->hashList = pNewEntryList; pHashObj->capacity = newCapacity; @@ -179,7 +180,7 @@ int32_t tSimpleHashPut(SSHashObj *pHashObj, const void *key, size_t keyLen, cons // need the resize process, write lock applied if (SHASH_NEED_RESIZE(pHashObj)) { - taosHashTableResize(pHashObj); + tSimpleHashTableResize(pHashObj); } int32_t slot = HASH_INDEX(hashVal, pHashObj->capacity); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 6e3067d44e..3f6d3421ec 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -616,6 +616,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_FILE_CORRUPTED, "Rsma file corrupted TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_REMOVE_EXISTS, "Rsma remove exists") TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_FETCH_MSG_MSSED_UP, "Rsma fetch msg is messed up") TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_EMPTY_INFO, "Rsma info is empty") +TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_INVALID_SCHEMA, "Rsma invalid schema") //index TAOS_DEFINE_ERROR(TSDB_CODE_INDEX_REBUILDING, "Index is rebuilding") From 34e14f770fd4080058ac07b2936d33ea7a2bd5f2 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Mon, 15 Aug 2022 09:39:55 +0800 Subject: [PATCH 03/18] enh: rsma batch process --- source/dnode/vnode/src/sma/smaRollup.c | 17 ++---- source/libs/executor/inc/executil.h | 5 +- source/libs/executor/inc/executorimpl.h | 4 +- source/libs/executor/src/executil.c | 11 ++-- source/libs/executor/src/executorimpl.c | 24 ++++---- source/libs/executor/src/scanoperator.c | 6 +- source/libs/executor/src/timewindowoperator.c | 56 +++++++++---------- 7 files changed, 54 insertions(+), 69 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 41393eb52f..4ffd6479f5 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -17,7 +17,7 @@ #define RSMA_QTASKINFO_BUFSIZE 32768 #define RSMA_QTASKINFO_HEAD_LEN (sizeof(int32_t) + sizeof(int8_t) + sizeof(int64_t)) // len + type + suid -#define RSMA_QTASKEXEC_BUFSIZ 1 // * 1048576 // 8 MB +#define RSMA_QTASKEXEC_BUFSIZ 10 * 1048576 // 8 MB SSmaMgmt smaMgmt = { .inited = 0, @@ -373,8 +373,6 @@ int32_t tdProcessRSmaCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con if (!(pRSmaInfo->queue = taosOpenQueue())) { goto _err; } - smaError("vgId:%d init bufSize:%" PRIi64 ", qMemSize:%" PRIi64, SMA_VID(pSma), atomic_load_64(&pStat->qBufSize), - taosQueueMemorySize(pRSmaInfo->queue)); if (!(pRSmaInfo->qall = taosAllocateQall())) { goto _err; @@ -723,7 +721,7 @@ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, const void *pMsg, int32_t inpu tb_uid_t suid) { const SSubmitReq *pReq = (const SSubmitReq *)pMsg; - void *qItem = taosAllocateQitem(pReq->length, DEF_QITEM); + void *qItem = taosAllocateQitem(pReq->header.contLen, DEF_QITEM); if (!qItem) { return TSDB_CODE_FAILED; } @@ -733,9 +731,8 @@ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, const void *pMsg, int32_t inpu taosWriteQitem(pInfo->queue, qItem); SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pSma); - int64_t size = atomic_fetch_add_64(&pRSmaStat->qBufSize, taosQueueMemorySize(pInfo->queue)); - smaError("vgId:%d originSize:%" PRIi64 ", after push size is:%" PRIi64, SMA_VID(pSma), size, - atomic_load_64(&pRSmaStat->qBufSize)); + atomic_fetch_add_64(&pRSmaStat->qBufSize, taosQueueMemorySize(pInfo->queue)); + return TSDB_CODE_SUCCESS; } @@ -899,11 +896,8 @@ static int32_t tdRSmaExecCheck(SSma *pSma) { int64_t bufSize = atomic_load_64(&pRsmaStat->qBufSize); if ((pRsmaStat->execStat == 1) || (bufSize < RSMA_QTASKEXEC_BUFSIZ)) { - smaError("vgId:%d, return directly as execStat:%" PRIi8 ", bufSize:%" PRIi64, SMA_VID(pSma), pRsmaStat->execStat, - bufSize); return TSDB_CODE_SUCCESS; } - smaError("vgId:%d, go on exec as execStat:%" PRIi8 ", bufSize:%" PRIi64, SMA_VID(pSma), pRsmaStat->execStat, bufSize); pRsmaStat->execStat = 1; @@ -1726,7 +1720,6 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma) { atomic_store_64(&pRSmaStat->qBufSize, 0); taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); - smaError("vgId:%d after exec qBufSize is:%" PRIi64, SMA_VID(pSma), atomic_load_64(&pRSmaStat->qBufSize)); int32_t qSize = taosArrayGetSize(pSubmitQArr); for (int32_t i = 0; i < qSize; ++i) { @@ -1748,7 +1741,7 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma) { if (size > 0) { SRSmaInfo *pInfo = *(SRSmaInfo **)pItem->pRSmaInfo; for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { - if (tdExecuteRSmaImpl(pSma, *(SSubmitReq**)pSubmitArr->pData, size, STREAM_INPUT__DATA_SUBMIT, pInfo, pInfo->suid, i) < 0) { + if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, STREAM_INPUT__MERGED_SUBMIT, pInfo, pInfo->suid, i) < 0) { tdFreeRSmaSubmitItems(pSubmitArr); goto _err; } diff --git a/source/libs/executor/inc/executil.h b/source/libs/executor/inc/executil.h index 58b2c1b095..d5b979c762 100644 --- a/source/libs/executor/inc/executil.h +++ b/source/libs/executor/inc/executil.h @@ -22,7 +22,6 @@ #include "tbuffer.h" #include "tcommon.h" #include "tpagedbuf.h" -#include "tsimplehash.h" #define SET_RES_WINDOW_KEY(_k, _ori, _len, _uid) \ do { \ @@ -103,7 +102,7 @@ static FORCE_INLINE void setResultBufPageDirty(SDiskbasedBuf* pBuf, SResultRowPo setBufPageDirty(pPage, true); } -void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SSHashObj* pHashmap, int32_t order); +void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SHashObj* pHashmap, int32_t order); void cleanupGroupResInfo(SGroupResInfo* pGroupResInfo); void initMultiResInfoFromArrayList(SGroupResInfo* pGroupResInfo, SArray* pArrayList); @@ -141,4 +140,4 @@ int32_t resultrowComparAsc(const void* p1, const void* p2); int32_t isQualifiedTable(STableKeyInfo* info, SNode* pTagCond, void* metaHandle, bool* pQualified); -#endif // TDENGINE_QUERYUTIL_H +#endif // TDENGINE_QUERYUTIL_H \ No newline at end of file diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 585b642d2b..11d371d49b 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -296,7 +296,7 @@ enum { }; typedef struct SAggSupporter { - SSHashObj* pResultRowHashTable; // quick locate the window object for each result + SHashObj* pResultRowHashTable; // quick locate the window object for each result char* keyBuf; // window key buffer SDiskbasedBuf* pResultBuf; // query result buffer based on blocked-wised disk file int32_t resultRowSize; // the result buffer size for each result row, with the meta data size for each row @@ -1045,4 +1045,4 @@ void* destroySqlFunctionCtx(SqlFunctionCtx* pCtx, int32_t numOfOutput); } #endif -#endif // TDENGINE_EXECUTORIMPL_H +#endif // TDENGINE_EXECUTORIMPL_H \ No newline at end of file diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 2e6bd312f3..615d742d40 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -97,7 +97,7 @@ int32_t resultrowComparAsc(const void* p1, const void* p2) { static int32_t resultrowComparDesc(const void* p1, const void* p2) { return resultrowComparAsc(p2, p1); } -void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SSHashObj* pHashmap, int32_t order) { +void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SHashObj* pHashmap, int32_t order) { if (pGroupResInfo->pRows != NULL) { taosArrayDestroy(pGroupResInfo->pRows); } @@ -106,10 +106,9 @@ void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SSHashObj* pHashmap, in void* pData = NULL; pGroupResInfo->pRows = taosArrayInit(10, POINTER_BYTES); - size_t keyLen = 0; - int32_t iter = 0; - while ((pData = tSimpleHashIterate(pHashmap, pData, &iter)) != NULL) { - void* key = tSimpleHashGetKey(pData, &keyLen); + size_t keyLen = 0; + while ((pData = taosHashIterate(pHashmap, pData)) != NULL) { + void* key = taosHashGetKey(pData, &keyLen); SResKeyPos* p = taosMemoryMalloc(keyLen + sizeof(SResultRowPosition)); @@ -987,4 +986,4 @@ void initLimitInfo(const SNode* pLimit, const SNode* pSLimit, SLimitInfo* pLimit pLimitInfo->slimit = slimit; pLimitInfo->remainOffset = limit.offset; pLimitInfo->remainGroupOffset = slimit.offset; -} +} \ No newline at end of file diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 46ca99c8cd..5d07a15b2f 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -250,7 +250,7 @@ SResultRow* doSetResultOutBufByKey(SDiskbasedBuf* pResultBuf, SResultRowInfo* pR SET_RES_WINDOW_KEY(pSup->keyBuf, pData, bytes, groupId); SResultRowPosition* p1 = - (SResultRowPosition*)tSimpleHashGet(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); + (SResultRowPosition*)taosHashGet(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); SResultRow* pResult = NULL; @@ -292,7 +292,7 @@ SResultRow* doSetResultOutBufByKey(SDiskbasedBuf* pResultBuf, SResultRowInfo* pR // add a new result set for a new group SResultRowPosition pos = {.pageId = pResult->pageId, .offset = pResult->offset}; - tSimpleHashPut(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes), &pos, + taosHashPut(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes), &pos, sizeof(SResultRowPosition)); } @@ -301,7 +301,7 @@ SResultRow* doSetResultOutBufByKey(SDiskbasedBuf* pResultBuf, SResultRowInfo* pR // too many time window in query if (pTaskInfo->execModel == OPTR_EXEC_MODEL_BATCH && - tSimpleHashGetSize(pSup->pResultRowHashTable) > MAX_INTERVAL_TIME_WINDOW) { + taosHashGetSize(pSup->pResultRowHashTable) > MAX_INTERVAL_TIME_WINDOW) { longjmp(pTaskInfo->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW); } @@ -3017,7 +3017,7 @@ int32_t aggEncodeResultRow(SOperatorInfo* pOperator, char** result, int32_t* len } SOptrBasicInfo* pInfo = (SOptrBasicInfo*)(pOperator->info); SAggSupporter* pSup = (SAggSupporter*)POINTER_SHIFT(pOperator->info, sizeof(SOptrBasicInfo)); - int32_t size = tSimpleHashGetSize(pSup->pResultRowHashTable); + int32_t size = taosHashGetSize(pSup->pResultRowHashTable); size_t keyLen = sizeof(uint64_t) * 2; // estimate the key length int32_t totalSize = sizeof(int32_t) + sizeof(int32_t) + size * (sizeof(int32_t) + keyLen + sizeof(int32_t) + pSup->resultRowSize); @@ -3045,10 +3045,9 @@ int32_t aggEncodeResultRow(SOperatorInfo* pOperator, char** result, int32_t* len setBufPageDirty(pPage, true); releaseBufPage(pSup->pResultBuf, pPage); - int32_t iter = 0; - void* pIter = tSimpleHashIterate(pSup->pResultRowHashTable, NULL, &iter); + void* pIter = taosHashIterate(pSup->pResultRowHashTable, NULL); while (pIter) { - void* key = tSimpleHashGetKey(pIter, &keyLen); + void* key = taosHashGetKey(pIter, &keyLen); SResultRowPosition* p1 = (SResultRowPosition*)pIter; pPage = (SFilePage*)getBufPage(pSup->pResultBuf, p1->pageId); @@ -3080,7 +3079,7 @@ int32_t aggEncodeResultRow(SOperatorInfo* pOperator, char** result, int32_t* len memcpy(*result + offset, pRow, pSup->resultRowSize); offset += pSup->resultRowSize; - pIter = tSimpleHashIterate(pSup->pResultRowHashTable, pIter, &iter); + pIter = taosHashIterate(pSup->pResultRowHashTable, pIter); } *(int32_t*)(*result) = offset; @@ -3115,7 +3114,7 @@ int32_t aggDecodeResultRow(SOperatorInfo* pOperator, char* result) { // add a new result set for a new group SResultRowPosition pos = {.pageId = resultRow->pageId, .offset = resultRow->offset}; - tSimpleHashPut(pSup->pResultRowHashTable, result + offset, keyLen, &pos, sizeof(SResultRowPosition)); + taosHashPut(pSup->pResultRowHashTable, result + offset, keyLen, &pos, sizeof(SResultRowPosition)); offset += keyLen; int32_t valueLen = *(int32_t*)(result + offset); @@ -3454,8 +3453,7 @@ int32_t doInitAggInfoSup(SAggSupporter* pAggSup, SqlFunctionCtx* pCtx, int32_t n pAggSup->resultRowSize = getResultRowSize(pCtx, numOfOutput); pAggSup->keyBuf = taosMemoryCalloc(1, keyBufSize + POINTER_BYTES + sizeof(int64_t)); - // pAggSup->pResultRowHashTable = taosHashInit(10, hashFn, true, HASH_NO_LOCK); - pAggSup->pResultRowHashTable = tSimpleHashInit(100000, hashFn); + pAggSup->pResultRowHashTable = taosHashInit(10, hashFn, true, HASH_NO_LOCK); if (pAggSup->keyBuf == NULL || pAggSup->pResultRowHashTable == NULL) { return TSDB_CODE_OUT_OF_MEMORY; @@ -3481,7 +3479,7 @@ int32_t doInitAggInfoSup(SAggSupporter* pAggSup, SqlFunctionCtx* pCtx, int32_t n void cleanupAggSup(SAggSupporter* pAggSup) { taosMemoryFreeClear(pAggSup->keyBuf); - tSimpleHashCleanup(pAggSup->pResultRowHashTable); + taosHashCleanup(pAggSup->pResultRowHashTable); destroyDiskbasedBuf(pAggSup->pResultBuf); } @@ -4779,4 +4777,4 @@ int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, const char* pKey, SqlF pCtx[i].pBuf = pSup->pResultBuf; } return code; -} +} \ No newline at end of file diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index c404fca597..c373634c16 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -178,8 +178,8 @@ static SResultRow* getTableGroupOutputBuf(SOperatorInfo* pOperator, uint64_t gro STableScanInfo* pTableScanInfo = pOperator->info; - SResultRowPosition* p1 = (SResultRowPosition*)tSimpleHashGet(pTableScanInfo->pdInfo.pAggSup->pResultRowHashTable, buf, - GET_RES_WINDOW_KEY_LEN(sizeof(groupId))); + SResultRowPosition* p1 = + (SResultRowPosition*)taosHashGet(pTableScanInfo->pdInfo.pAggSup->pResultRowHashTable, buf, GET_RES_WINDOW_KEY_LEN(sizeof(groupId))); if (p1 == NULL) { return NULL; @@ -3128,4 +3128,4 @@ _error: taosMemoryFree(pInfo); taosMemoryFree(pOperator); return NULL; -} +} \ No newline at end of file diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index ff323bf4ba..b81cb7724f 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -1385,7 +1385,7 @@ bool doClearWindow(SAggSupporter* pAggSup, SExprSupp* pSup, char* pData, int16_t int32_t numOfOutput) { SET_RES_WINDOW_KEY(pAggSup->keyBuf, pData, bytes, groupId); SResultRowPosition* p1 = - (SResultRowPosition*)tSimpleHashGet(pAggSup->pResultRowHashTable, pAggSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); + (SResultRowPosition*)taosHashGet(pAggSup->pResultRowHashTable, pAggSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); if (!p1) { // window has been closed return false; @@ -1398,14 +1398,14 @@ bool doDeleteIntervalWindow(SAggSupporter* pAggSup, TSKEY ts, uint64_t groupId) size_t bytes = sizeof(TSKEY); SET_RES_WINDOW_KEY(pAggSup->keyBuf, &ts, bytes, groupId); SResultRowPosition* p1 = - (SResultRowPosition*)tSimpleHashGet(pAggSup->pResultRowHashTable, pAggSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); + (SResultRowPosition*)taosHashGet(pAggSup->pResultRowHashTable, pAggSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); if (!p1) { // window has been closed return false; } // SFilePage* bufPage = getBufPage(pAggSup->pResultBuf, p1->pageId); // dBufSetBufPageRecycled(pAggSup->pResultBuf, bufPage); - tSimpleHashRemove(pAggSup->pResultRowHashTable, pAggSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); + taosHashRemove(pAggSup->pResultRowHashTable, pAggSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); return true; } @@ -1455,12 +1455,11 @@ static void doClearWindows(SAggSupporter* pAggSup, SExprSupp* pSup1, SInterval* } } -static int32_t getAllIntervalWindow(SSHashObj* pHashMap, SHashObj* resWins) { +static int32_t getAllIntervalWindow(SHashObj* pHashMap, SHashObj* resWins) { void* pIte = NULL; size_t keyLen = 0; - int32_t iter = 0; - while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { - void* key = tSimpleHashGetKey(pIte, &keyLen); + while ((pIte = taosHashIterate(pHashMap, pIte)) != NULL) { + void* key = taosHashGetKey(pIte, &keyLen); uint64_t groupId = *(uint64_t*)key; ASSERT(keyLen == GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY))); TSKEY ts = *(int64_t*)((char*)key + sizeof(uint64_t)); @@ -1473,18 +1472,16 @@ static int32_t getAllIntervalWindow(SSHashObj* pHashMap, SHashObj* resWins) { return TSDB_CODE_SUCCESS; } -static int32_t closeIntervalWindow(SSHashObj* pHashMap, STimeWindowAggSupp* pSup, SInterval* pInterval, +static int32_t closeIntervalWindow(SHashObj* pHashMap, STimeWindowAggSupp* pSup, SInterval* pInterval, SHashObj* pPullDataMap, SHashObj* closeWins, SArray* pRecyPages, SDiskbasedBuf* pDiscBuf) { qDebug("===stream===close interval window"); void* pIte = NULL; - void* key = NULL; - size_t keyLen = GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY)); - int32_t iter = 0; - while ((pIte = tSimpleHashIterateKV(pHashMap, pIte, &key, &iter)) != NULL) { - // void* key = tSimpleHashGetKey(pIte, &keyLen); + size_t keyLen = 0; + while ((pIte = taosHashIterate(pHashMap, pIte)) != NULL) { + void* key = taosHashGetKey(pIte, &keyLen); uint64_t groupId = *(uint64_t*)key; - // ASSERT(keyLen == GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY))); + ASSERT(keyLen == GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY))); TSKEY ts = *(int64_t*)((char*)key + sizeof(uint64_t)); STimeWindow win; win.skey = ts; @@ -1520,7 +1517,7 @@ static int32_t closeIntervalWindow(SSHashObj* pHashMap, STimeWindowAggSupp* pSup } char keyBuf[GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY))]; SET_RES_WINDOW_KEY(keyBuf, &ts, sizeof(TSKEY), groupId); - tSimpleHashRemove(pHashMap, keyBuf, keyLen); + taosHashRemove(pHashMap, keyBuf, keyLen); } } return TSDB_CODE_SUCCESS; @@ -2850,7 +2847,7 @@ bool hasIntervalWindow(SAggSupporter* pSup, TSKEY ts, uint64_t groupId) { int32_t bytes = sizeof(TSKEY); SET_RES_WINDOW_KEY(pSup->keyBuf, &ts, bytes, groupId); SResultRowPosition* p1 = - (SResultRowPosition*)tSimpleHashGet(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); + (SResultRowPosition*)taosHashGet(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes)); return p1 != NULL; } @@ -2891,9 +2888,8 @@ static void rebuildIntervalWindow(SStreamFinalIntervalOperatorInfo* pInfo, SExpr bool isDeletedWindow(STimeWindow* pWin, uint64_t groupId, SAggSupporter* pSup) { SET_RES_WINDOW_KEY(pSup->keyBuf, &pWin->skey, sizeof(int64_t), groupId); - SResultRowPosition* p1 = (SResultRowPosition*)tSimpleHashGet(pSup->pResultRowHashTable, pSup->keyBuf, - GET_RES_WINDOW_KEY_LEN(sizeof(int64_t))); - + SResultRowPosition* p1 = (SResultRowPosition*)taosHashGet(pSup->pResultRowHashTable, pSup->keyBuf, + GET_RES_WINDOW_KEY_LEN(sizeof(int64_t))); return p1 == NULL; } @@ -3021,7 +3017,7 @@ static void doHashInterval(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBloc } static void clearStreamIntervalOperator(SStreamFinalIntervalOperatorInfo* pInfo) { - tSimpleHashClear(pInfo->aggSup.pResultRowHashTable); + taosHashClear(pInfo->aggSup.pResultRowHashTable); clearDiskbasedBuf(pInfo->aggSup.pResultBuf); cleanupResultRowInfo(&pInfo->binfo.resultRowInfo); initResultRowInfo(&pInfo->binfo.resultRowInfo); @@ -4932,14 +4928,14 @@ static int32_t outputMergeAlignedIntervalResult(SOperatorInfo* pOperatorInfo, ui SExprSupp* pSup = &pOperatorInfo->exprSupp; SET_RES_WINDOW_KEY(iaInfo->aggSup.keyBuf, &wstartTs, TSDB_KEYSIZE, tableGroupId); - SResultRowPosition* p1 = (SResultRowPosition*)tSimpleHashGet( - iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); + SResultRowPosition* p1 = (SResultRowPosition*)taosHashGet(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, + GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); ASSERT(p1 != NULL); finalizeResultRowIntoResultDataBlock(iaInfo->aggSup.pResultBuf, p1, pSup->pCtx, pSup->pExprInfo, pSup->numOfExprs, pSup->rowEntryInfoOffset, pResultBlock, pTaskInfo); - tSimpleHashRemove(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); - ASSERT(tSimpleHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 0); + taosHashRemove(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); + ASSERT(taosHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 0); return TSDB_CODE_SUCCESS; } @@ -4962,7 +4958,7 @@ static void doMergeAlignedIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultR // there is an result exists if (miaInfo->curTs != INT64_MIN) { - ASSERT(tSimpleHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 1); + ASSERT(taosHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 1); if (ts != miaInfo->curTs) { outputMergeAlignedIntervalResult(pOperatorInfo, tableGroupId, pResultBlock, miaInfo->curTs); @@ -4970,7 +4966,7 @@ static void doMergeAlignedIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultR } } else { miaInfo->curTs = ts; - ASSERT(tSimpleHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 0); + ASSERT(taosHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 0); } STimeWindow win = {0}; @@ -5046,7 +5042,7 @@ static void doMergeAlignedIntervalAgg(SOperatorInfo* pOperator) { if (pBlock == NULL) { // close last unfinalized time window if (miaInfo->curTs != INT64_MIN) { - ASSERT(tSimpleHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 1); + ASSERT(taosHashGetSize(iaInfo->aggSup.pResultRowHashTable) == 1); outputMergeAlignedIntervalResult(pOperator, miaInfo->groupId, pRes, miaInfo->curTs); miaInfo->curTs = INT64_MIN; } @@ -5223,12 +5219,12 @@ static int32_t finalizeWindowResult(SOperatorInfo* pOperatorInfo, uint64_t table SExprSupp* pExprSup = &pOperatorInfo->exprSupp; SET_RES_WINDOW_KEY(iaInfo->aggSup.keyBuf, &win->skey, TSDB_KEYSIZE, tableGroupId); - SResultRowPosition* p1 = (SResultRowPosition*)tSimpleHashGet(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, + SResultRowPosition* p1 = (SResultRowPosition*)taosHashGet(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); ASSERT(p1 != NULL); finalizeResultRowIntoResultDataBlock(iaInfo->aggSup.pResultBuf, p1, pExprSup->pCtx, pExprSup->pExprInfo, pExprSup->numOfExprs, pExprSup->rowEntryInfoOffset, pResultBlock, pTaskInfo); - tSimpleHashRemove(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); + taosHashRemove(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); return TSDB_CODE_SUCCESS; } @@ -5491,4 +5487,4 @@ _error: taosMemoryFreeClear(pOperator); pTaskInfo->code = code; return NULL; -} +} \ No newline at end of file From d170adf2140328935fb9fb0b69f5f082de2569d8 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Tue, 16 Aug 2022 20:22:45 +0800 Subject: [PATCH 04/18] enh: rsma batch process --- source/dnode/vnode/src/inc/sma.h | 2 +- source/dnode/vnode/src/inc/vnd.h | 1 + source/dnode/vnode/src/sma/smaCommit.c | 37 +++-- source/dnode/vnode/src/sma/smaRollup.c | 174 +++++++++++++++------- source/dnode/vnode/src/vnd/vnodeBufPool.c | 12 +- source/dnode/vnode/src/vnd/vnodeCommit.c | 8 +- source/libs/executor/src/scanoperator.c | 3 +- 7 files changed, 169 insertions(+), 68 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index bc204e032d..c36207e495 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -240,7 +240,7 @@ static int32_t tdDestroySmaState(SSmaStat *pSmaStat, int8_t smaType); void *tdFreeSmaState(SSmaStat *pSmaStat, int8_t smaType); void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree); int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash); -int32_t tdRSmaProcessExecImpl(SSma *pSma); +int32_t tdRSmaProcessExecImpl(SSma *pSma, int8_t type); int32_t tdProcessRSmaCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName); int32_t tdProcessRSmaRestoreImpl(SSma *pSma, int8_t type, int64_t qtaskFileVer); diff --git a/source/dnode/vnode/src/inc/vnd.h b/source/dnode/vnode/src/inc/vnd.h index 77b18b8c02..900d29b97e 100644 --- a/source/dnode/vnode/src/inc/vnd.h +++ b/source/dnode/vnode/src/inc/vnd.h @@ -65,6 +65,7 @@ struct SVBufPool { SVBufPool* next; SVnode* pVnode; volatile int32_t nRef; + TdThreadSpinlock lock; int64_t size; uint8_t* ptr; SVBufPoolNode* pTail; diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 807c033489..101fca3346 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -297,10 +297,9 @@ static int32_t tdProcessRSmaSyncPostCommitImpl(SSma *pSma) { } /** - * @brief Rsma async commit implementation + * @brief Rsma async commit implementation(only do some necessary light weighted task) * 1) set rsma stat TASK_TRIGGER_STAT_PAUSED * 2) Wait all running fetch task finish to fetch and put submitMsg into level 2/3 wQueue(blocking level 1 write) - * 3) * * @param pSma * @return int32_t @@ -334,12 +333,7 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { } } - // step 3: consume the SubmitReq in buffer - if (tdRSmaProcessExecImpl(pSma) < 0) { - return TSDB_CODE_FAILED; - } - - // step 4: swap rsmaInfoHash and iRsmaInfoHash + // step 3: swap queue/qall and iQueue/iQal // lock taosWLockLatch(SMA_ENV_LOCK(pEnv)); @@ -379,9 +373,32 @@ static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma) { return TSDB_CODE_SUCCESS; } - // perform persist task for qTaskInfo SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pSmaEnv); - tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); + + // step 1: consume the SubmitReq in buffer + int32_t nLoops = 0; + smaDebug("vgId:%d start to wait for rsma qtask free, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); + while (pRSmaStat->execStat == 1) { + taosMsleep(15); + if ((++nLoops & 63) == 0) { + smaWarn("vgId:%d 1s waited for rsma exec stat = 0, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); + sched_yield(); + } + } + pRSmaStat->execStat = 1; + smaDebug("vgId:%d end to wait for rsma qtask free, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); + + if (tdRSmaProcessExecImpl(pSma, 1) < 0) { + pRSmaStat->execStat = 0; + return TSDB_CODE_FAILED; + } + + // step 2: perform persist task for qTaskInfo operator + if (tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)) < 0) { + pRSmaStat->execStat = 0; + return TSDB_CODE_FAILED; + } + pRSmaStat->execStat = 0; return TSDB_CODE_SUCCESS; } diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 4ffd6479f5..de4b7dd808 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -15,9 +15,9 @@ #include "sma.h" -#define RSMA_QTASKINFO_BUFSIZE 32768 +#define RSMA_QTASKINFO_BUFSIZE (32768) #define RSMA_QTASKINFO_HEAD_LEN (sizeof(int32_t) + sizeof(int8_t) + sizeof(int64_t)) // len + type + suid -#define RSMA_QTASKEXEC_BUFSIZ 10 * 1048576 // 8 MB +#define RSMA_QTASKEXEC_BUFSIZ (1048576) SSmaMgmt smaMgmt = { .inited = 0, @@ -35,7 +35,7 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUi static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo, int8_t idx); static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo, - tb_uid_t suid, int8_t level); + int8_t type, int8_t level); static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid); static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); static int32_t tdRSmaFetchAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, @@ -600,17 +600,6 @@ static int32_t tdFetchSubmitReqSuids(SSubmitReq *pMsg, STbUidStore *pStore) { return 0; } -static void tdDestroySDataBlockArray(SArray *pArray) { - // TODO -#if 0 - for (int32_t i = 0; i < taosArrayGetSize(pArray); ++i) { - SSDataBlock *pDataBlock = taosArrayGet(pArray, i); - blockDestroyInner(pDataBlock); - } -#endif - taosArrayDestroy(pArray); -} - /** * @brief retention of rsma1/rsma2 * @@ -668,8 +657,7 @@ static int32_t tdRSmaFetchAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSm } else { smaDebug("vgId:%d, rsma %" PRIi8 " data fetched", SMA_VID(pSma), pItem->level); } - -#if 1 +#if 0 char flag[10] = {0}; snprintf(flag, 10, "level %" PRIi8, pItem->level); blockDebugShowDataBlocks(pResList, flag); @@ -731,11 +719,45 @@ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, const void *pMsg, int32_t inpu taosWriteQitem(pInfo->queue, qItem); SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pSma); - atomic_fetch_add_64(&pRSmaStat->qBufSize, taosQueueMemorySize(pInfo->queue)); + int64_t bufSize = atomic_add_fetch_64(&pRSmaStat->qBufSize, pReq->header.contLen); + + // smoothing consume + int32_t n = bufSize / RSMA_QTASKEXEC_BUFSIZ; + if (n > 1) { + if (n > 10) { + n = 10; + } + taosMsleep(n << 4); + if (n > 2) { + smaWarn("vgId:%d pInfo->queue itemSize:%d, memSize:%" PRIi64 ", sleep %d ms", SMA_VID(pSma), + taosQueueItemSize(pInfo->queue), taosQueueMemorySize(pInfo->queue), n << 4); + } else { + smaDebug("vgId:%d pInfo->queue itemSize:%d, memSize:%" PRIi64 ", sleep %d ms", SMA_VID(pSma), + taosQueueItemSize(pInfo->queue), taosQueueMemorySize(pInfo->queue), n << 4); + } + } return TSDB_CODE_SUCCESS; } +static int32_t tdRsmaPrintSubmitReq(SSma *pSma, SSubmitReq *pReq) { + SSubmitMsgIter msgIter = {0}; + SSubmitBlkIter blkIter = {0}; + STSRow *row = NULL; + if (tInitSubmitMsgIter(pReq, &msgIter) < 0) return -1; + while (true) { + SSubmitBlk *pBlock = NULL; + if (tGetSubmitMsgNext(&msgIter, &pBlock) < 0) return -1; + if (pBlock == NULL) break; + tInitSubmitBlkIter(&msgIter, pBlock, &blkIter); + while ((row = tGetSubmitBlkNext(&blkIter)) != NULL) { + smaDebug("vgId:%d numOfRows:%d, suid:%" PRIi64 ", uid:%" PRIi64 ", version:%" PRIi64 ", ts:%" PRIi64, + SMA_VID(pSma), msgIter.numOfRows, msgIter.suid, msgIter.uid, pReq->version, row->ts); + } + } + return 0; +} + /** * @brief sync mode * @@ -744,32 +766,42 @@ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, const void *pMsg, int32_t inpu * @param msgSize * @param inputType * @param pInfo - * @param suid + * @param type * @param level * @return int32_t */ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo, - tb_uid_t suid, int8_t level) { + int8_t type, int8_t level) { int32_t idx = level - 1; - if (!pInfo || !RSMA_INFO_QTASK(pInfo, idx)) { - smaDebug("vgId:%d, no qTaskInfo to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, suid); + + void *qTaskInfo = (type == 0) ? RSMA_INFO_QTASK(pInfo, idx) : RSMA_INFO_IQTASK(pInfo, idx); + if (!qTaskInfo) { + smaDebug("vgId:%d, no qTaskInfo to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, + pInfo->suid); return TSDB_CODE_SUCCESS; } if (!pInfo->pTSchema) { - smaWarn("vgId:%d, no schema to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, suid); + smaWarn("vgId:%d, no schema to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, pInfo->suid); return TSDB_CODE_FAILED; } smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p suid:%" PRIu64, SMA_VID(pSma), level, - RSMA_INFO_QTASK(pInfo, idx), suid); + RSMA_INFO_QTASK(pInfo, idx), pInfo->suid); - if (qSetMultiStreamInput(RSMA_INFO_QTASK(pInfo, idx), pMsg, msgSize, inputType) < 0) { // INPUT__DATA_SUBMIT +#if 0 + for (int32_t i = 0; i < msgSize; ++i) { + SSubmitReq *pReq = *(SSubmitReq **)((char *)pMsg + i * sizeof(void *)); + smaDebug("vgId:%d [%d][%d] version %" PRIi64, SMA_VID(pSma), msgSize, i, pReq->version); + tdRsmaPrintSubmitReq(pSma, pReq); + } +#endif + if (qSetMultiStreamInput(qTaskInfo, pMsg, msgSize, inputType) < 0) { smaError("vgId:%d, rsma %" PRIi8 " qSetStreamInput failed since %s", SMA_VID(pSma), level, tstrerror(terrno)); return TSDB_CODE_FAILED; } SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); - tdRSmaFetchAndSubmitResult(pSma, RSMA_INFO_QTASK(pInfo, idx), pItem, pInfo->pTSchema, suid); + tdRSmaFetchAndSubmitResult(pSma, qTaskInfo, pItem, pInfo->pTSchema, pInfo->suid); atomic_store_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE); if (smaMgmt.tmrHandle) { @@ -858,6 +890,8 @@ static int32_t tdExecuteRSmaAsync(SSma *pSma, const void *pMsg, int32_t inputTyp tdReleaseRSmaInfo(pSma, pRSmaInfo); return TSDB_CODE_FAILED; } + } else { + ASSERT(0); } tdReleaseRSmaInfo(pSma, pRSmaInfo); @@ -896,9 +930,18 @@ static int32_t tdRSmaExecCheck(SSma *pSma) { int64_t bufSize = atomic_load_64(&pRsmaStat->qBufSize); if ((pRsmaStat->execStat == 1) || (bufSize < RSMA_QTASKEXEC_BUFSIZ)) { + if (bufSize > RSMA_QTASKEXEC_BUFSIZ) { + smaDebug("vgId:%d bufSize is %d but has no chance to exec as qTaskInfo occupied by another task", SMA_VID(pSma), + bufSize); + } else { + smaDebug("vgId:%d bufSize is %d but has no chance to exec as less than %d", SMA_VID(pSma), bufSize, + RSMA_QTASKEXEC_BUFSIZ); + } return TSDB_CODE_SUCCESS; } + smaDebug("vgId:%d bufSize is %d and has chance to exec as qTaskInfo is free now", SMA_VID(pSma), bufSize); + pRsmaStat->execStat = 1; SRSmaExecMsg fetchMsg; @@ -1633,6 +1676,7 @@ int32_t smaProcessFetch(SSma *pSma, void *pMsg) { goto _err; } +#if 0 pInfo = tdAcquireRSmaInfoBySuid(pSma, req.suid); if (!pInfo) { if (terrno == TSDB_CODE_SUCCESS) { @@ -1657,12 +1701,13 @@ int32_t smaProcessFetch(SSma *pSma, void *pMsg) { tdCleanupStreamInputDataBlock(taskInfo); tdReleaseRSmaInfo(pSma, pInfo); +#endif tDecoderClear(&decoder); smaDebug("vgId:%d, success to process rsma fetch msg for suid:%" PRIi64 " level:%" PRIi8, SMA_VID(pSma), req.suid, req.level); return TSDB_CODE_SUCCESS; _err: - tdReleaseRSmaInfo(pSma, pInfo); + // tdReleaseRSmaInfo(pSma, pInfo); tDecoderClear(&decoder); smaError("vgId:%d, failed to process rsma fetch msg since %s", SMA_VID(pSma), terrstr()); return TSDB_CODE_FAILED; @@ -1674,7 +1719,14 @@ static void tdFreeRSmaSubmitItems(SArray *pItems) { } } -int32_t tdRSmaProcessExecImpl(SSma *pSma) { +/** + * @brief + * + * @param pSma + * @param type 0 triggered when buffer overflow, 1 triggered by commit + * @return int32_t + */ +int32_t tdRSmaProcessExecImpl(SSma *pSma, int8_t type) { SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); SHashObj *infoHash = NULL; @@ -1686,12 +1738,14 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma) { goto _err; } - taosRLockLatch(SMA_ENV_LOCK(pEnv)); - if (atomic_load_64(&pRSmaStat->qBufSize) < RSMA_QTASKEXEC_BUFSIZ) { + if (type == 0) { + taosRLockLatch(SMA_ENV_LOCK(pEnv)); + if (atomic_load_64(&pRSmaStat->qBufSize) < RSMA_QTASKEXEC_BUFSIZ) { + taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); + return TSDB_CODE_SUCCESS; + } taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); - return TSDB_CODE_SUCCESS; } - taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); if (!(pSubmitQArr = taosArrayInit(taosHashGetSize(infoHash), sizeof(SRSmaExecQItem)))) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -1706,18 +1760,33 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma) { SRSmaExecQItem qItem = {0}; taosWLockLatch(SMA_ENV_LOCK(pEnv)); void *pIter = taosHashIterate(infoHash, NULL); - while (pIter) { - SRSmaInfo *pInfo = *(SRSmaInfo **)pIter; - if (taosQueueItemSize(pInfo->queue)) { - taosReadAllQitems(pInfo->queue, pInfo->qall); - qItem.qall = &pInfo->qall; - qItem.pRSmaInfo = pIter; - taosArrayPush(pSubmitQArr, &qItem); + if (type == 0) { + while (pIter) { + SRSmaInfo *pInfo = *(SRSmaInfo **)pIter; + if (taosQueueItemSize(pInfo->queue)) { + taosReadAllQitems(pInfo->queue, pInfo->qall); + qItem.qall = &pInfo->qall; + qItem.pRSmaInfo = pIter; + taosArrayPush(pSubmitQArr, &qItem); + } + ASSERT(taosQueueItemSize(pInfo->queue) == 0); + pIter = taosHashIterate(infoHash, pIter); } - ASSERT(taosQueueItemSize(pInfo->queue) == 0); - pIter = taosHashIterate(infoHash, pIter); + } else if (type == 1) { + while (pIter) { + SRSmaInfo *pInfo = *(SRSmaInfo **)pIter; + if (taosQueueItemSize(pInfo->iQueue)) { + taosReadAllQitems(pInfo->iQueue, pInfo->iQall); + qItem.qall = &pInfo->iQall; + qItem.pRSmaInfo = pIter; + taosArrayPush(pSubmitQArr, &qItem); + } + ASSERT(taosQueueItemSize(pInfo->iQueue) == 0); + pIter = taosHashIterate(infoHash, pIter); + } + } else { + ASSERT(0); } - atomic_store_64(&pRSmaStat->qBufSize, 0); taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); @@ -1739,12 +1808,16 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma) { int32_t size = taosArrayGetSize(pSubmitArr); if (size > 0) { - SRSmaInfo *pInfo = *(SRSmaInfo **)pItem->pRSmaInfo; - for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { - if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, STREAM_INPUT__MERGED_SUBMIT, pInfo, pInfo->suid, i) < 0) { - tdFreeRSmaSubmitItems(pSubmitArr); - goto _err; + if (type == 0 || type == 1) { + SRSmaInfo *pInfo = *(SRSmaInfo **)pItem->pRSmaInfo; + for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { + if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, STREAM_INPUT__MERGED_SUBMIT, pInfo, type, i) < 0) { + tdFreeRSmaSubmitItems(pSubmitArr); + goto _err; + } } + } else { + ASSERT(0); } tdFreeRSmaSubmitItems(pSubmitArr); taosArrayClear(pSubmitArr); @@ -1775,16 +1848,17 @@ int32_t smaProcessExec(SSma *pSma, void *pMsg) { terrno = TSDB_CODE_RSMA_FETCH_MSG_MSSED_UP; goto _err; } - - if (tdRSmaProcessExecImpl(pSma) < 0) { + smaDebug("vgId:%d, begin to process rsma exec msg by thread:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); + if (tdRSmaProcessExecImpl(pSma, 0) < 0) { goto _err; } pRsmaStat->execStat = 0; - smaWarn("vgId:%d, success to process rsma exec msg", SMA_VID(pSma)); + smaDebug("vgId:%d, success to process rsma exec msg by thead:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); return TSDB_CODE_SUCCESS; _err: pRsmaStat->execStat = 0; - smaError("vgId:%d, failed to process rsma fetch msg since %s", SMA_VID(pSma), terrstr()); + smaError("vgId:%d, failed to process rsma fetch msg by thread:%p since %s", SMA_VID(pSma), + (void *)taosGetSelfPthreadId(), terrstr()); return TSDB_CODE_FAILED; } diff --git a/source/dnode/vnode/src/vnd/vnodeBufPool.c b/source/dnode/vnode/src/vnd/vnodeBufPool.c index 0623b3bd10..5a22114ab4 100644 --- a/source/dnode/vnode/src/vnd/vnodeBufPool.c +++ b/source/dnode/vnode/src/vnd/vnodeBufPool.c @@ -78,7 +78,7 @@ void vnodeBufPoolReset(SVBufPool *pPool) { void *vnodeBufPoolMalloc(SVBufPool *pPool, int size) { SVBufPoolNode *pNode; void *p; - + taosThreadSpinLock(&pPool->lock); if (pPool->node.size >= pPool->ptr - pPool->node.data + size) { // allocate from the anchor node p = pPool->ptr; @@ -89,6 +89,7 @@ void *vnodeBufPoolMalloc(SVBufPool *pPool, int size) { pNode = taosMemoryMalloc(sizeof(*pNode) + size); if (pNode == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; + taosThreadSpinUnlock(&pPool->lock); return NULL; } @@ -101,7 +102,7 @@ void *vnodeBufPoolMalloc(SVBufPool *pPool, int size) { pPool->size = pPool->size + sizeof(*pNode) + size; } - + taosThreadSpinUnlock(&pPool->lock); return p; } @@ -129,6 +130,12 @@ static int vnodeBufPoolCreate(SVnode *pVnode, int64_t size, SVBufPool **ppPool) return -1; } + if (taosThreadSpinInit(&pPool->lock, 0) != 0) { + taosMemoryFree(pPool); + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + pPool->next = NULL; pPool->pVnode = pVnode; pPool->nRef = 0; @@ -145,6 +152,7 @@ static int vnodeBufPoolCreate(SVnode *pVnode, int64_t size, SVBufPool **ppPool) static int vnodeBufPoolDestroy(SVBufPool *pPool) { vnodeBufPoolReset(pPool); + taosThreadSpinDestroy(&pPool->lock); taosMemoryFree(pPool); return 0; } diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index c8dc07af0a..2f5169a0ec 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -220,9 +220,6 @@ int vnodeCommit(SVnode *pVnode) { vInfo("vgId:%d, start to commit, commit ID:%" PRId64 " version:%" PRId64, TD_VID(pVnode), pVnode->state.commitID, pVnode->state.applied); - vnodeBufPoolUnRef(pVnode->inUse); - pVnode->inUse = NULL; - pVnode->state.commitTerm = pVnode->state.applyTerm; // save info @@ -248,7 +245,7 @@ int vnodeCommit(SVnode *pVnode) { } if (VND_IS_RSMA(pVnode)) { - smaAsyncCommit(pVnode->pSma); + smaAsyncCommit(pVnode->pSma); // would write L2/L3 data into BufPool if (tsdbCommit(VND_RSMA0(pVnode)) < 0) { ASSERT(0); @@ -268,6 +265,9 @@ int vnodeCommit(SVnode *pVnode) { return -1; } } + + vnodeBufPoolUnRef(pVnode->inUse); + pVnode->inUse = NULL; if (tqCommit(pVnode->pTq) < 0) { ASSERT(0); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 32d564d502..adb79fc6ad 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1174,6 +1174,7 @@ static void checkUpdateData(SStreamScanInfo* pInfo, bool invertible, SSDataBlock SColumnInfoData* pColDataInfo = taosArrayGet(pBlock->pDataBlock, pInfo->primaryTsIndex); ASSERT(pColDataInfo->info.type == TSDB_DATA_TYPE_TIMESTAMP); TSKEY* tsCol = (TSKEY*)pColDataInfo->pData; + bool inserted = updateInfoIsTableInserted(pInfo->pUpdateInfo, pBlock->info.uid); for (int32_t rowId = 0; rowId < pBlock->info.rows; rowId++) { SResultRowInfo dumyInfo; dumyInfo.cur.pageId = -1; @@ -1183,7 +1184,7 @@ static void checkUpdateData(SStreamScanInfo* pInfo, bool invertible, SSDataBlock win = getActiveTimeWindow(NULL, &dumyInfo, tsCol[rowId], &pInfo->interval, TSDB_ORDER_ASC); isClosed = isCloseWindow(&win, &pInfo->twAggSup); } - bool inserted = updateInfoIsTableInserted(pInfo->pUpdateInfo, pBlock->info.uid); + // must check update info first. bool update = updateInfoIsUpdated(pInfo->pUpdateInfo, pBlock->info.uid, tsCol[rowId]); bool closedWin = isClosed && inserted && isSignleIntervalWindow(pInfo) && From 13f5acd4b910f3c13c277d28e0b299bb55736069 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Wed, 17 Aug 2022 16:04:58 +0800 Subject: [PATCH 05/18] fix(query): fix interp pResBlock->rows > capacity cause assert failure TD-18445 --- source/libs/executor/src/timewindowoperator.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 6418f5305c..3e931a489d 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -2154,7 +2154,9 @@ static void doKeepLinearInfo(STimeSliceOperatorInfo* pSliceInfo, const SSDataBlo static void genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp* pExprSup, SSDataBlock* pResBlock) { int32_t rows = pResBlock->info.rows; - + if (rows >= pResBlock->info.capacity) { + return; + } // todo set the correct primary timestamp column // output the result From 853e6e29888afcf14ce4a743c9b82f1c19850bb9 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Wed, 17 Aug 2022 19:19:58 +0800 Subject: [PATCH 06/18] refactor(mnode): drop stream task --- examples/c/stream_demo.c | 7 +++--- include/common/tcommon.h | 1 + include/libs/stream/tstream.h | 5 +++++ include/util/taoserror.h | 1 + source/dnode/mnode/impl/src/mndScheduler.c | 2 ++ source/dnode/mnode/impl/src/mndStb.c | 11 ++++++---- source/dnode/vnode/src/tq/tq.c | 14 +++--------- source/libs/stream/inc/streamInc.h | 1 - source/libs/stream/src/stream.c | 8 +++++-- source/libs/stream/src/streamMeta.c | 25 ++++++++++++++++------ source/libs/stream/src/streamQueue.c | 4 +++- source/libs/stream/src/streamTask.c | 4 ++-- source/util/src/terror.c | 1 + 13 files changed, 53 insertions(+), 31 deletions(-) diff --git a/examples/c/stream_demo.c b/examples/c/stream_demo.c index dd4fbc8d2d..2fcf4dd62c 100644 --- a/examples/c/stream_demo.c +++ b/examples/c/stream_demo.c @@ -98,10 +98,9 @@ int32_t create_stream() { /*const char* sql = "select min(k), max(k), sum(k) as sum_of_k from st1";*/ /*const char* sql = "select sum(k) from tu1 interval(10m)";*/ /*pRes = tmq_create_stream(pConn, "stream1", "out1", sql);*/ - pRes = - taos_query(pConn, - "create stream stream1 trigger max_delay 10s into outstb as select _wstart, sum(k) from st1 partition " - "by tbname session(ts, 10s) "); + pRes = taos_query(pConn, + "create stream stream1 trigger max_delay 10s watermark 10s into outstb as select _wstart start, " + "count(k) from st1 partition by tbname interval(20s) "); if (taos_errno(pRes) != 0) { printf("failed to create stream stream1, reason:%s\n", taos_errstr(pRes)); return -1; diff --git a/include/common/tcommon.h b/include/common/tcommon.h index e04d9d5e86..dbe020f7ec 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -60,6 +60,7 @@ enum { STREAM_INPUT__DATA_RETRIEVE, STREAM_INPUT__GET_RES, STREAM_INPUT__CHECKPOINT, + STREAM_INPUT__DESTROY, }; typedef enum EStreamType { diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index e6fcb021d5..484d0991f2 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -53,6 +53,7 @@ enum { TASK_SCHED_STATUS__WAITING, TASK_SCHED_STATUS__ACTIVE, TASK_SCHED_STATUS__FAILED, + TASK_SCHED_STATUS__DROPPING, }; enum { @@ -127,6 +128,10 @@ typedef struct { int8_t type; } SStreamCheckpoint; +typedef struct { + int8_t type; +} SStreamTaskDestroy; + typedef struct { int8_t type; SSDataBlock* pBlock; diff --git a/include/util/taoserror.h b/include/util/taoserror.h index d7ec3697af..c3796fbadd 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -291,6 +291,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_MND_STREAM_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03F1) #define TSDB_CODE_MND_INVALID_STREAM_OPTION TAOS_DEF_ERROR_CODE(0, 0x03F2) #define TSDB_CODE_MND_STREAM_MUST_BE_DELETED TAOS_DEF_ERROR_CODE(0, 0x03F3) +#define TSDB_CODE_MND_STREAM_TASK_DROPPED TAOS_DEF_ERROR_CODE(0, 0x03F4) // mnode-sma #define TSDB_CODE_MND_SMA_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x0480) diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index a24b7ef459..3bfd7eb596 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -424,6 +424,8 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) { } mndAddTaskToTaskSet(taskSourceLevel, pTask); + pTask->triggerParam = 0; + // source pTask->taskLevel = TASK_LEVEL__SOURCE; diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index dd2b595c29..59c6d65953 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -2021,8 +2021,7 @@ static int32_t mndCheckDropStbForTopic(SMnode *pMnode, const char *stbFullName, FOREACH(pNode, pNodeList) { SColumnNode *pCol = (SColumnNode *)pNode; - if (pCol->tableId != suid) { - mDebug("topic:%s, check colId:%d passed", pTopic->name, pCol->colId); + if (pCol->tableId == suid) { sdbRelease(pSdb, pTopic); nodesDestroyNode(pAst); return -1; @@ -2045,6 +2044,11 @@ static int32_t mndCheckDropStbForStream(SMnode *pMnode, const char *stbFullName, pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); if (pIter == NULL) break; + if (pStream->smaId == 0 && pStream->targetStbUid == suid) { + sdbRelease(pSdb, pStream); + return -1; + } + SNode *pAst = NULL; if (nodesStringToNode(pStream->ast, &pAst) != 0) { ASSERT(0); @@ -2057,8 +2061,7 @@ static int32_t mndCheckDropStbForStream(SMnode *pMnode, const char *stbFullName, FOREACH(pNode, pNodeList) { SColumnNode *pCol = (SColumnNode *)pNode; - if (pCol->tableId != suid) { - mDebug("stream:%s, check colId:%d passed", pStream->name, pCol->colId); + if (pCol->tableId == suid) { sdbRelease(pSdb, pStream); nodesDestroyNode(pAst); return -1; diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index a98fea1988..c6bc8e6e59 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -628,8 +628,6 @@ int32_t tqProcessVgChangeReq(STQ* pTq, int64_t version, char* msg, int32_t msgLe } int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask) { - int32_t code = 0; - if (pTask->taskLevel == TASK_LEVEL__AGG) { ASSERT(taosArrayGetSize(pTask->childEpInfo) != 0); } @@ -640,8 +638,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask) { pTask->outputQueue = streamQueueOpen(); if (pTask->inputQueue == NULL || pTask->outputQueue == NULL) { - code = -1; - goto FAIL; + return -1; } pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; @@ -686,14 +683,9 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask) { streamSetupTrigger(pTask); - tqInfo("deploy stream task on vg %d, task id %d, child id %d", TD_VID(pTq->pVnode), pTask->taskId, + tqInfo("expand stream task on vg %d, task id %d, child id %d", TD_VID(pTq->pVnode), pTask->taskId, pTask->selfChildId); - -FAIL: - if (pTask->inputQueue) streamQueueClose(pTask->inputQueue); - if (pTask->outputQueue) streamQueueClose(pTask->outputQueue); - // TODO free executor - return code; + return 0; } int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen) { diff --git a/source/libs/stream/inc/streamInc.h b/source/libs/stream/inc/streamInc.h index 3776cb261f..6e30eeaa86 100644 --- a/source/libs/stream/inc/streamInc.h +++ b/source/libs/stream/inc/streamInc.h @@ -32,7 +32,6 @@ typedef struct { static SStreamGlobalEnv streamEnv; -int32_t streamExec(SStreamTask* pTask); int32_t streamPipelineExec(SStreamTask* pTask, int32_t batchNum, bool dispatch); int32_t streamDispatch(SStreamTask* pTask); diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 6da7d4fd59..d6e87c2736 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -185,7 +185,9 @@ int32_t streamProcessDispatchReq(SStreamTask* pTask, SStreamDispatchReq* pReq, S tFreeStreamDispatchReq(pReq); if (exec) { - streamTryExec(pTask); + if (streamTryExec(pTask) < 0) { + return -1; + } if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { streamDispatch(pTask); @@ -221,7 +223,9 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp) { } int32_t streamProcessRunReq(SStreamTask* pTask) { - streamTryExec(pTask); + if (streamTryExec(pTask) < 0) { + return -1; + } if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { streamDispatch(pTask); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 64a9537e6c..f34f68ffc6 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -99,16 +99,19 @@ int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t startVer, char* goto FAIL; } - taosHashPut(pMeta->pTasks, &pTask->taskId, sizeof(int32_t), &pTask, sizeof(void*)); + if (taosHashPut(pMeta->pTasks, &pTask->taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { + goto FAIL; + } if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), msg, msgLen, &pMeta->txn) < 0) { + taosHashRemove(pMeta->pTasks, &pTask->taskId, sizeof(int32_t)); ASSERT(0); - return -1; + goto FAIL; } return 0; FAIL: - if (pTask) taosMemoryFree(pTask); + if (pTask) tFreeSStreamTask(pTask); return -1; } @@ -158,11 +161,21 @@ int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { SStreamTask* pTask = *ppTask; taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t)); atomic_store_8(&pTask->taskStatus, TASK_STATUS__DROPPING); + + if (tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), &pMeta->txn) < 0) { + /*return -1;*/ + } + + while (1) { + int8_t schedStatus = + atomic_val_compare_exchange_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE, TASK_SCHED_STATUS__DROPPING); + if (schedStatus == TASK_SCHED_STATUS__INACTIVE) { + tFreeSStreamTask(pTask); + break; + } + } } - if (tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), &pMeta->txn) < 0) { - /*return -1;*/ - } return 0; } diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 6819e5329f..45b78a8c6e 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -38,7 +38,9 @@ void streamQueueClose(SStreamQueue* queue) { if (qItem) { taosFreeQitem(qItem); } else { - return; + break; } } + taosFreeQall(queue->qall); + taosCloseQueue(queue->queue); } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 638d39e5cc..0c35c1408e 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -152,8 +152,8 @@ int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { } void tFreeSStreamTask(SStreamTask* pTask) { - streamQueueClose(pTask->inputQueue); - streamQueueClose(pTask->outputQueue); + if (pTask->inputQueue) streamQueueClose(pTask->inputQueue); + if (pTask->outputQueue) streamQueueClose(pTask->outputQueue); if (pTask->exec.qmsg) taosMemoryFree(pTask->exec.qmsg); if (pTask->exec.executor) qDestroyTask(pTask->exec.executor); taosMemoryFree(pTask); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 7b06967940..f6b62b5ea8 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -293,6 +293,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_CGROUP_USED, "Consumer group being TAOS_DEFINE_ERROR(TSDB_CODE_MND_STREAM_ALREADY_EXIST, "Stream already exists") TAOS_DEFINE_ERROR(TSDB_CODE_MND_STREAM_NOT_EXIST, "Stream not exist") TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_STREAM_OPTION, "Invalid stream option") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_STREAM_MUST_BE_DELETED, "Stream must be dropped first") // mnode-sma TAOS_DEFINE_ERROR(TSDB_CODE_MND_SMA_ALREADY_EXIST, "SMA already exists") From 32be8a71ee0c61f6fd52545fdc747c85ef23cf68 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Wed, 17 Aug 2022 20:14:06 +0800 Subject: [PATCH 07/18] enh: rsma batch process --- source/dnode/vnode/src/inc/sma.h | 29 ++- source/dnode/vnode/src/sma/smaCommit.c | 91 ++++---- source/dnode/vnode/src/sma/smaEnv.c | 18 +- source/dnode/vnode/src/sma/smaRollup.c | 302 +++++++++++++++---------- 4 files changed, 259 insertions(+), 181 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index c36207e495..26adc8d5e5 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -96,10 +96,10 @@ struct SRSmaStat { int8_t commitStat; // 0 not in committing, 1 in committing int8_t execStat; // 0 not in exec , 1 in exec SArray *aTaskFile; // qTaskFiles committed recently(for recovery/snapshot r/w) - SHashObj *rsmaInfoHash; // key: stbUid, value: SRSmaInfo; + SHashObj *infoHash; // key: suid, value: SRSmaInfo + SHashObj *fetchHash; // key: suid, value: L1 or L2 or L1|L2 }; - struct SSmaStat { union { STSmaStat tsmaStat; // time-range-wise sma @@ -108,13 +108,14 @@ struct SSmaStat { T_REF_DECLARE() }; -#define SMA_STAT_TSMA(s) (&(s)->tsmaStat) -#define SMA_STAT_RSMA(s) (&(s)->rsmaStat) -#define RSMA_INFO_HASH(r) ((r)->rsmaInfoHash) -#define RSMA_TRIGGER_STAT(r) (&(r)->triggerStat) -#define RSMA_COMMIT_STAT(r) (&(r)->commitStat) -#define RSMA_REF_ID(r) ((r)->refId) -#define RSMA_FS_LOCK(r) (&(r)->lock) +#define SMA_STAT_TSMA(s) (&(s)->tsmaStat) +#define SMA_STAT_RSMA(s) (&(s)->rsmaStat) +#define RSMA_INFO_HASH(r) ((r)->infoHash) +#define RSMA_FETCH_HASH(r) ((r)->fetchHash) +#define RSMA_TRIGGER_STAT(r) (&(r)->triggerStat) +#define RSMA_COMMIT_STAT(r) (&(r)->commitStat) +#define RSMA_REF_ID(r) ((r)->refId) +#define RSMA_FS_LOCK(r) (&(r)->lock) struct SRSmaInfoItem { int8_t level; @@ -142,7 +143,7 @@ struct SRSmaInfo { #define RSMA_INFO_IS_DEL(r) ((r)->delFlag == 1) #define RSMA_INFO_SET_DEL(r) ((r)->delFlag = 1) #define RSMA_INFO_QTASK(r, i) ((r)->taskInfo[i]) -#define RSMA_INFO_IQTASK(r, i) ((r)->iTaskInfo[i]) +#define RSMA_INFO_IQTASK(r, i) ((r)->iTaskInfo[i]) #define RSMA_INFO_ITEM(r, i) (&(r)->items[i]) enum { @@ -167,6 +168,12 @@ enum { RSMA_RESTORE_SYNC = 2, }; +typedef enum { + RSMA_EXEC_OVERFLOW = 1, // triggered by queue buf overflow + RSMA_EXEC_TIMEOUT = 2, // triggered by timer + RSMA_EXEC_COMMIT = 3, // triggered by commit +} ERsmaExecType; + void tdDestroySmaEnv(SSmaEnv *pSmaEnv); void *tdFreeSmaEnv(SSmaEnv *pSmaEnv); @@ -240,7 +247,7 @@ static int32_t tdDestroySmaState(SSmaStat *pSmaStat, int8_t smaType); void *tdFreeSmaState(SSmaStat *pSmaStat, int8_t smaType); void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree); int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash); -int32_t tdRSmaProcessExecImpl(SSma *pSma, int8_t type); +int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type); int32_t tdProcessRSmaCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName); int32_t tdProcessRSmaRestoreImpl(SSma *pSma, int8_t type, int64_t qtaskFileVer); diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 101fca3346..25777f90ab 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -121,7 +121,7 @@ static int32_t tdProcessRSmaSyncPreCommitImpl(SSma *pSma) { return TSDB_CODE_SUCCESS; } - SSmaStat *pStat = SMA_ENV_STAT(pSmaEnv); + SSmaStat *pStat = SMA_ENV_STAT(pSmaEnv); SRSmaStat *pRSmaStat = SMA_STAT_RSMA(pStat); // step 1: set rsma stat paused @@ -333,7 +333,34 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { } } - // step 3: swap queue/qall and iQueue/iQal + /** + * @brief step 3: consume the SubmitReq in buffer + * 1) This is high cost task and should not put in asyncPreCommit originally. + * 2) But, if put in asyncCommit, would trigger taskInfo cloning frequently. + */ + nLoops = 0; + smaInfo("vgId:%d, start to wait for rsma qtask free, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); + + int8_t old; + while (1) { + old = atomic_val_compare_exchange_8(&pRSmaStat->execStat, 0, 1); + if (old == 0) break; + if (++nLoops > 1000) { + sched_yield(); + nLoops = 0; + smaDebug("vgId:%d, wait for rsma qtask free, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); + } + } + + smaInfo("vgId:%d, end to wait for rsma qtask free, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); + + if (tdRSmaProcessExecImpl(pSma, RSMA_EXEC_COMMIT) < 0) { + atomic_store_8(&pRSmaStat->execStat, 0); + return TSDB_CODE_FAILED; + } + + + // step 4: swap queue/qall and iQueue/iQall // lock taosWLockLatch(SMA_ENV_LOCK(pEnv)); @@ -351,11 +378,12 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { } atomic_store_64(&pRSmaStat->qBufSize, 0); - + atomic_store_8(&pRSmaStat->execStat, 0); // unlock taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); - // step 4: others + + // step 5: others pRSmaStat->commitAppliedVer = pSma->pVnode->state.applied; return TSDB_CODE_SUCCESS; @@ -375,36 +403,17 @@ static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma) { SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pSmaEnv); - // step 1: consume the SubmitReq in buffer - int32_t nLoops = 0; - smaDebug("vgId:%d start to wait for rsma qtask free, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); - while (pRSmaStat->execStat == 1) { - taosMsleep(15); - if ((++nLoops & 63) == 0) { - smaWarn("vgId:%d 1s waited for rsma exec stat = 0, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); - sched_yield(); - } - } - pRSmaStat->execStat = 1; - smaDebug("vgId:%d end to wait for rsma qtask free, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); - if (tdRSmaProcessExecImpl(pSma, 1) < 0) { - pRSmaStat->execStat = 0; - return TSDB_CODE_FAILED; - } - - // step 2: perform persist task for qTaskInfo operator + // perform persist task for qTaskInfo operator if (tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)) < 0) { - pRSmaStat->execStat = 0; return TSDB_CODE_FAILED; } - pRSmaStat->execStat = 0; return TSDB_CODE_SUCCESS; } /** - * @brief Migrate rsmaInfo from iRsmaInfo to rsmaInfo if rsmaInfoHash not empty. + * @brief Migrate rsmaInfo from iRsmaInfo to rsmaInfo if rsma infoHash not empty. * * @param pSma * @return int32_t @@ -424,13 +433,13 @@ static int32_t tdProcessRSmaAsyncPostCommitImpl(SSma *pSma) { void *pIter = taosHashIterate(RSMA_INFO_HASH(pRSmaStat), NULL); while (pIter) { - tb_uid_t *pSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL); + tb_uid_t *pSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL); SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)pIter; if (RSMA_INFO_IS_DEL(pRSmaInfo)) { int32_t refVal = T_REF_VAL_GET(pRSmaInfo); if (refVal == 0) { - if(!rsmaDeleted) { - if((rsmaDeleted = taosArrayInit(1, sizeof(tb_uid_t)))){ + if (!rsmaDeleted) { + if ((rsmaDeleted = taosArrayInit(1, sizeof(tb_uid_t)))) { taosArrayPush(rsmaDeleted, pSuid); } } @@ -461,22 +470,20 @@ static int32_t tdProcessRSmaAsyncPostCommitImpl(SSma *pSma) { pIter = taosHashIterate(RSMA_INFO_HASH(pRSmaStat), pIter); } - if (taosArrayGetSize(rsmaDeleted) > 0) { - for (int32_t i = 0; i < taosArrayGetSize(rsmaDeleted); ++i) { - tb_uid_t *pSuid = taosArrayGet(rsmaDeleted, i); - void *pRSmaInfo = taosHashGet(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(tb_uid_t)); - if ((pRSmaInfo = *(SRSmaInfo **)pRSmaInfo)) { - tdFreeRSmaInfo(pSma, pRSmaInfo, true); - smaDebug( - "vgId:%d, rsma async post commit, free rsma info since already deleted and ref is 0 for " - "table:%" PRIi64, - SMA_VID(pSma), *pSuid); - } - taosHashRemove(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(tb_uid_t)); + for (int32_t i = 0; i < taosArrayGetSize(rsmaDeleted); ++i) { + tb_uid_t *pSuid = taosArrayGet(rsmaDeleted, i); + void *pRSmaInfo = taosHashGet(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(tb_uid_t)); + if ((pRSmaInfo = *(SRSmaInfo **)pRSmaInfo)) { + tdFreeRSmaInfo(pSma, pRSmaInfo, true); + smaDebug( + "vgId:%d, rsma async post commit, free rsma info since already deleted and ref is 0 for " + "table:%" PRIi64, + SMA_VID(pSma), *pSuid); } - // remove suid in files - taosArrayDestroy(rsmaDeleted); + taosHashRemove(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(tb_uid_t)); } + taosArrayDestroy(rsmaDeleted); + // TODO: remove suid in files? // unlock taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index 73f8060559..f51aad22bd 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -171,7 +171,7 @@ int32_t tdUnRefSmaStat(SSma *pSma, SSmaStat *pStat) { int32_t tdRefRSmaInfo(SSma *pSma, SRSmaInfo *pRSmaInfo) { if (!pRSmaInfo) return 0; - + int ref = T_REF_INC(pRSmaInfo); smaDebug("vgId:%d, ref rsma info:%p, val:%d", SMA_VID(pSma), pRSmaInfo, ref); return 0; @@ -228,7 +228,12 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pS RSMA_INFO_HASH(pRSmaStat) = taosHashInit( RSMA_TASK_INFO_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_ENTRY_LOCK); if (!RSMA_INFO_HASH(pRSmaStat)) { - taosMemoryFreeClear(*pSmaStat); + return TSDB_CODE_FAILED; + } + + RSMA_FETCH_HASH(pRSmaStat) = taosHashInit( + RSMA_TASK_INFO_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_ENTRY_LOCK); + if (!RSMA_FETCH_HASH(pRSmaStat)) { return TSDB_CODE_FAILED; } } else if (smaType == TSDB_SMA_TYPE_TIME_RANGE) { @@ -274,7 +279,10 @@ static void tdDestroyRSmaStat(void *pRSmaStat) { } taosHashCleanup(RSMA_INFO_HASH(pStat)); - // step 3: wait all triggered fetch tasks finished + // step 3: destroy the rsma fetch hash + taosHashCleanup(RSMA_FETCH_HASH(pStat)); + + // step 4: wait all triggered fetch tasks finished int32_t nLoops = 0; while (1) { if (T_REF_VAL_GET((SSmaStat *)pStat) == 0) { @@ -289,8 +297,8 @@ static void tdDestroyRSmaStat(void *pRSmaStat) { nLoops = 0; } } - - // step 4: free pStat + + // step 5: free pStat taosMemoryFreeClear(pStat); } } diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index de4b7dd808..1e2a8b35a4 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -17,7 +17,8 @@ #define RSMA_QTASKINFO_BUFSIZE (32768) #define RSMA_QTASKINFO_HEAD_LEN (sizeof(int32_t) + sizeof(int8_t) + sizeof(int64_t)) // len + type + suid -#define RSMA_QTASKEXEC_BUFSIZ (1048576) +#define RSMA_QTASKEXEC_BUFSIZE (1048576) +#define RSMA_SUBMIT_BATCH_SIZE (1024) SSmaMgmt smaMgmt = { .inited = 0, @@ -35,9 +36,11 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUi static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo, int8_t idx); static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo, - int8_t type, int8_t level); + ERsmaExecType type, int8_t level); static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid); static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); +static void tdFreeRSmaSubmitItems(SArray *pItems); +static int32_t tdRSmaConsumeAndFetch(SSma *pSma, int64_t suid, int8_t level, SArray *pSubmitArr); static int32_t tdRSmaFetchAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, int64_t suid); static void tdRSmaFetchTrigger(void *param, void *tmrId); @@ -559,6 +562,14 @@ void *tdUidStoreFree(STbUidStore *pStore) { return NULL; } +/** + * @brief The SubmitReq for rsma L2/L3 is inserted by tsdbInsertData method directly while not by WriteQ, as the queue + * would be freed when close Vnode, thus lock should be used if with race condition. + * @param pTsdb + * @param version + * @param pReq + * @return int32_t + */ static int32_t tdProcessSubmitReq(STsdb *pTsdb, int64_t version, void *pReq) { if (!pReq) { terrno = TSDB_CODE_INVALID_PTR; @@ -566,7 +577,7 @@ static int32_t tdProcessSubmitReq(STsdb *pTsdb, int64_t version, void *pReq) { } SSubmitReq *pSubmitReq = (SSubmitReq *)pReq; - + // TODO: spin lock for race conditiond if (tsdbInsertData(pTsdb, version, pSubmitReq, NULL) < 0) { return TSDB_CODE_FAILED; } @@ -696,7 +707,7 @@ _err: } /** - * @brief Copy msg to rsmaQueueBuffer + * @brief Copy msg to rsmaQueueBuffer for batch process * * @param pSma * @param pMsg @@ -722,17 +733,17 @@ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, const void *pMsg, int32_t inpu int64_t bufSize = atomic_add_fetch_64(&pRSmaStat->qBufSize, pReq->header.contLen); // smoothing consume - int32_t n = bufSize / RSMA_QTASKEXEC_BUFSIZ; + int32_t n = bufSize / RSMA_QTASKEXEC_BUFSIZE; if (n > 1) { if (n > 10) { n = 10; } taosMsleep(n << 4); if (n > 2) { - smaWarn("vgId:%d pInfo->queue itemSize:%d, memSize:%" PRIi64 ", sleep %d ms", SMA_VID(pSma), + smaWarn("vgId:%d, pInfo->queue itemSize:%d, memSize:%" PRIi64 ", sleep %d ms", SMA_VID(pSma), taosQueueItemSize(pInfo->queue), taosQueueMemorySize(pInfo->queue), n << 4); } else { - smaDebug("vgId:%d pInfo->queue itemSize:%d, memSize:%" PRIi64 ", sleep %d ms", SMA_VID(pSma), + smaDebug("vgId:%d, pInfo->queue itemSize:%d, memSize:%" PRIi64 ", sleep %d ms", SMA_VID(pSma), taosQueueItemSize(pInfo->queue), taosQueueMemorySize(pInfo->queue), n << 4); } } @@ -751,7 +762,7 @@ static int32_t tdRsmaPrintSubmitReq(SSma *pSma, SSubmitReq *pReq) { if (pBlock == NULL) break; tInitSubmitBlkIter(&msgIter, pBlock, &blkIter); while ((row = tGetSubmitBlkNext(&blkIter)) != NULL) { - smaDebug("vgId:%d numOfRows:%d, suid:%" PRIi64 ", uid:%" PRIi64 ", version:%" PRIi64 ", ts:%" PRIi64, + smaDebug("vgId:%d, numOfRows:%d, suid:%" PRIi64 ", uid:%" PRIi64 ", version:%" PRIi64 ", ts:%" PRIi64, SMA_VID(pSma), msgIter.numOfRows, msgIter.suid, msgIter.uid, pReq->version, row->ts); } } @@ -771,10 +782,10 @@ static int32_t tdRsmaPrintSubmitReq(SSma *pSma, SSubmitReq *pReq) { * @return int32_t */ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo, - int8_t type, int8_t level) { + ERsmaExecType type, int8_t level) { int32_t idx = level - 1; - void *qTaskInfo = (type == 0) ? RSMA_INFO_QTASK(pInfo, idx) : RSMA_INFO_IQTASK(pInfo, idx); + void *qTaskInfo = (type == RSMA_EXEC_COMMIT) ? RSMA_INFO_IQTASK(pInfo, idx) : RSMA_INFO_QTASK(pInfo, idx); if (!qTaskInfo) { smaDebug("vgId:%d, no qTaskInfo to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, pInfo->suid); @@ -791,7 +802,7 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, #if 0 for (int32_t i = 0; i < msgSize; ++i) { SSubmitReq *pReq = *(SSubmitReq **)((char *)pMsg + i * sizeof(void *)); - smaDebug("vgId:%d [%d][%d] version %" PRIi64, SMA_VID(pSma), msgSize, i, pReq->version); + smaDebug("vgId:%d, [%d][%d] version %" PRIi64, SMA_VID(pSma), msgSize, i, pReq->version); tdRsmaPrintSubmitReq(pSma, pReq); } #endif @@ -802,11 +813,6 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); tdRSmaFetchAndSubmitResult(pSma, qTaskInfo, pItem, pInfo->pTSchema, pInfo->suid); - atomic_store_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE); - - if (smaMgmt.tmrHandle) { - taosTmrReset(tdRSmaFetchTrigger, pItem->maxDelay, pItem, smaMgmt.tmrHandle, &pItem->tmrId); - } return TSDB_CODE_SUCCESS; } @@ -854,13 +860,7 @@ static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid) { } taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); - if (RSMA_COMMIT_STAT(pStat) == 0) { // return NULL if not in committing stat - return NULL; - } - - // unlock - taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); - return pRSmaInfo; + return NULL; } static FORCE_INLINE void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo) { @@ -890,6 +890,16 @@ static int32_t tdExecuteRSmaAsync(SSma *pSma, const void *pMsg, int32_t inputTyp tdReleaseRSmaInfo(pSma, pRSmaInfo); return TSDB_CODE_FAILED; } + if (smaMgmt.tmrHandle) { + SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, 0); + if (pItem->level > 0) { + atomic_store_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE); + } + pItem = RSMA_INFO_ITEM(pRSmaInfo, 1); + if (pItem->level > 0) { + atomic_store_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE); + } + } } else { ASSERT(0); } @@ -898,51 +908,23 @@ static int32_t tdExecuteRSmaAsync(SSma *pSma, const void *pMsg, int32_t inputTyp return TSDB_CODE_SUCCESS; } -#if 0 -/** - * @brief sync mode - * - * @param pSma - * @param pMsg - * @param inputType - * @param suid - * @return int32_t - */ -static int32_t tdExecuteRSma(SSma *pSma, const void *pMsg, int32_t inputType, tb_uid_t suid) { - SRSmaInfo *pRSmaInfo = tdAcquireRSmaInfoBySuid(pSma, suid); - if (!pRSmaInfo) { - smaDebug("vgId:%d, execute rsma, no rsma info for suid:%" PRIu64, SMA_VID(pSma), suid); - return TSDB_CODE_SUCCESS; - } - - if (inputType == STREAM_INPUT__DATA_SUBMIT) { - tdExecuteRSmaImpl(pSma, pMsg, inputType, pRSmaInfo, suid, TSDB_RETENTION_L1); - tdExecuteRSmaImpl(pSma, pMsg, inputType, pRSmaInfo, suid, TSDB_RETENTION_L2); - } - - tdReleaseRSmaInfo(pSma, pRSmaInfo); - return TSDB_CODE_SUCCESS; -} -#endif - static int32_t tdRSmaExecCheck(SSma *pSma) { - SRSmaStat *pRsmaStat = SMA_RSMA_STAT(pSma); - int64_t bufSize = atomic_load_64(&pRsmaStat->qBufSize); + SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pSma); + int64_t bufSize = atomic_load_64(&pRSmaStat->qBufSize); - if ((pRsmaStat->execStat == 1) || (bufSize < RSMA_QTASKEXEC_BUFSIZ)) { - if (bufSize > RSMA_QTASKEXEC_BUFSIZ) { - smaDebug("vgId:%d bufSize is %d but has no chance to exec as qTaskInfo occupied by another task", SMA_VID(pSma), - bufSize); - } else { - smaDebug("vgId:%d bufSize is %d but has no chance to exec as less than %d", SMA_VID(pSma), bufSize, - RSMA_QTASKEXEC_BUFSIZ); - } + if (bufSize < RSMA_QTASKEXEC_BUFSIZE) { + smaDebug("vgId:%d, bufSize is %d but has no chance to exec as less than %d", SMA_VID(pSma), bufSize, + RSMA_QTASKEXEC_BUFSIZE); return TSDB_CODE_SUCCESS; } - smaDebug("vgId:%d bufSize is %d and has chance to exec as qTaskInfo is free now", SMA_VID(pSma), bufSize); + if (atomic_val_compare_exchange_8(&pRSmaStat->execStat, 0, 1) == 1) { + smaDebug("vgId:%d, bufSize is %d but has no chance to exec as qTaskInfo occupied by another task", SMA_VID(pSma), + bufSize); + return TSDB_CODE_SUCCESS; + } - pRsmaStat->execStat = 1; + smaDebug("vgId:%d, bufSize is %d and has chance to exec as qTaskInfo is free now", SMA_VID(pSma), bufSize); SRSmaExecMsg fetchMsg; int32_t contLen = sizeof(SMsgHead); @@ -967,7 +949,7 @@ static int32_t tdRSmaExecCheck(SSma *pSma) { return TSDB_CODE_SUCCESS; _err: - pRsmaStat->execStat = 0; + atomic_store_8(&pRSmaStat->execStat, 0); return TSDB_CODE_FAILED; } @@ -1592,7 +1574,7 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) { } _end: - // taosTmrReset(tdRSmaFetchTrigger, pItem->maxDelay, pItem, smaMgmt.tmrHandle, &pItem->tmrId); + taosTmrReset(tdRSmaFetchTrigger, pItem->maxDelay, pItem, smaMgmt.tmrHandle, &pItem->tmrId); tdReleaseSmaRef(smaMgmt.rsetId, pRSmaInfo->refId); } @@ -1656,13 +1638,11 @@ _err: * @return int32_t */ int32_t smaProcessFetch(SSma *pSma, void *pMsg) { - SRpcMsg *pRpcMsg = (SRpcMsg *)pMsg; - SRSmaFetchMsg req = {0}; - SDecoder decoder = {0}; - void *pBuf = NULL; - SRSmaInfo *pInfo = NULL; - SRSmaInfoItem *pItem = NULL; - + SRpcMsg *pRpcMsg = (SRpcMsg *)pMsg; + SRSmaFetchMsg req = {0}; + SDecoder decoder = {0}; + void *pBuf = NULL; + SRSmaStat *pRSmaStat = NULL; if (!pRpcMsg || pRpcMsg->contLen < sizeof(SMsgHead)) { terrno = TSDB_CODE_RSMA_FETCH_MSG_MSSED_UP; goto _err; @@ -1676,38 +1656,33 @@ int32_t smaProcessFetch(SSma *pSma, void *pMsg) { goto _err; } -#if 0 - pInfo = tdAcquireRSmaInfoBySuid(pSma, req.suid); - if (!pInfo) { - if (terrno == TSDB_CODE_SUCCESS) { - terrno = TSDB_CODE_RSMA_EMPTY_INFO; + pRSmaStat = SMA_RSMA_STAT(pSma); + + if (atomic_val_compare_exchange_8(&pRSmaStat->execStat, 0, 1) == 0) { + SArray *pSubmitArr = NULL; + if (!(pSubmitArr = taosArrayInit(RSMA_SUBMIT_BATCH_SIZE, POINTER_BYTES))) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + atomic_store_8(&pRSmaStat->execStat, 0); + goto _err; } - smaWarn("vgId:%d, failed to process rsma fetch msg for suid:%" PRIi64 " level:%" PRIi8 " since %s", SMA_VID(pSma), - req.suid, req.level, terrstr()); - goto _err; + tdRSmaConsumeAndFetch(pSma, req.suid, req.level, pSubmitArr); + atomic_store_8(&pRSmaStat->execStat, 0); + taosArrayDestroy(pSubmitArr); + } else { + int8_t level = req.level; + int8_t *val = taosHashGet(RSMA_FETCH_HASH(pRSmaStat), &req.suid, sizeof(req.suid)); + if (val) { + level |= (*val); + } + ASSERT(level >= 1 && level <= 3); + taosHashPut(RSMA_FETCH_HASH(pRSmaStat), &req.suid, sizeof(req.suid), &level, sizeof(level)); } - pItem = RSMA_INFO_ITEM(pInfo, req.level - 1); - - SSDataBlock dataBlock = {.info.type = STREAM_GET_ALL}; - qTaskInfo_t taskInfo = RSMA_INFO_QTASK(pInfo, req.level - 1); - if ((terrno = qSetMultiStreamInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK)) < 0) { - goto _err; - } - if (tdRSmaFetchAndSubmitResult(pSma, taskInfo, pItem, pInfo->pTSchema, pInfo->suid) < 0) { - goto _err; - } - - tdCleanupStreamInputDataBlock(taskInfo); - - tdReleaseRSmaInfo(pSma, pInfo); -#endif tDecoderClear(&decoder); smaDebug("vgId:%d, success to process rsma fetch msg for suid:%" PRIi64 " level:%" PRIi8, SMA_VID(pSma), req.suid, req.level); return TSDB_CODE_SUCCESS; _err: - // tdReleaseRSmaInfo(pSma, pInfo); tDecoderClear(&decoder); smaError("vgId:%d, failed to process rsma fetch msg since %s", SMA_VID(pSma), terrstr()); return TSDB_CODE_FAILED; @@ -1719,28 +1694,101 @@ static void tdFreeRSmaSubmitItems(SArray *pItems) { } } +static int32_t tdRSmaConsumeAndFetch(SSma *pSma, int64_t suid, int8_t level, SArray *pSubmitArr) { + SRSmaInfo *pInfo = tdAcquireRSmaInfoBySuid(pSma, suid); + if (!pInfo) { + return TSDB_CODE_SUCCESS; + } + + // step 1: consume submit req + int64_t qMemSize = 0; + if ((qMemSize = taosQueueMemorySize(pInfo->queue) > 0)) { + taosReadAllQitems(pInfo->queue, pInfo->qall); // queue has mutex lock + + SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pSma); + atomic_fetch_sub_64(&pRSmaStat->qBufSize, qMemSize); + + taosArrayClear(pSubmitArr); + + while (1) { + void *msg = NULL; + taosGetQitem(pInfo->qall, (void **)&msg); + if (msg) { + if (taosArrayPush(pSubmitArr, &msg) < 0) { + tdFreeRSmaSubmitItems(pSubmitArr); + goto _err; + } + } else { + break; + } + } + + int32_t size = taosArrayGetSize(pSubmitArr); + if (size > 0) { + for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { + if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, STREAM_INPUT__MERGED_SUBMIT, pInfo, RSMA_EXEC_TIMEOUT, i) < + 0) { + tdFreeRSmaSubmitItems(pSubmitArr); + goto _err; + } + } + + tdFreeRSmaSubmitItems(pSubmitArr); + } + } + + // step 2: fetch rsma result + SSDataBlock dataBlock = {.info.type = STREAM_GET_ALL}; + for (int8_t i = 1; i <= TSDB_RETENTION_L2; ++i) { + if (level & i) { + qTaskInfo_t taskInfo = RSMA_INFO_QTASK(pInfo, i - 1); + if (!taskInfo) { + continue; + } + if ((terrno = qSetMultiStreamInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK)) < 0) { + goto _err; + } + SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, i - 1); + if (tdRSmaFetchAndSubmitResult(pSma, taskInfo, pItem, pInfo->pTSchema, suid) < 0) { + tdCleanupStreamInputDataBlock(taskInfo); + goto _err; + } + + tdCleanupStreamInputDataBlock(taskInfo); + } + } + +_end: + tdReleaseRSmaInfo(pSma, pInfo); + return TSDB_CODE_SUCCESS; +_err: + tdReleaseRSmaInfo(pSma, pInfo); + return TSDB_CODE_FAILED; +} + /** * @brief * * @param pSma - * @param type 0 triggered when buffer overflow, 1 triggered by commit + * @param type * @return int32_t */ -int32_t tdRSmaProcessExecImpl(SSma *pSma, int8_t type) { +int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type) { SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); SHashObj *infoHash = NULL; SArray *pSubmitQArr = NULL; SArray *pSubmitArr = NULL; + bool isFetchAll = false; if (!pRSmaStat || !(infoHash = RSMA_INFO_HASH(pRSmaStat))) { terrno = TSDB_CODE_RSMA_INVALID_STAT; goto _err; } - if (type == 0) { + if (type == RSMA_EXEC_OVERFLOW) { taosRLockLatch(SMA_ENV_LOCK(pEnv)); - if (atomic_load_64(&pRSmaStat->qBufSize) < RSMA_QTASKEXEC_BUFSIZ) { + if (atomic_load_64(&pRSmaStat->qBufSize) < RSMA_QTASKEXEC_BUFSIZE) { taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); return TSDB_CODE_SUCCESS; } @@ -1752,19 +1800,19 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma, int8_t type) { goto _err; } - if (!(pSubmitArr = taosArrayInit(1024, POINTER_BYTES))) { + if (!(pSubmitArr = taosArrayInit(RSMA_SUBMIT_BATCH_SIZE, POINTER_BYTES))) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto _err; } + // step 1: rsma exec - consume data in buffer queue for all suids SRSmaExecQItem qItem = {0}; - taosWLockLatch(SMA_ENV_LOCK(pEnv)); - void *pIter = taosHashIterate(infoHash, NULL); - if (type == 0) { + void *pIter = taosHashIterate(infoHash, NULL); // infoHash has r/w lock + if (type == RSMA_EXEC_OVERFLOW) { while (pIter) { SRSmaInfo *pInfo = *(SRSmaInfo **)pIter; if (taosQueueItemSize(pInfo->queue)) { - taosReadAllQitems(pInfo->queue, pInfo->qall); + taosReadAllQitems(pInfo->queue, pInfo->qall); // queue has mutex lock qItem.qall = &pInfo->qall; qItem.pRSmaInfo = pIter; taosArrayPush(pSubmitQArr, &qItem); @@ -1772,7 +1820,7 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma, int8_t type) { ASSERT(taosQueueItemSize(pInfo->queue) == 0); pIter = taosHashIterate(infoHash, pIter); } - } else if (type == 1) { + } else if (type == RSMA_EXEC_COMMIT) { while (pIter) { SRSmaInfo *pInfo = *(SRSmaInfo **)pIter; if (taosQueueItemSize(pInfo->iQueue)) { @@ -1788,7 +1836,6 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma, int8_t type) { ASSERT(0); } atomic_store_64(&pRSmaStat->qBufSize, 0); - taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); int32_t qSize = taosArrayGetSize(pSubmitQArr); for (int32_t i = 0; i < qSize; ++i) { @@ -1808,22 +1855,31 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma, int8_t type) { int32_t size = taosArrayGetSize(pSubmitArr); if (size > 0) { - if (type == 0 || type == 1) { - SRSmaInfo *pInfo = *(SRSmaInfo **)pItem->pRSmaInfo; - for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { - if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, STREAM_INPUT__MERGED_SUBMIT, pInfo, type, i) < 0) { - tdFreeRSmaSubmitItems(pSubmitArr); - goto _err; - } + SRSmaInfo *pInfo = *(SRSmaInfo **)pItem->pRSmaInfo; + for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { + if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, STREAM_INPUT__MERGED_SUBMIT, pInfo, type, i) < 0) { + tdFreeRSmaSubmitItems(pSubmitArr); + goto _err; } - } else { - ASSERT(0); } tdFreeRSmaSubmitItems(pSubmitArr); taosArrayClear(pSubmitArr); } } + // step 2: rsma fetch - consume data in buffer queue for suids triggered by timer + if (taosHashGetSize(RSMA_FETCH_HASH(pRSmaStat)) <= 0) { + goto _end; + } + pIter = taosHashIterate(RSMA_FETCH_HASH(pRSmaStat), NULL); + if (pIter) { + tdRSmaConsumeAndFetch(pSma, *(int64_t *)taosHashGetKey(pIter, NULL), *(int8_t *)pIter, pSubmitArr); + while ((pIter = taosHashIterate(RSMA_FETCH_HASH(pRSmaStat), pIter))) { + tdRSmaConsumeAndFetch(pSma, *(int64_t *)taosHashGetKey(pIter, NULL), *(int8_t *)pIter, pSubmitArr); + } + } + +_end: taosArrayDestroy(pSubmitArr); taosArrayDestroy(pSubmitQArr); return TSDB_CODE_SUCCESS; @@ -1842,23 +1898,23 @@ _err: */ int32_t smaProcessExec(SSma *pSma, void *pMsg) { SRpcMsg *pRpcMsg = (SRpcMsg *)pMsg; - SRSmaStat *pRsmaStat = SMA_RSMA_STAT(pSma); + SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pSma); if (!pRpcMsg || pRpcMsg->contLen < sizeof(SMsgHead)) { terrno = TSDB_CODE_RSMA_FETCH_MSG_MSSED_UP; goto _err; } - smaDebug("vgId:%d, begin to process rsma exec msg by thread:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); - if (tdRSmaProcessExecImpl(pSma, 0) < 0) { + smaDebug("vgId:%d, begin to process rsma exec msg by TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); + if (tdRSmaProcessExecImpl(pSma, RSMA_EXEC_OVERFLOW) < 0) { goto _err; } - pRsmaStat->execStat = 0; - smaDebug("vgId:%d, success to process rsma exec msg by thead:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); + atomic_store_8(&pRSmaStat->execStat, 0); + smaDebug("vgId:%d, success to process rsma exec msg by TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); return TSDB_CODE_SUCCESS; _err: - pRsmaStat->execStat = 0; - smaError("vgId:%d, failed to process rsma fetch msg by thread:%p since %s", SMA_VID(pSma), - (void *)taosGetSelfPthreadId(), terrstr()); + atomic_store_8(&pRSmaStat->execStat, 0); + smaError("vgId:%d, failed to process rsma exec msg by TID:%p since %s", SMA_VID(pSma), (void *)taosGetSelfPthreadId(), + terrstr()); return TSDB_CODE_FAILED; } From ea891dc79353ee862509bf26ae0182e7d879b69b Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Wed, 17 Aug 2022 20:30:42 +0800 Subject: [PATCH 08/18] other: code optimization --- source/dnode/vnode/src/vnd/vnodeCommit.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 2f5169a0ec..ab618fe430 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -222,6 +222,13 @@ int vnodeCommit(SVnode *pVnode) { pVnode->state.commitTerm = pVnode->state.applyTerm; + // preCommit + // smaSyncPreCommit(pVnode->pSma); + smaAsyncPreCommit(pVnode->pSma); + + vnodeBufPoolUnRef(pVnode->inUse); + pVnode->inUse = NULL; + // save info info.config = pVnode->config; info.state.committed = pVnode->state.applied; @@ -234,10 +241,6 @@ int vnodeCommit(SVnode *pVnode) { } walBeginSnapshot(pVnode->pWal, pVnode->state.applied); - // preCommit - // smaSyncPreCommit(pVnode->pSma); - smaAsyncPreCommit(pVnode->pSma); - // commit each sub-system if (metaCommit(pVnode->pMeta) < 0) { ASSERT(0); @@ -245,7 +248,7 @@ int vnodeCommit(SVnode *pVnode) { } if (VND_IS_RSMA(pVnode)) { - smaAsyncCommit(pVnode->pSma); // would write L2/L3 data into BufPool + smaAsyncCommit(pVnode->pSma); if (tsdbCommit(VND_RSMA0(pVnode)) < 0) { ASSERT(0); From d65b75514a130983f0cb035f91516e72cc167a88 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Wed, 17 Aug 2022 20:38:34 +0800 Subject: [PATCH 09/18] fix: remove obsoleted codes --- source/dnode/vnode/src/vnd/vnodeCommit.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index ab618fe430..fc09eaac44 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -268,9 +268,6 @@ int vnodeCommit(SVnode *pVnode) { return -1; } } - - vnodeBufPoolUnRef(pVnode->inUse); - pVnode->inUse = NULL; if (tqCommit(pVnode->pTq) < 0) { ASSERT(0); From 963287798dcacd32ef03b70f83f3813c006e0c67 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Wed, 17 Aug 2022 19:44:53 +0800 Subject: [PATCH 10/18] refactor(mnode): drop stream task --- include/libs/stream/tstream.h | 1 - source/dnode/mnode/impl/src/mndStb.c | 7 ++++- source/dnode/vnode/src/tq/tqSink.c | 44 +++++++++++++++------------- source/libs/stream/src/streamMeta.c | 8 +++++ source/libs/stream/src/streamQueue.c | 5 ++-- source/libs/stream/src/streamTask.c | 8 +++++ 6 files changed, 48 insertions(+), 25 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 484d0991f2..384c6a289f 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -216,7 +216,6 @@ typedef struct { void* vnode; FTbSink* tbSinkFunc; STSchema* pTSchema; - SHashObj* pHash; // groupId to tbuid } STaskSinkTb; typedef void FSmaSink(void* vnode, int64_t smaId, const SArray* data); diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 59c6d65953..6083a76981 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -2044,7 +2044,12 @@ static int32_t mndCheckDropStbForStream(SMnode *pMnode, const char *stbFullName, pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); if (pIter == NULL) break; - if (pStream->smaId == 0 && pStream->targetStbUid == suid) { + if (pStream->smaId != 0) { + sdbRelease(pSdb, pStream); + continue; + } + + if (pStream->targetStbUid == suid) { sdbRelease(pSdb, pStream); return -1; } diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 55630511bf..522bf46aa1 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -231,34 +231,35 @@ void tqTableSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data) { ASSERT(pTask->tbSink.pTSchema); deleteReq.deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq)); - SSubmitReq* pReq = tqBlockToSubmit(pVnode, pRes, pTask->tbSink.pTSchema, true, pTask->tbSink.stbUid, - pTask->tbSink.stbFullName, &deleteReq); + SSubmitReq* submitReq = tqBlockToSubmit(pVnode, pRes, pTask->tbSink.pTSchema, true, pTask->tbSink.stbUid, + pTask->tbSink.stbFullName, &deleteReq); tqDebug("vgId:%d, task %d convert blocks over, put into write-queue", TD_VID(pVnode), pTask->taskId); - int32_t code; - int32_t len; - tEncodeSize(tEncodeSBatchDeleteReq, &deleteReq, len, code); - if (code < 0) { - // - ASSERT(0); - } - SEncoder encoder; - void* buf = rpcMallocCont(len + sizeof(SMsgHead)); - void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); - tEncoderInit(&encoder, abuf, len); - tEncodeSBatchDeleteReq(&encoder, &deleteReq); - tEncoderClear(&encoder); - - ((SMsgHead*)buf)->vgId = pVnode->config.vgId; - if (taosArrayGetSize(deleteReq.deleteReqs) != 0) { + int32_t code; + int32_t len; + tEncodeSize(tEncodeSBatchDeleteReq, &deleteReq, len, code); + if (code < 0) { + // + ASSERT(0); + } + SEncoder encoder; + void* serializedDeleteReq = rpcMallocCont(len + sizeof(SMsgHead)); + void* abuf = POINTER_SHIFT(serializedDeleteReq, sizeof(SMsgHead)); + tEncoderInit(&encoder, abuf, len); + tEncodeSBatchDeleteReq(&encoder, &deleteReq); + tEncoderClear(&encoder); + + ((SMsgHead*)serializedDeleteReq)->vgId = pVnode->config.vgId; + SRpcMsg msg = { .msgType = TDMT_VND_BATCH_DEL, - .pCont = buf, + .pCont = serializedDeleteReq, .contLen = len + sizeof(SMsgHead), }; if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) { + rpcFreeCont(serializedDeleteReq); tqDebug("failed to put into write-queue since %s", terrstr()); } } @@ -268,11 +269,12 @@ void tqTableSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data) { // build write msg SRpcMsg msg = { .msgType = TDMT_VND_SUBMIT, - .pCont = pReq, - .contLen = ntohl(pReq->length), + .pCont = submitReq, + .contLen = ntohl(submitReq->length), }; if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) { + rpcFreeCont(submitReq); tqDebug("failed to put into write-queue since %s", terrstr()); } } diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index f34f68ffc6..5ff700546c 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -15,6 +15,7 @@ #include "executor.h" #include "tstream.h" +#include "ttimer.h" SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc) { SStreamMeta* pMeta = taosMemoryCalloc(1, sizeof(SStreamMeta)); @@ -166,13 +167,20 @@ int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { /*return -1;*/ } + if (pTask->triggerParam != 0) { + taosTmrStop(pTask->timer); + } + while (1) { int8_t schedStatus = atomic_val_compare_exchange_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE, TASK_SCHED_STATUS__DROPPING); if (schedStatus == TASK_SCHED_STATUS__INACTIVE) { tFreeSStreamTask(pTask); break; + } else if (schedStatus == TASK_SCHED_STATUS__DROPPING) { + break; } + taosMsleep(10); } } diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 45b78a8c6e..ac10c82587 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -13,7 +13,7 @@ * along with this program. If not, see . */ -#include "tstream.h" +#include "streamInc.h" SStreamQueue* streamQueueOpen() { SStreamQueue* pQueue = taosMemoryCalloc(1, sizeof(SStreamQueue)); @@ -36,11 +36,12 @@ void streamQueueClose(SStreamQueue* queue) { while (1) { void* qItem = streamQueueNextItem(queue); if (qItem) { - taosFreeQitem(qItem); + streamFreeQitem(qItem); } else { break; } } taosFreeQall(queue->qall); taosCloseQueue(queue->queue); + taosMemoryFree(queue); } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 0c35c1408e..d588d90543 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -156,5 +156,13 @@ void tFreeSStreamTask(SStreamTask* pTask) { if (pTask->outputQueue) streamQueueClose(pTask->outputQueue); if (pTask->exec.qmsg) taosMemoryFree(pTask->exec.qmsg); if (pTask->exec.executor) qDestroyTask(pTask->exec.executor); + taosArrayDestroy(pTask->childEpInfo); + if (pTask->outputType == TASK_OUTPUT__TABLE) { + tDeleteSSchemaWrapper(pTask->tbSink.pSchemaWrapper); + taosMemoryFree(pTask->tbSink.pTSchema); + } + if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { + taosArrayDestroy(pTask->shuffleDispatcher.dbInfo.pVgroupInfos); + } taosMemoryFree(pTask); } From 5f54f90416dbf31021a75c30cc7f11e0c2fc2c93 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Thu, 18 Aug 2022 09:30:24 +0800 Subject: [PATCH 11/18] fix: empty struct --- include/common/tmsg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index cedbdb6087..8948d9f670 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2664,7 +2664,7 @@ typedef struct { } SVgEpSet; typedef struct { - // padding + int64_t padding; } SRSmaExecMsg; typedef struct { From c5d3440b8419abc8b34964a0777acd5e06d9bb56 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Thu, 18 Aug 2022 09:58:24 +0800 Subject: [PATCH 12/18] other: code optimization --- include/common/tmsg.h | 2 +- source/dnode/vnode/src/inc/sma.h | 12 +++++++----- source/dnode/vnode/src/vnd/vnodeCommit.c | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index cedbdb6087..f870bd161f 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2664,7 +2664,7 @@ typedef struct { } SVgEpSet; typedef struct { - // padding + int32_t padding; } SRSmaExecMsg; typedef struct { diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 26adc8d5e5..c43772062e 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -118,17 +118,19 @@ struct SSmaStat { #define RSMA_FS_LOCK(r) (&(r)->lock) struct SRSmaInfoItem { - int8_t level; - int8_t triggerStat; - int32_t maxDelay; - tmr_h tmrId; + int8_t level; + int8_t triggerStat; + uint16_t interval; // second + int32_t maxDelay; + tmr_h tmrId; }; struct SRSmaInfo { STSchema *pTSchema; int64_t suid; int64_t refId; // refId of SRSmaStat - int8_t delFlag; + uint64_t delFlag : 1; + uint64_t lastReceived : 63; // second T_REF_DECLARE() SRSmaInfoItem items[TSDB_RETENTION_L2]; void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index fc09eaac44..89542baa25 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -220,8 +220,6 @@ int vnodeCommit(SVnode *pVnode) { vInfo("vgId:%d, start to commit, commit ID:%" PRId64 " version:%" PRId64, TD_VID(pVnode), pVnode->state.commitID, pVnode->state.applied); - pVnode->state.commitTerm = pVnode->state.applyTerm; - // preCommit // smaSyncPreCommit(pVnode->pSma); smaAsyncPreCommit(pVnode->pSma); @@ -229,6 +227,8 @@ int vnodeCommit(SVnode *pVnode) { vnodeBufPoolUnRef(pVnode->inUse); pVnode->inUse = NULL; + pVnode->state.commitTerm = pVnode->state.applyTerm; + // save info info.config = pVnode->config; info.state.committed = pVnode->state.applied; From 64747cb029bbecd31d414c7eab8fe992ec8701b2 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Thu, 18 Aug 2022 10:02:27 +0800 Subject: [PATCH 13/18] other: code optimization --- source/dnode/vnode/src/vnd/vnodeCommit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 89542baa25..64f223b974 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -228,7 +228,7 @@ int vnodeCommit(SVnode *pVnode) { pVnode->inUse = NULL; pVnode->state.commitTerm = pVnode->state.applyTerm; - + // save info info.config = pVnode->config; info.state.committed = pVnode->state.applied; From 7e73f3617303e37f7d2d5123c90eb51ac1f821be Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Thu, 18 Aug 2022 10:08:22 +0800 Subject: [PATCH 14/18] other: code format revert --- source/libs/executor/inc/executil.h | 2 +- source/libs/executor/inc/executorimpl.h | 2 +- source/libs/executor/src/executil.c | 2 +- source/libs/executor/src/executorimpl.c | 2 +- source/libs/executor/src/scanoperator.c | 2 +- source/libs/executor/src/timewindowoperator.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/source/libs/executor/inc/executil.h b/source/libs/executor/inc/executil.h index d5b979c762..52c73f85f5 100644 --- a/source/libs/executor/inc/executil.h +++ b/source/libs/executor/inc/executil.h @@ -140,4 +140,4 @@ int32_t resultrowComparAsc(const void* p1, const void* p2); int32_t isQualifiedTable(STableKeyInfo* info, SNode* pTagCond, void* metaHandle, bool* pQualified); -#endif // TDENGINE_QUERYUTIL_H \ No newline at end of file +#endif // TDENGINE_QUERYUTIL_H diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 11d371d49b..311d82c8a2 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -1045,4 +1045,4 @@ void* destroySqlFunctionCtx(SqlFunctionCtx* pCtx, int32_t numOfOutput); } #endif -#endif // TDENGINE_EXECUTORIMPL_H \ No newline at end of file +#endif // TDENGINE_EXECUTORIMPL_H diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 615d742d40..34247d3b47 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -986,4 +986,4 @@ void initLimitInfo(const SNode* pLimit, const SNode* pSLimit, SLimitInfo* pLimit pLimitInfo->slimit = slimit; pLimitInfo->remainOffset = limit.offset; pLimitInfo->remainGroupOffset = slimit.offset; -} \ No newline at end of file +} diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 4380a662fe..d15dc99122 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -4777,4 +4777,4 @@ int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, const char* pKey, SqlF pCtx[i].pBuf = pSup->pResultBuf; } return code; -} \ No newline at end of file +} diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index c26f468238..8c4fbe7971 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3134,4 +3134,4 @@ _error: taosMemoryFree(pInfo); taosMemoryFree(pOperator); return NULL; -} \ No newline at end of file +} diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index b81cb7724f..6418f5305c 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -5487,4 +5487,4 @@ _error: taosMemoryFreeClear(pOperator); pTaskInfo->code = code; return NULL; -} \ No newline at end of file +} From fb2bb0481ec9bb66f2e7b3f9213ff44b883aa709 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Thu, 18 Aug 2022 13:22:59 +0800 Subject: [PATCH 15/18] fix(query): fix interp pResBlock->rows > capacity cause assert failure --- source/libs/executor/src/timewindowoperator.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 3e931a489d..757ab09d1a 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -2154,9 +2154,7 @@ static void doKeepLinearInfo(STimeSliceOperatorInfo* pSliceInfo, const SSDataBlo static void genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp* pExprSup, SSDataBlock* pResBlock) { int32_t rows = pResBlock->info.rows; - if (rows >= pResBlock->info.capacity) { - return; - } + blockDataEnsureCapacity(pResBlock, rows + 1); // todo set the correct primary timestamp column // output the result From a2757eb4f381ff4dc7b561fa1ca90caa572aa90a Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Thu, 18 Aug 2022 14:26:11 +0800 Subject: [PATCH 16/18] fix(mnode): memory leak --- source/dnode/mnode/impl/inc/mndDef.h | 1 + source/dnode/mnode/impl/inc/mndStb.h | 1 + source/dnode/mnode/impl/src/mndDef.c | 19 +++++++++++++++++++ source/dnode/mnode/impl/src/mndStb.c | 16 ++++++++++------ source/dnode/mnode/impl/src/mndStream.c | 13 ++++++++++++- 5 files changed, 43 insertions(+), 7 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 455da6a40e..8cff7fe48e 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -636,6 +636,7 @@ typedef struct { int32_t tEncodeSStreamObj(SEncoder* pEncoder, const SStreamObj* pObj); int32_t tDecodeSStreamObj(SDecoder* pDecoder, SStreamObj* pObj); +void tFreeStreamObj(SStreamObj* pObj); typedef struct { char streamName[TSDB_STREAM_FNAME_LEN]; diff --git a/source/dnode/mnode/impl/inc/mndStb.h b/source/dnode/mnode/impl/inc/mndStb.h index 44a7fdadde..010199a89f 100644 --- a/source/dnode/mnode/impl/inc/mndStb.h +++ b/source/dnode/mnode/impl/inc/mndStb.h @@ -34,6 +34,7 @@ int32_t mndCheckCreateStbReq(SMCreateStbReq *pCreate); SDbObj *mndAcquireDbByStb(SMnode *pMnode, const char *stbName); int32_t mndBuildStbFromReq(SMnode *pMnode, SStbObj *pDst, SMCreateStbReq *pCreate, SDbObj *pDb); int32_t mndAddStbToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SStbObj *pStb); +void mndFreeStb(SStbObj *pStb); void mndExtractDbNameFromStbFullName(const char *stbFullName, char *dst); void mndExtractTbNameFromStbFullName(const char *stbFullName, char *dst, int32_t dstSize); diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index 08ce161409..e6f1a40993 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -116,6 +116,25 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj) { return 0; } +void tFreeStreamObj(SStreamObj *pStream) { + taosMemoryFree(pStream->sql); + taosMemoryFree(pStream->ast); + taosMemoryFree(pStream->physicalPlan); + if (pStream->outputSchema.nCols) taosMemoryFree(pStream->outputSchema.pSchema); + + int32_t sz = taosArrayGetSize(pStream->tasks); + for (int32_t i = 0; i < sz; i++) { + SArray *pLevel = taosArrayGetP(pStream->tasks, i); + int32_t taskSz = taosArrayGetSize(pLevel); + for (int32_t j = 0; j < taskSz; j++) { + SStreamTask *pTask = taosArrayGetP(pLevel, j); + tFreeSStreamTask(pTask); + } + taosArrayDestroy(pLevel); + } + taosArrayDestroy(pStream->tasks); +} + SMqVgEp *tCloneSMqVgEp(const SMqVgEp *pVgEp) { SMqVgEp *pVgEpNew = taosMemoryMalloc(sizeof(SMqVgEp)); if (pVgEpNew == NULL) return NULL; diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 6083a76981..e0f2b83160 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -266,6 +266,15 @@ _OVER: return pRow; } +void mndFreeStb(SStbObj *pStb) { + taosArrayDestroy(pStb->pFuncs); + taosMemoryFreeClear(pStb->pColumns); + taosMemoryFreeClear(pStb->pTags); + taosMemoryFreeClear(pStb->comment); + taosMemoryFreeClear(pStb->pAst1); + taosMemoryFreeClear(pStb->pAst2); +} + static int32_t mndStbActionInsert(SSdb *pSdb, SStbObj *pStb) { mTrace("stb:%s, perform insert action, row:%p", pStb->name, pStb); return 0; @@ -273,12 +282,7 @@ static int32_t mndStbActionInsert(SSdb *pSdb, SStbObj *pStb) { static int32_t mndStbActionDelete(SSdb *pSdb, SStbObj *pStb) { mTrace("stb:%s, perform delete action, row:%p", pStb->name, pStb); - taosArrayDestroy(pStb->pFuncs); - taosMemoryFreeClear(pStb->pColumns); - taosMemoryFreeClear(pStb->pTags); - taosMemoryFreeClear(pStb->comment); - taosMemoryFreeClear(pStb->pAst1); - taosMemoryFreeClear(pStb->pAst2); + mndFreeStb(pStb); return 0; } diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 8c453e0c88..6dc8e2072b 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -167,6 +167,9 @@ static int32_t mndStreamActionInsert(SSdb *pSdb, SStreamObj *pStream) { static int32_t mndStreamActionDelete(SSdb *pSdb, SStreamObj *pStream) { mTrace("stream:%s, perform delete action", pStream->name); + taosWLockLatch(&pStream->lock); + tFreeStreamObj(pStream); + taosWUnLockLatch(&pStream->lock); return 0; } @@ -493,10 +496,17 @@ static int32_t mndCreateStbForStream(SMnode *pMnode, STrans *pTrans, const SStre stbObj.uid = pStream->targetStbUid; - if (mndAddStbToTrans(pMnode, pTrans, pDb, &stbObj) < 0) goto _OVER; + if (mndAddStbToTrans(pMnode, pTrans, pDb, &stbObj) < 0) { + mndFreeStb(&stbObj); + goto _OVER; + } + + tFreeSMCreateStbReq(&createReq); + mndFreeStb(&stbObj); return 0; _OVER: + tFreeSMCreateStbReq(&createReq); mndReleaseStb(pMnode, pStb); mndReleaseDb(pMnode, pDb); return -1; @@ -715,6 +725,7 @@ _OVER: mndReleaseDb(pMnode, pDb); tFreeSCMCreateStreamReq(&createStreamReq); + tFreeStreamObj(&streamObj); return code; } From 8a32c0c1898bc6a90d6bbe73592eb44f15f0c70f Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Thu, 18 Aug 2022 14:55:57 +0800 Subject: [PATCH 17/18] refactor(sync): adjust strategy for dynamic quorum --- include/libs/sync/sync.h | 1 + source/libs/sync/inc/syncInt.h | 2 +- source/libs/sync/src/syncCommit.c | 40 ++++++++++++++++++++++++------- source/libs/sync/src/syncMain.c | 22 ++++++++++++++--- 4 files changed, 53 insertions(+), 12 deletions(-) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 952066df46..790cbf906d 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -34,6 +34,7 @@ extern bool gRaftDetailLog; #define SYNC_MAX_PROGRESS_WAIT_MS 4000 #define SYNC_MAX_START_TIME_RANGE_MS (1000 * 20) #define SYNC_MAX_RECV_TIME_RANGE_MS 1000 +#define SYNC_ADD_QUORUM_COUNT 3 #define SYNC_MAX_BATCH_SIZE 1 #define SYNC_INDEX_BEGIN 0 diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index de43c81654..0afc373f2d 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -237,7 +237,7 @@ void syncNodeVoteForSelf(SSyncNode* pSyncNode); bool syncNodeHasSnapshot(SSyncNode* pSyncNode); void syncNodeMaybeUpdateCommitBySnapshot(SSyncNode* pSyncNode); -SyncIndex syncNodeGetLastIndex(SSyncNode* pSyncNode); +SyncIndex syncNodeGetLastIndex(const SSyncNode* pSyncNode); SyncTerm syncNodeGetLastTerm(SSyncNode* pSyncNode); int32_t syncNodeGetLastIndexTerm(SSyncNode* pSyncNode, SyncIndex* pLastIndex, SyncTerm* pLastTerm); diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 3829ea0730..1e68fe346c 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -146,23 +146,47 @@ int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) { int64_t timeNow = taosGetTimestampMs(); for (int i = 0; i < pSyncNode->peersNum; ++i) { - int64_t peerStartTime = syncIndexMgrGetStartTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]); - int64_t peerRecvTime = syncIndexMgrGetRecvTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]); + int64_t peerStartTime = syncIndexMgrGetStartTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]); + int64_t peerRecvTime = syncIndexMgrGetRecvTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]); + SyncIndex peerMatchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId)[i]); - int64_t recvTimeDiff = syncNodeAbs64(peerRecvTime, timeNow); - int64_t startTimeDiff = syncNodeAbs64(peerStartTime, pSyncNode->startTime); + int64_t recvTimeDiff = TABS(peerRecvTime - timeNow); + int64_t startTimeDiff = TABS(peerStartTime - pSyncNode->startTime); + int64_t logDiff = TABS(peerMatchIndex - syncNodeGetLastIndex(pSyncNode)); + + /* + int64_t recvTimeDiff = syncNodeAbs64(peerRecvTime, timeNow); + int64_t startTimeDiff = syncNodeAbs64(peerStartTime, pSyncNode->startTime); + int64_t logDiff = syncNodeAbs64(peerMatchIndex, syncNodeGetLastIndex(pSyncNode)); + */ int32_t addQuorum = 0; if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) { - addQuorum = 1; + if (startTimeDiff < SYNC_MAX_START_TIME_RANGE_MS) { + addQuorum = 1; + } else { + if (logDiff < SYNC_ADD_QUORUM_COUNT) { + addQuorum = 1; + } else { + addQuorum = 0; + } + } } else { addQuorum = 0; } - if (startTimeDiff > SYNC_MAX_START_TIME_RANGE_MS) { - addQuorum = 0; - } + /* + if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) { + addQuorum = 1; + } else { + addQuorum = 0; + } + + if (startTimeDiff > SYNC_MAX_START_TIME_RANGE_MS) { + addQuorum = 0; + } + */ quorum += addQuorum; } diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index a00b59d292..3fe600ecbb 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -2284,7 +2284,7 @@ bool syncNodeHasSnapshot(SSyncNode* pSyncNode) { // return max(logLastIndex, snapshotLastIndex) // if no snapshot and log, return -1 -SyncIndex syncNodeGetLastIndex(SSyncNode* pSyncNode) { +SyncIndex syncNodeGetLastIndex(const SSyncNode* pSyncNode) { SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0, .lastConfigIndex = -1}; if (pSyncNode->pFsm->FpGetSnapshotInfo != NULL) { pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); @@ -2773,11 +2773,27 @@ int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* p return 0; } - if (ths->vgId > 1) { - syncNodeEventLog(ths, "I am vnode, can not do leader transfer"); + if (pEntry->term < ths->pRaftStore->currentTerm) { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "little term:%lu, can not do leader transfer", pEntry->term); + syncNodeEventLog(ths, logBuf); return 0; } + if (pEntry->index < syncNodeGetLastIndex(ths)) { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "little index:%ld, can not do leader transfer", pEntry->index); + syncNodeEventLog(ths, logBuf); + return 0; + } + + /* + if (ths->vgId > 1) { + syncNodeEventLog(ths, "I am vnode, can not do leader transfer"); + return 0; + } + */ + do { char logBuf[128]; snprintf(logBuf, sizeof(logBuf), "do leader transfer, index:%ld", pEntry->index); From d9e978814ad66b87478a88e53a506afe57a02a88 Mon Sep 17 00:00:00 2001 From: Hui Li <52318143+plum-lihui@users.noreply.github.com> Date: Thu, 18 Aug 2022 15:33:12 +0800 Subject: [PATCH 18/18] ci: rust dir The 2.x and 3.x have the same file name but different contents. --- Jenkinsfile2 | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile2 b/Jenkinsfile2 index 754617f99f..12e806c87a 100644 --- a/Jenkinsfile2 +++ b/Jenkinsfile2 @@ -43,6 +43,7 @@ def pre_test(){ cd ${WKC} git reset --hard git clean -fxd + rm -rf examples/rust/ git remote prune origin git fetch '''