From 7930260feda84ff59acb6459b504ccba88e08c8a Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Sat, 11 Jun 2022 16:44:01 +0800 Subject: [PATCH 01/36] other: add debug logs when load block info --- source/dnode/vnode/src/tsdb/tsdbReadImpl.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/source/dnode/vnode/src/tsdb/tsdbReadImpl.c b/source/dnode/vnode/src/tsdb/tsdbReadImpl.c index 1c2514d46f..7e3dc5b98f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadImpl.c +++ b/source/dnode/vnode/src/tsdb/tsdbReadImpl.c @@ -233,6 +233,17 @@ int tsdbLoadBlockInfo(SReadH *pReadh, void *pTarget) { tsdbError("vgId:%d, SBlockInfo part in file %s is corrupted since wrong checksum, offset:%u len :%u", TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pBlkIdx->offset, pBlkIdx->len); return -1; + } else { + SBlockInfo *pBlkInfo = pReadh->pBlkInfo; + SBlock *pBlk = NULL; + int nBlks = (pBlkIdx->len - sizeof(SBlockInfo)) / sizeof(SBlock); + for (int n = 0; n < nBlks; ++n) { + pBlk = &pBlkInfo->blocks[n]; + if (pBlk->numOfSubBlocks == 1) { + tsdbDebug("prop:vgId:%d, file %s , offset:%u len :%u has %d rows", TSDB_READ_REPO_ID(pReadh), + TSDB_FILE_FULL_NAME(pHeadf), pBlkIdx->offset, pBlkIdx->len, pBlk->numOfRows); + } + } } // ASSERT(pBlkIdx->tid == pReadh->pBlkInfo->tid && pBlkIdx->uid == pReadh->pBlkInfo->uid); From 9b8a7d11df194c003006ea000ab5c4bac781cff7 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Sat, 11 Jun 2022 18:50:28 +0800 Subject: [PATCH 02/36] other: add logs when read blk info --- source/dnode/vnode/src/tsdb/tsdbReadImpl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbReadImpl.c b/source/dnode/vnode/src/tsdb/tsdbReadImpl.c index 7e3dc5b98f..e4c5ef8fc3 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadImpl.c +++ b/source/dnode/vnode/src/tsdb/tsdbReadImpl.c @@ -239,7 +239,7 @@ int tsdbLoadBlockInfo(SReadH *pReadh, void *pTarget) { int nBlks = (pBlkIdx->len - sizeof(SBlockInfo)) / sizeof(SBlock); for (int n = 0; n < nBlks; ++n) { pBlk = &pBlkInfo->blocks[n]; - if (pBlk->numOfSubBlocks == 1) { + if (pBlk->numOfSubBlocks >= 1) { tsdbDebug("prop:vgId:%d, file %s , offset:%u len :%u has %d rows", TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pBlkIdx->offset, pBlkIdx->len, pBlk->numOfRows); } From f2de1af904765db5a6e8e08eca2485ba02f703a2 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 15 Jun 2022 23:00:31 +0800 Subject: [PATCH 03/36] enh(query): partition by support arithmetic expr, and do some internal refactor. --- include/libs/function/function.h | 4 - source/libs/executor/inc/executil.h | 1 - source/libs/executor/inc/executorimpl.h | 26 ++-- source/libs/executor/src/executil.c | 11 -- source/libs/executor/src/executorimpl.c | 133 +----------------- source/libs/executor/src/groupoperator.c | 49 +++++-- source/libs/executor/src/scanoperator.c | 11 +- source/libs/executor/src/timewindowoperator.c | 5 - source/libs/function/inc/texpr.h | 67 --------- source/libs/function/src/texpr.c | 8 -- 10 files changed, 59 insertions(+), 256 deletions(-) delete mode 100644 source/libs/function/inc/texpr.h diff --git a/include/libs/function/function.h b/include/libs/function/function.h index b359fa5d6a..54762a0f17 100644 --- a/include/libs/function/function.h +++ b/include/libs/function/function.h @@ -150,12 +150,8 @@ typedef struct SqlFunctionCtx { } SqlFunctionCtx; enum { - TEXPR_NODE_DUMMY = 0x0, TEXPR_BINARYEXPR_NODE= 0x1, TEXPR_UNARYEXPR_NODE = 0x2, - TEXPR_FUNCTION_NODE = 0x3, - TEXPR_COL_NODE = 0x4, - TEXPR_VALUE_NODE = 0x8, }; typedef struct tExprNode { diff --git a/source/libs/executor/inc/executil.h b/source/libs/executor/inc/executil.h index ca25b7aab1..a70cff5552 100644 --- a/source/libs/executor/inc/executil.h +++ b/source/libs/executor/inc/executil.h @@ -55,7 +55,6 @@ typedef struct SResultRow { uint32_t numOfRows; // number of rows of current time window STimeWindow win; struct SResultRowEntryInfo pEntryInfo[]; // For each result column, there is a resultInfo -// char *key; // start key of current result row } SResultRow; typedef struct SResultRowPosition { diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index e3e85bf2b8..49316ff746 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -540,6 +540,13 @@ typedef struct SFillOperatorInfo { bool multigroupResult; } SFillOperatorInfo; +typedef struct SScalarSupp { + SExprInfo* pScalarExprInfo; + int32_t numOfScalarExpr; // the number of scalar expression in group operator + SqlFunctionCtx* pScalarFuncCtx; + int32_t* rowCellInfoOffset; // offset value for each row result cell info +} SScalarSupp; + typedef struct SGroupbyOperatorInfo { // SOptrBasicInfo should be first, SAggSupporter should be second for stream encode SOptrBasicInfo binfo; @@ -552,10 +559,7 @@ typedef struct SGroupbyOperatorInfo { char* keyBuf; // group by keys for hash int32_t groupKeyLen; // total group by column width SGroupResInfo groupResInfo; - SExprInfo* pScalarExprInfo; - int32_t numOfScalarExpr; // the number of scalar expression in group operator - SqlFunctionCtx* pScalarFuncCtx; - int32_t* rowCellInfoOffset; // offset value for each row result cell info + SScalarSupp scalarSup; } SGroupbyOperatorInfo; typedef struct SDataGroupInfo { @@ -578,6 +582,7 @@ typedef struct SPartitionOperatorInfo { int32_t* columnOffset; // start position for each column data void* pGroupIter; // group iterator int32_t pageIndex; // page index of current group + SScalarSupp scalarSupp; } SPartitionOperatorInfo; typedef struct SWindowRowsSup { @@ -755,6 +760,8 @@ void getAlignQueryTimeWindow(SInterval* pInterval, int32_t precision, int64_t int32_t getTableScanInfo(SOperatorInfo* pOperator, int32_t *order, int32_t* scanFlag); int32_t getBufferPgSize(int32_t rowSize, uint32_t* defaultPgsz, uint32_t* defaultBufsz); +SArray* extractPartitionColInfo(SNodeList* pNodeList); + void doSetOperatorCompleted(SOperatorInfo* pOperator); void doFilter(const SNode* pFilterNode, SSDataBlock* pBlock); SqlFunctionCtx* createSqlFunctionCtx(SExprInfo* pExprInfo, int32_t numOfOutput, int32_t** rowCellInfoOffset); @@ -834,20 +841,17 @@ SOperatorInfo* createFillOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExp SOperatorInfo* createStatewindowOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfCols, SSDataBlock* pResBlock, STimeWindowAggSupp *pTwAggSupp, int32_t tsSlotId, SColumn* pStateKeyCol, SExecTaskInfo* pTaskInfo); -SOperatorInfo* createPartitionOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, - SSDataBlock* pResultBlock, SArray* pGroupColList, SExecTaskInfo* pTaskInfo); +SOperatorInfo* createPartitionOperatorInfo(SOperatorInfo* downstream, SPartitionPhysiNode* pPartNode, SExecTaskInfo* pTaskInfo); SOperatorInfo* createTimeSliceOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, SSDataBlock* pResultBlock, const SNodeListNode* pValNode, SExecTaskInfo* pTaskInfo); SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream, SExprInfo* pExprInfo, int32_t numOfCols, SSDataBlock* pResBlock, SNode* pOnCondition, SExecTaskInfo* pTaskInfo); -SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, - SExprInfo* pExprInfo, int32_t numOfCols, SSDataBlock* pResBlock, int64_t gap, - int32_t tsSlotId, STimeWindowAggSupp* pTwAggSupp, SExecTaskInfo* pTaskInfo); +SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, + SSDataBlock* pResBlock, int64_t gap, int32_t tsSlotId, STimeWindowAggSupp* pTwAggSupp, SExecTaskInfo* pTaskInfo); -SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, - SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); +SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); #if 0 SOperatorInfo* createTableSeqScanOperatorInfo(void* pTsdbReadHandle, STaskRuntimeEnv* pRuntimeEnv); diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 6ba1cf859e..01ed30c189 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -28,17 +28,6 @@ typedef struct SCompSupporter { int32_t order; } SCompSupporter; -int32_t getOutputInterResultBufSize(STaskAttr* pQueryAttr) { - int32_t size = 0; - - for (int32_t i = 0; i < pQueryAttr->numOfOutput; ++i) { -// size += pQueryAttr->pExpr1[i].base.interBytes; - } - - assert(size >= 0); - return size; -} - int32_t initResultRowInfo(SResultRowInfo *pResultRowInfo, int32_t size) { pResultRowInfo->size = 0; pResultRowInfo->cur.pageId = -1; diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index de109a24ca..89692c0061 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -40,9 +40,6 @@ #define IS_MAIN_SCAN(runtime) ((runtime)->scanFlag == MAIN_SCAN) #define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN) -#define SDATA_BLOCK_INITIALIZER \ - (SDataBlockInfo) { {0}, 0 } - #define GET_FORWARD_DIRECTION_FACTOR(ord) (((ord) == TSDB_ORDER_ASC) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP) #if 0 @@ -95,8 +92,6 @@ static void setBlockStatisInfo(SqlFunctionCtx* pCtx, SExprInfo* pExpr, SSDataBlo static void destroyTableQueryInfoImpl(STableQueryInfo* pTableQueryInfo); -static SColumnInfo* extractColumnFilterInfo(SExprInfo* pExpr, int32_t numOfOutput, int32_t* numOfFilterCols); - static void releaseQueryBuf(size_t numOfTables); static void destroySFillOperatorInfo(void* param, int32_t numOfOutput); @@ -454,75 +449,6 @@ static bool chkWindowOutputBufByKey(STaskRuntimeEnv* pRuntimeEnv, SResultRowInfo return chkResultRowFromKey(pRuntimeEnv, pResultRowInfo, (char*)&win->skey, TSDB_KEYSIZE, masterscan, groupId); } -static void doUpdateResultRowIndex(SResultRowInfo* pResultRowInfo, TSKEY lastKey, bool ascQuery, - bool timeWindowInterpo) { - int64_t skey = TSKEY_INITIAL_VAL; -#if 0 - int32_t i = 0; - for (i = pResultRowInfo->size - 1; i >= 0; --i) { - SResultRow* pResult = pResultRowInfo->pResult[i]; - if (pResult->closed) { - break; - } - - // new closed result rows - if (timeWindowInterpo) { - if (pResult->endInterp && - ((pResult->win.skey <= lastKey && ascQuery) || (pResult->win.skey >= lastKey && !ascQuery))) { - if (i > 0) { // the first time window, the startInterp is false. - assert(pResult->startInterp); - } - - closeResultRow(pResultRowInfo, i); - } else { - skey = pResult->win.skey; - } - } else { - if ((pResult->win.ekey <= lastKey && ascQuery) || (pResult->win.skey >= lastKey && !ascQuery)) { - closeResultRow(pResultRowInfo, i); - } else { - skey = pResult->win.skey; - } - } - } - - // all result rows are closed, set the last one to be the skey - if (skey == TSKEY_INITIAL_VAL) { - if (pResultRowInfo->size == 0) { - // assert(pResultRowInfo->current == NULL); - assert(pResultRowInfo->curPos == -1); - pResultRowInfo->curPos = -1; - } else { - pResultRowInfo->curPos = pResultRowInfo->size - 1; - } - } else { - for (i = pResultRowInfo->size - 1; i >= 0; --i) { - SResultRow* pResult = pResultRowInfo->pResult[i]; - if (pResult->closed) { - break; - } - } - - if (i == pResultRowInfo->size - 1) { - pResultRowInfo->curPos = i; - } else { - pResultRowInfo->curPos = i + 1; // current not closed result object - } - } -#endif -} -// -// static void updateResultRowInfoActiveIndex(SResultRowInfo* pResultRowInfo, const STimeWindow* pWin, TSKEY lastKey, -// bool ascQuery, bool interp) { -// if ((lastKey > pWin->ekey && ascQuery) || (lastKey < pWin->ekey && (!ascQuery))) { -// closeAllResultRows(pResultRowInfo); -// pResultRowInfo->curPos = pResultRowInfo->size - 1; -// } else { -// int32_t step = ascQuery ? 1 : -1; -// doUpdateResultRowIndex(pResultRowInfo, lastKey - step, ascQuery, interp); -// } -//} - // query_range_start, query_range_end, window_duration, window_start, window_end void initExecTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* pQueryWindow) { pColData->info.type = TSDB_DATA_TYPE_TIMESTAMP; @@ -810,20 +736,6 @@ int32_t projectApplyFunctions(SExprInfo* pExpr, SSDataBlock* pResult, SSDataBloc // _rowts/_c0, not tbname column if (fmIsPseudoColumnFunc(pfCtx->functionId) && (!fmIsScanPseudoColumnFunc(pfCtx->functionId))) { // do nothing - } else if (fmIsIndefiniteRowsFunc(pfCtx->functionId)) { - SResultRowEntryInfo* pResInfo = GET_RES_INFO(&pCtx[k]); - pfCtx->fpSet.init(&pCtx[k], pResInfo); - - pfCtx->pOutput = taosArrayGet(pResult->pDataBlock, outputSlotId); - pfCtx->offset = createNewColModel ? 0 : pResult->info.rows; // set the start offset - - // set the timestamp(_rowts) output buffer - if (taosArrayGetSize(pPseudoList) > 0) { - int32_t* outputColIndex = taosArrayGet(pPseudoList, 0); - pfCtx->pTsOutput = (SColumnInfoData*)pCtx[*outputColIndex].pOutput; - } - - numOfRows = pfCtx->fpSet.process(pfCtx); } else { SArray* pBlockList = taosArrayInit(4, POINTER_BYTES); taosArrayPush(pBlockList, &pSrcBlock); @@ -886,7 +798,6 @@ int32_t setGroupResultOutputBuf(SOptrBasicInfo* binfo, int32_t numOfCols, char* doSetResultOutBufByKey(pBuf, pResultRowInfo, (char*)pData, bytes, true, groupId, pTaskInfo, false, pAggSup); assert(pResultRow != NULL); - setResultRowKey(pResultRow, pData, type); setResultRowInitCtx(pResultRow, pCtx, numOfCols, binfo->rowCellInfoOffset); return TSDB_CODE_SUCCESS; } @@ -908,21 +819,6 @@ bool functionNeedToExecute(SqlFunctionCtx* pCtx) { return false; } - // if (functionId == FUNCTION_FIRST_DST || functionId == FUNCTION_FIRST) { - // // return QUERY_IS_ASC_QUERY(pQueryAttr); - // } - // - // // denote the order type - // if ((functionId == FUNCTION_LAST_DST || functionId == FUNCTION_LAST)) { - // // return pCtx->param[0].i == pQueryAttr->order.order; - // } - - // in the reverse table scan, only the following functions need to be executed - // if (IS_REVERSE_SCAN(pRuntimeEnv) || - // (pRuntimeEnv->scanFlag == REPEAT_SCAN && functionId != FUNCTION_STDDEV && functionId != FUNCTION_PERCT)) { - // return false; - // } - return true; } @@ -3934,27 +3830,6 @@ void initResultSizeInfo(SOperatorInfo* pOperator, int32_t numOfRows) { } } -// static STableQueryInfo* initTableQueryInfo(const STableListInfo* pTableListInfo) { -// int32_t size = taosArrayGetSize(pTableListInfo->pTableList); -// if (size == 0) { -// return NULL; -// } -// -// STableQueryInfo* pTableQueryInfo = taosMemoryCalloc(size, sizeof(STableQueryInfo)); -// if (pTableQueryInfo == NULL) { -// return NULL; -// } -// -// for (int32_t j = 0; j < size; ++j) { -// STableKeyInfo* pk = taosArrayGet(pTableListInfo->pTableList, j); -// STableQueryInfo* pTQueryInfo = &pTableQueryInfo[j]; -// pTQueryInfo->lastKey = pk->lastKey; -// } -// -// pTableQueryInfo->lastKey = 0; -// return pTableQueryInfo; -//} - SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, SSDataBlock* pResultBlock, SExprInfo* pScalarExprInfo, int32_t numOfScalarExpr, SExecTaskInfo* pTaskInfo) { @@ -4526,7 +4401,6 @@ static int32_t getTableList(void* metaHandle, int32_t tableType, uint64_t tableU static SArray* extractColumnInfo(SNodeList* pNodeList); static SArray* createSortInfo(SNodeList* pNodeList); -static SArray* extractPartitionColInfo(SNodeList* pNodeList); int32_t extractTableSchemaVersion(SReadHandle* pHandle, uint64_t uid, SExecTaskInfo* pTaskInfo) { SMetaReader mr = {0}; @@ -4861,12 +4735,7 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_PARTITION == type) { - SPartitionPhysiNode* pPartNode = (SPartitionPhysiNode*)pPhyNode; - SArray* pColList = extractPartitionColInfo(pPartNode->pPartitionKeys); - SSDataBlock* pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc); - - SExprInfo* pExprInfo = createExprInfo(pPartNode->pTargets, NULL, &num); - pOptr = createPartitionOperatorInfo(ops[0], pExprInfo, num, pResBlock, pColList, pTaskInfo); + pOptr = createPartitionOperatorInfo(ops[0], (SPartitionPhysiNode*)pPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_STATE == type) { SStateWinodwPhysiNode* pStateNode = (SStateWinodwPhysiNode*)pPhyNode; diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index 688c576d03..23226a0134 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -320,8 +320,8 @@ static SSDataBlock* hashGroupbyAggregate(SOperatorInfo* pOperator) { setInputDataBlock(pOperator, pInfo->binfo.pCtx, pBlock, order, scanFlag, true); // there is an scalar expression that needs to be calculated right before apply the group aggregation. - if (pInfo->pScalarExprInfo != NULL) { - pTaskInfo->code = projectApplyFunctions(pInfo->pScalarExprInfo, pBlock, pBlock, pInfo->pScalarFuncCtx, pInfo->numOfScalarExpr, NULL); + if (pInfo->scalarSup.pScalarExprInfo != NULL) { + pTaskInfo->code = projectApplyFunctions(pInfo->scalarSup.pScalarExprInfo, pBlock, pBlock, pInfo->scalarSup.pScalarFuncCtx, pInfo->scalarSup.numOfScalarExpr, NULL); if (pTaskInfo->code != TSDB_CODE_SUCCESS) { longjmp(pTaskInfo->env, pTaskInfo->code); } @@ -385,9 +385,9 @@ SOperatorInfo* createGroupOperatorInfo(SOperatorInfo* downstream, SExprInfo* pEx pInfo->pGroupCols = pGroupColList; pInfo->pCondition = pCondition; - pInfo->pScalarExprInfo = pScalarExprInfo; - pInfo->numOfScalarExpr = numOfScalarExpr; - pInfo->pScalarFuncCtx = createSqlFunctionCtx(pScalarExprInfo, numOfScalarExpr, &pInfo->rowCellInfoOffset); + pInfo->scalarSup.pScalarExprInfo = pScalarExprInfo; + pInfo->scalarSup.numOfScalarExpr = numOfScalarExpr; + pInfo->scalarSup.pScalarFuncCtx = createSqlFunctionCtx(pScalarExprInfo, numOfScalarExpr, &pInfo->scalarSup.rowCellInfoOffset); int32_t code = initGroupOptrInfo(&pInfo->pGroupColVals, &pInfo->groupKeyLen, &pInfo->keyBuf, pGroupColList); if (code != TSDB_CODE_SUCCESS) { @@ -624,7 +624,9 @@ static SSDataBlock* hashPartition(SOperatorInfo* pOperator) { return NULL; } - SGroupbyOperatorInfo* pInfo = pOperator->info; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + + SPartitionOperatorInfo* pInfo = pOperator->info; SSDataBlock* pRes = pInfo->binfo.pRes; if (pOperator->status == OP_RES_TO_RETURN) { @@ -641,6 +643,14 @@ static SSDataBlock* hashPartition(SOperatorInfo* pOperator) { break; } + // there is an scalar expression that needs to be calculated right before apply the group aggregation. + if (pInfo->scalarSupp.pScalarExprInfo != NULL) { + pTaskInfo->code = projectApplyFunctions(pInfo->scalarSupp.pScalarExprInfo, pBlock, pBlock, pInfo->scalarSupp.pScalarFuncCtx, pInfo->scalarSupp.numOfScalarExpr, NULL); + if (pTaskInfo->code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, pTaskInfo->code); + } + } + doHashPartition(pOperator, pBlock); } @@ -665,15 +675,26 @@ static void destroyPartitionOperatorInfo(void* param, int32_t numOfOutput) { taosMemoryFree(pInfo->columnOffset); } -SOperatorInfo* createPartitionOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, - SSDataBlock* pResultBlock, SArray* pGroupColList, SExecTaskInfo* pTaskInfo) { +SOperatorInfo* createPartitionOperatorInfo(SOperatorInfo* downstream, SPartitionPhysiNode* pPartNode, SExecTaskInfo* pTaskInfo) { SPartitionOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SPartitionOperatorInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); if (pInfo == NULL || pOperator == NULL) { goto _error; } - pInfo->pGroupCols = pGroupColList; + SSDataBlock* pResBlock = createResDataBlock(pPartNode->node.pOutputDataBlockDesc); + + int32_t numOfCols = 0; + SExprInfo* pExprInfo = createExprInfo(pPartNode->pTargets, NULL, &numOfCols); + + pInfo->pGroupCols = extractPartitionColInfo(pPartNode->pPartitionKeys); + + if (pPartNode->pExprs != NULL) { + pInfo->scalarSupp.numOfScalarExpr = 0; + pInfo->scalarSupp.pScalarExprInfo = createExprInfo(pPartNode->pExprs, NULL, &pInfo->scalarSupp.numOfScalarExpr); + pInfo->scalarSupp.pScalarFuncCtx = createSqlFunctionCtx( + pInfo->scalarSupp.pScalarExprInfo, pInfo->scalarSupp.numOfScalarExpr, &pInfo->scalarSupp.rowCellInfoOffset); + } _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); pInfo->pGroupSet = taosHashInit(100, hashFn, false, HASH_NO_LOCK); @@ -683,16 +704,16 @@ SOperatorInfo* createPartitionOperatorInfo(SOperatorInfo* downstream, SExprInfo* uint32_t defaultPgsz = 0; uint32_t defaultBufsz = 0; - getBufferPgSize(pResultBlock->info.rowSize, &defaultPgsz, &defaultBufsz); + getBufferPgSize(pResBlock->info.rowSize, &defaultPgsz, &defaultBufsz); int32_t code = createDiskbasedBuf(&pInfo->pBuf, defaultPgsz, defaultBufsz, pTaskInfo->id.str, TD_TMP_DIR_PATH); if (code != TSDB_CODE_SUCCESS) { goto _error; } - pInfo->rowCapacity = blockDataGetCapacityInRow(pResultBlock, getBufPageSize(pInfo->pBuf)); - pInfo->columnOffset = setupColumnOffset(pResultBlock, pInfo->rowCapacity); - code = initGroupOptrInfo(&pInfo->pGroupColVals, &pInfo->groupKeyLen, &pInfo->keyBuf, pGroupColList); + pInfo->rowCapacity = blockDataGetCapacityInRow(pResBlock, getBufPageSize(pInfo->pBuf)); + pInfo->columnOffset = setupColumnOffset(pResBlock, pInfo->rowCapacity); + code = initGroupOptrInfo(&pInfo->pGroupColVals, &pInfo->groupKeyLen, &pInfo->keyBuf, pInfo->pGroupCols); if (code != TSDB_CODE_SUCCESS) { goto _error; } @@ -701,7 +722,7 @@ SOperatorInfo* createPartitionOperatorInfo(SOperatorInfo* downstream, SExprInfo* pOperator->blocking = true; pOperator->status = OP_NOT_OPENED; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_PARTITION; - pInfo->binfo.pRes = pResultBlock; + pInfo->binfo.pRes = pResBlock; pOperator->numOfExprs = numOfCols; pOperator->pExpr = pExprInfo; pOperator->info = pInfo; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index d30e4ef6db..62c2240098 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1157,6 +1157,8 @@ static SSDataBlock* doFilterResult(SSysTableScanInfo* pInfo) { return pInfo->pRes->info.rows == 0 ? NULL : pInfo->pRes; } + doFilter(pInfo->pCondition, pInfo->pRes); +#if 0 SFilterInfo* filter = NULL; int32_t code = filterInitFromNode(pInfo->pCondition, &filter, 0); @@ -1202,6 +1204,7 @@ static SSDataBlock* doFilterResult(SSysTableScanInfo* pInfo) { px->info.rows = numOfRow; pInfo->pRes = px; +#endif return pInfo->pRes->info.rows == 0 ? NULL : pInfo->pRes; } @@ -1380,6 +1383,8 @@ static SSDataBlock* doSysTableScan(SOperatorInfo* pOperator) { relocateColumnData(pInfo->pRes, pInfo->scanCols, p->pDataBlock); doFilterResult(pInfo); + blockDataDestroy(p); + pInfo->loadInfo.totalRows += pInfo->pRes->info.rows; return (pInfo->pRes->info.rows == 0) ? NULL : pInfo->pRes; } @@ -1469,10 +1474,10 @@ int32_t buildSysDbTableInfo(const SSysTableScanInfo* pInfo, int32_t capacity) { p->info.rows = buildDbTableInfoBlock(p, pSysDbTableMeta, size, TSDB_PERFORMANCE_SCHEMA_DB); relocateColumnData(pInfo->pRes, pInfo->scanCols, p->pDataBlock); - // blockDataDestroy(p); todo handle memory leak - pInfo->pRes->info.rows = p->info.rows; - return p->info.rows; + blockDataDestroy(p); + + return pInfo->pRes->info.rows; } int32_t buildDbTableInfoBlock(const SSDataBlock* p, const SSysTableMeta* pSysDbTableMeta, size_t size, diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 4141c85366..4673123779 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -333,11 +333,6 @@ void doTimeWindowInterpolation(SIntervalAggOperatorInfo* pInfo, int32_t numOfExp continue; } - // if (functionId != FUNCTION_TWA && functionId != FUNCTION_INTERP) { - // pCtx[k].start.key = INT64_MIN; - // continue; - // } - SFunctParam* pParam = &pCtx[k].param[0]; SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, pParam->pCol->slotId); diff --git a/source/libs/function/inc/texpr.h b/source/libs/function/inc/texpr.h deleted file mode 100644 index 68aedcb5a2..0000000000 --- a/source/libs/function/inc/texpr.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_COMMON_EXPR_H_ -#define _TD_COMMON_EXPR_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "os.h" - -#include "tmsg.h" -#include "taosdef.h" -#include "tskiplist.h" -#include "function.h" - -struct tExprNode; -struct SSchema; - -#define QUERY_COND_REL_PREFIX_IN "IN|" -#define QUERY_COND_REL_PREFIX_LIKE "LIKE|" -#define QUERY_COND_REL_PREFIX_MATCH "MATCH|" -#define QUERY_COND_REL_PREFIX_NMATCH "NMATCH|" - -#define QUERY_COND_REL_PREFIX_IN_LEN 3 -#define QUERY_COND_REL_PREFIX_LIKE_LEN 5 -#define QUERY_COND_REL_PREFIX_MATCH_LEN 6 -#define QUERY_COND_REL_PREFIX_NMATCH_LEN 7 - -typedef bool (*__result_filter_fn_t)(const void *, void *); -typedef void (*__do_filter_suppl_fn_t)(void *, void *); - -/** - * this structure is used to filter data in tags, so the offset of filtered tag column in tagdata string is required - */ -typedef struct tQueryInfo { - uint8_t optr; // expression operator - SSchema sch; // schema of tags - char* q; - __compar_fn_t compare; // filter function - bool indexed; // indexed columns -} tQueryInfo; - -typedef struct SExprTraverseSupp { - __result_filter_fn_t nodeFilterFn; - __do_filter_suppl_fn_t setupInfoFn; - void *pExtInfo; -} SExprTraverseSupp; - -#ifdef __cplusplus -} -#endif - -#endif /*_TD_COMMON_EXPR_H_*/ diff --git a/source/libs/function/src/texpr.c b/source/libs/function/src/texpr.c index f04b4f17f9..039e0a9dfc 100644 --- a/source/libs/function/src/texpr.c +++ b/source/libs/function/src/texpr.c @@ -17,15 +17,7 @@ #include "os.h" #include "texception.h" -#include "taosdef.h" #include "tmsg.h" -#include "tarray.h" -#include "tbuffer.h" -#include "tcompare.h" -#include "thash.h" -#include "texpr.h" -#include "tvariant.h" -#include "tdef.h" static void doExprTreeDestroy(tExprNode **pExpr, void (*fp)(void *)); From 803249837a00f12207a455fbc228209cf2372360 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Thu, 16 Jun 2022 10:29:30 +0800 Subject: [PATCH 04/36] feat(stream): drop stream --- include/common/tmsg.h | 6 ++--- source/dnode/mnode/impl/src/mndStream.c | 31 ++++++++++++++----------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 4894bef8ff..7c0e8206e1 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2008,7 +2008,7 @@ typedef struct { char sql[TSDB_SHOW_SQL_LEN]; uint64_t queryId; int64_t useconds; - int64_t stime; // timestamp precision ms + int64_t stime; // timestamp precision ms int64_t reqRid; int32_t pid; bool stableQuery; @@ -2257,8 +2257,8 @@ typedef struct { } SMqVDeleteRsp; typedef struct { - char name[TSDB_STREAM_FNAME_LEN]; - int64_t streamId; + char name[TSDB_STREAM_FNAME_LEN]; + int8_t igNotExists; } SMDropStreamTaskReq; typedef struct { diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 24e41435e9..491835fc15 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -278,8 +278,8 @@ int32_t mndAddStreamToTrans(SMnode *pMnode, SStreamObj *pStream, const char *ast } static int32_t mndCreateStbForStream(SMnode *pMnode, STrans *pTrans, const SStreamObj *pStream, const char *user) { - SStbObj *pStb = NULL; - SDbObj *pDb = NULL; + SStbObj *pStb = NULL; + SDbObj *pDb = NULL; SMCreateStbReq createReq = {0}; tstrncpy(createReq.name, pStream->targetSTbName, TSDB_TABLE_FNAME_LEN); @@ -321,7 +321,6 @@ static int32_t mndCreateStbForStream(SMnode *pMnode, STrans *pTrans, const SStre goto _OVER; } - if (mndCheckDbAuth(pMnode, user, MND_OPER_WRITE_DB, pDb) != 0) { goto _OVER; } @@ -520,13 +519,19 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { /*SDbObj *pDb = NULL;*/ /*SUserObj *pUser = NULL;*/ - SMDropStreamTaskReq dropStreamReq = *(SMDropStreamTaskReq *)pReq->pCont; + SMDropStreamTaskReq dropReq = *(SMDropStreamTaskReq *)pReq->pCont; - pStream = mndAcquireStream(pMnode, dropStreamReq.name); + pStream = mndAcquireStream(pMnode, dropReq.name); if (pStream == NULL) { - terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; - return -1; + if (dropReq.igNotExists) { + mDebug("stream:%s, not exist, ignore not exist is set", dropReq.name); + code = 0; + goto DROP_STREAM_OVER; + } else { + terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; + return -1; + } } #if 0 @@ -539,19 +544,19 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_NOTHING, pReq); if (pTrans == NULL) { - mError("stream:%s, failed to drop since %s", dropStreamReq.name, terrstr()); - return -1; + mError("stream:%s, failed to drop since %s", dropReq.name, terrstr()); + return code; } - mDebug("trans:%d, used to drop stream:%s", pTrans->id, dropStreamReq.name); + mDebug("trans:%d, used to drop stream:%s", pTrans->id, dropReq.name); // drop all tasks if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { - mError("stream:%s, failed to drop task since %s", dropStreamReq.name, terrstr()); - return -1; + mError("stream:%s, failed to drop task since %s", dropReq.name, terrstr()); + return code; } DROP_STREAM_OVER: - return 0; + return code; } static int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams) { From 63884b554bf9104ffc6bb7e13d0efd5483b3b1be Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 16 Jun 2022 10:51:29 +0800 Subject: [PATCH 05/36] fix(query): set correct sort buffer page size. --- source/common/src/tdatablock.c | 1 + source/libs/executor/inc/tsort.h | 7 +++++++ source/libs/executor/src/sortoperator.c | 27 +++++++++++++++---------- source/libs/executor/src/tsort.c | 26 +++++++++++++++--------- 4 files changed, 41 insertions(+), 20 deletions(-) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 5a2aaed74e..6a4ddacf60 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1226,6 +1226,7 @@ SSDataBlock* createOneDataBlock(const SSDataBlock* pDataBlock, bool copyData) { SColumnInfoData colInfo = {0}; SColumnInfoData* p = taosArrayGet(pDataBlock->pDataBlock, i); colInfo.info = p->info; + colInfo.hasNull = true; taosArrayPush(pBlock->pDataBlock, &colInfo); } diff --git a/source/libs/executor/inc/tsort.h b/source/libs/executor/inc/tsort.h index fd3581e2bf..e731c55a7d 100644 --- a/source/libs/executor/inc/tsort.h +++ b/source/libs/executor/inc/tsort.h @@ -151,6 +151,13 @@ SSDataBlock* tsortGetSortedDataBlock(const SSortHandle* pSortHandle); */ SSortExecInfo tsortGetSortExecInfo(SSortHandle* pHandle); +/** + * get proper sort buffer pages according to the row size + * @param rowSize + * @return + */ +int32_t getProperSortPageSize(size_t rowSize); + #ifdef __cplusplus } #endif diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index 1008a5263c..98a1aacbb6 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -420,24 +420,29 @@ SOperatorInfo* createMultiwaySortMergeOperatorInfo(SOperatorInfo** downStreams, goto _error; } - pInfo->binfo.pRes = pResBlock; initResultSizeInfo(pOperator, 1024); - pInfo->pSortInfo = pSortInfo; + pInfo->binfo.pRes = pResBlock; + pInfo->pSortInfo = pSortInfo; pInfo->pColMatchInfo = pColMatchColInfo; - pInfo->pInputBlock = pInputBlock; - pOperator->name = "MultiwaySortMerge"; + pInfo->pInputBlock = pInputBlock; + pOperator->name = "MultiwaySortMerge"; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_MERGE; - pOperator->blocking = false; - pOperator->status = OP_NOT_OPENED; - pOperator->info = pInfo; - - pInfo->bufPageSize = rowSize < 1024 ? 1024 : rowSize * 2; - pInfo->sortBufSize = pInfo->bufPageSize * 16; - pInfo->hasGroupId = false; + pOperator->blocking = false; + pOperator->status = OP_NOT_OPENED; + pOperator->info = pInfo; + pInfo->hasGroupId = false; pInfo->prefetchedTuple = NULL; pOperator->pTaskInfo = pTaskInfo; + + pInfo->bufPageSize = getProperSortPageSize(rowSize); + + uint32_t numOfSources = taosArrayGetSize(pSortInfo); + numOfSources = MAX(2, numOfSources); + + pInfo->sortBufSize = numOfSources * pInfo->bufPageSize; + pOperator->fpSet = createOperatorFpSet(doOpenMultiwaySortMergeOperator, doMultiwaySortMerge, NULL, NULL, destroyMultiwaySortMergeOperatorInfo, NULL, NULL, getMultiwaySortMergeExplainExecInfo); diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 1473bd81bb..daaba74d7c 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -532,6 +532,19 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { return 0; } +int32_t getProperSortPageSize(size_t rowSize) { + uint32_t defaultPageSize = 4096; + + uint32_t pgSize = 0; + if (rowSize * 4 > defaultPageSize) { + pgSize = rowSize * 4; + } else { + pgSize = defaultPageSize; + } + + return pgSize; +} + static int32_t createInitialSources(SSortHandle* pHandle) { size_t sortBufSize = pHandle->numOfPages * pHandle->pageSize; @@ -557,14 +570,9 @@ static int32_t createInitialSources(SSortHandle* pHandle) { if (!hasGroupId) { // calculate the buffer pages according to the total available buffers. - int32_t rowSize = blockDataGetRowSize(pBlock); - if (rowSize * 4 > 4096) { - pHandle->pageSize = rowSize * 4; - } else { - pHandle->pageSize = 4096; - } - - // todo!! + pHandle->pageSize = getProperSortPageSize(blockDataGetRowSize(pBlock)); + + // todo, number of pages are set according to the total available sort buffer pHandle->numOfPages = 1024; sortBufSize = pHandle->numOfPages * pHandle->pageSize; @@ -577,7 +585,7 @@ static int32_t createInitialSources(SSortHandle* pHandle) { if (pHandle->beforeFp != NULL) { pHandle->beforeFp(pBlock, pHandle->param); } - // todo relocate the columns + int32_t code = blockDataMerge(pHandle->pDataBlock, pBlock); if (code != 0) { return code; From 58051169d2102ad1b9c43f62673d40e34d7aab63 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Thu, 16 Jun 2022 10:54:13 +0800 Subject: [PATCH 06/36] refactor(sync): add trace log --- source/libs/sync/src/syncAppendEntriesReply.c | 8 +-- source/libs/sync/src/syncSnapshot.c | 70 ++++++++++--------- 2 files changed, 40 insertions(+), 38 deletions(-) diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index 0e116e13ee..fbe3319675 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -191,17 +191,17 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries char* s = snapshotSender2Str(pSender); sDebug( "vgId:%d sync event snapshot send to %s:%d start sender first time, lastApplyIndex:%ld lastApplyTerm:%lu " - "lastConfigIndex:%ld" + "lastConfigIndex:%ld privateTerm:%lu " "sender:%s", ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, - pSender->snapshot.lastConfigIndex, s); + pSender->snapshot.lastConfigIndex, pSender->privateTerm, s); taosMemoryFree(s); } else { sDebug( "vgId:%d sync event snapshot send to %s:%d start sender first time, lastApplyIndex:%ld " - "lastApplyTerm:%lu lastConfigIndex:%ld", + "lastApplyTerm:%lu lastConfigIndex:%ld privateTerm:%lu", ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, - pSender->snapshot.lastConfigIndex); + pSender->snapshot.lastConfigIndex, pSender->privateTerm); } } diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index afebfa798e..c92920a936 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -142,17 +142,17 @@ void snapshotSenderStart(SSyncSnapshotSender *pSender) { char *msgStr = syncSnapshotSend2Str(pMsg); sDebug( "vgId:%d sync event snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " - "lastConfigIndex:%ld send " + "lastConfigIndex:%ld privateTerm:%lu send " "msg:%s", pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, msgStr); + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->privateTerm, msgStr); taosMemoryFree(msgStr); } else { sDebug( "vgId:%d sync event snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " - "lastConfigIndex:%ld", + "lastConfigIndex:%ld privateTerm:%lu", pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->privateTerm); } syncSnapshotSendDestroy(pMsg); @@ -280,24 +280,24 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { char *msgStr = syncSnapshotSend2Str(pMsg); sDebug( "vgId:%d sync event snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " - "lastConfigIndex:%ld send " + "lastConfigIndex:%ld privateTerm:%lu send " "msg:%s", pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, msgStr); + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->privateTerm, msgStr); taosMemoryFree(msgStr); } else { sDebug( "vgId:%d sync event snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " - "lastConfigIndex:%ld", + "lastConfigIndex:%ld privateTerm:%lu", pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->privateTerm); } } else { sDebug( "vgId:%d sync event snapshot send to %s:%d sending seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " - "lastConfigIndex:%ld", + "lastConfigIndex:%ld privateTerm:%lu", pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex); + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->privateTerm); } syncSnapshotSendDestroy(pMsg); @@ -328,12 +328,12 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) { if (gRaftDetailLog) { char *msgStr = syncSnapshotSend2Str(pMsg); - sDebug("vgId:%d sync event snapshot send to %s:%d resend seq:%d ack:%d send msg:%s", pSender->pSyncNode->vgId, - host, port, pSender->seq, pSender->ack, msgStr); + sDebug("vgId:%d sync event snapshot send to %s:%d resend seq:%d ack:%d privateTerm:%lu send msg:%s", + pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->privateTerm, msgStr); taosMemoryFree(msgStr); } else { - sDebug("vgId:%d sync event snapshot send to %s:%d resend seq:%d ack:%d", pSender->pSyncNode->vgId, host, port, - pSender->seq, pSender->ack); + sDebug("vgId:%d sync event snapshot send to %s:%d resend seq:%d ack:%d privateTerm:%lu", pSender->pSyncNode->vgId, + host, port, pSender->seq, pSender->ack, pSender->privateTerm); } syncSnapshotSendDestroy(pMsg); @@ -567,15 +567,16 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { char *msgStr = syncSnapshotSend2Str(pMsg); sDebug( "vgId:%d sync event snapshot recv from %s:%d begin ack:%d, lastIndex:%ld, lastTerm:%lu, " - "lastConfigIndex:%ld, recv msg:%s", + "lastConfigIndex:%ld, privateTerm:%lu, recv msg:%s", pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex, - msgStr); + pReceiver->privateTerm, msgStr); taosMemoryFree(msgStr); } else { sDebug( "vgId:%d sync event snapshot recv from %s:%d begin ack:%d, lastIndex:%ld, lastTerm:%lu, " - "lastConfigIndex:%ld", - pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex); + "lastConfigIndex:%ld privateTerm:%lu", + pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex, + pReceiver->privateTerm); } } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_END) { @@ -638,17 +639,17 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { sDebug( "vgId:%d sync event snapshot recv from %s:%d finish, update log begin index:%ld, " "snapshot.lastApplyIndex:%ld, " - "snapshot.lastApplyTerm:%lu, snapshot.lastConfigIndex:%ld, raft log:%s", + "snapshot.lastApplyTerm:%lu, snapshot.lastConfigIndex:%ld, privateTerm:%lu, raft log:%s", pSyncNode->vgId, host, port, pMsg->lastIndex + 1, snapshot.lastApplyIndex, snapshot.lastApplyTerm, - snapshot.lastConfigIndex, logSimpleStr); + snapshot.lastConfigIndex, pReceiver->privateTerm, logSimpleStr); taosMemoryFree(logSimpleStr); } else { sDebug( "vgId:%d sync event snapshot recv from %s:%d finish, update log begin index:%ld, " "snapshot.lastApplyIndex:%ld, " - "snapshot.lastApplyTerm:%lu, snapshot.lastConfigIndex:%ld", + "snapshot.lastApplyTerm:%lu, snapshot.lastConfigIndex:%ld, privateTerm:%lu", pSyncNode->vgId, host, port, pMsg->lastIndex + 1, snapshot.lastApplyIndex, snapshot.lastApplyTerm, - snapshot.lastConfigIndex); + snapshot.lastConfigIndex, pReceiver->privateTerm); } pReceiver->pWriter = NULL; @@ -660,16 +661,16 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { char *msgStr = syncSnapshotSend2Str(pMsg); sDebug( "vgId:%d sync event snapshot recv from %s:%d end ack:%d, lastIndex:%ld, lastTerm:%lu, " - "lastConfigIndex:%ld, recv msg:%s", + "lastConfigIndex:%ld, privateTerm:%lu, recv msg:%s", pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, - pMsg->lastConfigIndex, msgStr); + pMsg->lastConfigIndex, pReceiver->privateTerm, msgStr); taosMemoryFree(msgStr); } else { sDebug( "vgId:%d sync event snapshot recv from %s:%d end ack:%d, lastIndex:%ld, lastTerm:%lu, " - "lastConfigIndex:%ld", + "lastConfigIndex:%ld, privateTerm:%lu", pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, - pMsg->lastConfigIndex); + pMsg->lastConfigIndex, pReceiver->privateTerm); } } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_FORCE_CLOSE) { @@ -685,17 +686,17 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { char *msgStr = syncSnapshotSend2Str(pMsg); sDebug( "vgId:%d sync event snapshot recv from %s:%d force close ack:%d, lastIndex:%ld, lastTerm:%lu, " - "lastConfigIndex:%ld, recv " + "lastConfigIndex:%ld, privateTerm:%lu, recv " "msg:%s", pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, - pMsg->lastConfigIndex, msgStr); + pMsg->lastConfigIndex, pReceiver->privateTerm, msgStr); taosMemoryFree(msgStr); } else { sDebug( "vgId:%d sync event snapshot recv from %s:%d force close ack:%d, lastIndex:%ld, lastTerm:%lu, " - "lastConfigIndex:%ld", + "lastConfigIndex:%ld, privateTerm:%lu", pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, - pMsg->lastConfigIndex); + pMsg->lastConfigIndex, pReceiver->privateTerm); } } else if (pMsg->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->seq < SYNC_SNAPSHOT_SEQ_END) { @@ -716,15 +717,16 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { char *msgStr = syncSnapshotSend2Str(pMsg); sDebug( "vgId:%d sync event snapshot recv from %s:%d receiving ack:%d, lastIndex:%ld, lastTerm:%lu, " - "lastConfigIndex:%ld, recv msg:%s", + "lastConfigIndex:%ld, privateTerm:%lu, recv msg:%s", pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex, - msgStr); + pReceiver->privateTerm, msgStr); taosMemoryFree(msgStr); } else { sDebug( "vgId:%d sync event snapshot recv from %s:%d receiving ack:%d, lastIndex:%ld, lastTerm:%lu, " - "lastConfigIndex:%ld", - pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex); + "lastConfigIndex:%ld, privateTerm:%lu", + pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex, + pReceiver->privateTerm); } } else { From 2360f072a84dba8086dda533e8297d2821b5b8b9 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 16 Jun 2022 11:43:11 +0800 Subject: [PATCH 07/36] fix(query): add indef process procedure. --- include/libs/executor/executor.h | 9 +-------- source/libs/executor/inc/executorimpl.h | 4 ++-- source/libs/executor/src/executor.c | 2 +- source/libs/executor/src/executorMain.c | 4 ++-- source/libs/executor/src/executorimpl.c | 17 ++++++++++++++++- source/libs/qworker/inc/qwMsg.h | 2 +- source/libs/qworker/src/qwMsg.c | 4 +--- source/libs/qworker/src/qworker.c | 6 +++--- 8 files changed, 27 insertions(+), 21 deletions(-) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 3f05c27453..fdedb947d7 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -95,7 +95,7 @@ int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bo * @return */ int32_t qCreateExecTask(SReadHandle* readHandle, int32_t vgId, uint64_t taskId, struct SSubplan* pPlan, - qTaskInfo_t* pTaskInfo, DataSinkHandle* handle, EOPTR_EXEC_MODEL model); + qTaskInfo_t* pTaskInfo, DataSinkHandle* handle, const char* sql, EOPTR_EXEC_MODEL model); /** * @@ -159,13 +159,6 @@ int64_t qGetQueriedTableUid(qTaskInfo_t tinfo); */ int32_t qGetQualifiedTableIdList(void* pTableList, const char* tagCond, int32_t tagCondLen, SArray* pTableIdList); -/** - * Update the table id list of a given query. - * @param uid child table uid - * @param type operation type: ADD|DROP - * @return - */ -int32_t qUpdateQueriedTableIdList(qTaskInfo_t tinfo, int64_t uid, int32_t type); void qProcessFetchRsp(void* parent, struct SRpcMsg* pMsg, struct SEpSet* pEpSet); diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 758c031028..b435446513 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -189,7 +189,7 @@ typedef struct SExecTaskInfo { } schemaVer; STableListInfo tableqinfoList; // this is a table list - char* sql; // query sql string + const char* sql; // query sql string jmp_buf env; // jump to this position when error happens. EOPTR_EXEC_MODEL execModel; // operator execution model [batch model|stream model] struct SOperatorInfo* pRoot; @@ -894,7 +894,7 @@ int32_t decodeOperator(SOperatorInfo* ops, char* data, int32_t length); void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status); int32_t createExecTaskInfoImpl(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SReadHandle* pHandle, uint64_t taskId, - EOPTR_EXEC_MODEL model); + const char* sql, EOPTR_EXEC_MODEL model); int32_t createDataSinkParam(SDataSinkNode *pNode, void **pParam, qTaskInfo_t* pTaskInfo); int32_t getOperatorExplainExecInfo(SOperatorInfo* operatorInfo, SExplainExecInfo** pRes, int32_t* capacity, int32_t* resNum); diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index bebfc75f17..b1d076e8f5 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -121,7 +121,7 @@ qTaskInfo_t qCreateStreamExecTaskInfo(void* msg, void* streamReadHandle) { } qTaskInfo_t pTaskInfo = NULL; - code = qCreateExecTask(streamReadHandle, 0, 0, plan, &pTaskInfo, NULL, OPTR_EXEC_MODEL_STREAM); + code = qCreateExecTask(streamReadHandle, 0, 0, plan, &pTaskInfo, NULL, NULL, OPTR_EXEC_MODEL_STREAM); if (code != TSDB_CODE_SUCCESS) { // TODO: destroy SSubplan & pTaskInfo terrno = code; diff --git a/source/libs/executor/src/executorMain.c b/source/libs/executor/src/executorMain.c index b1125c8e80..00158d7024 100644 --- a/source/libs/executor/src/executorMain.c +++ b/source/libs/executor/src/executorMain.c @@ -31,13 +31,13 @@ static void initRefPool() { } int32_t qCreateExecTask(SReadHandle* readHandle, int32_t vgId, uint64_t taskId, SSubplan* pSubplan, - qTaskInfo_t* pTaskInfo, DataSinkHandle* handle, EOPTR_EXEC_MODEL model) { + qTaskInfo_t* pTaskInfo, DataSinkHandle* handle, const char* sql, EOPTR_EXEC_MODEL model) { assert(readHandle != NULL && pSubplan != NULL); SExecTaskInfo** pTask = (SExecTaskInfo**)pTaskInfo; taosThreadOnce(&initPoolOnce, initRefPool); - int32_t code = createExecTaskInfoImpl(pSubplan, pTask, readHandle, taskId, model); + int32_t code = createExecTaskInfoImpl(pSubplan, pTask, readHandle, taskId, sql, model); if (code != TSDB_CODE_SUCCESS) { goto _error; } diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index d066284c56..9ce22431e1 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -736,6 +736,20 @@ int32_t projectApplyFunctions(SExprInfo* pExpr, SSDataBlock* pResult, SSDataBloc // _rowts/_c0, not tbname column if (fmIsPseudoColumnFunc(pfCtx->functionId) && (!fmIsScanPseudoColumnFunc(pfCtx->functionId))) { // do nothing + } else if (fmIsIndefiniteRowsFunc(pfCtx->functionId)) { + SResultRowEntryInfo* pResInfo = GET_RES_INFO(&pCtx[k]); + pfCtx->fpSet.init(&pCtx[k], pResInfo); + + pfCtx->pOutput = taosArrayGet(pResult->pDataBlock, outputSlotId); + pfCtx->offset = createNewColModel ? 0 : pResult->info.rows; // set the start offset + + // set the timestamp(_rowts) output buffer + if (taosArrayGetSize(pPseudoList) > 0) { + int32_t* outputColIndex = taosArrayGet(pPseudoList, 0); + pfCtx->pTsOutput = (SColumnInfoData*)pCtx[*outputColIndex].pOutput; + } + + numOfRows = pfCtx->fpSet.process(pfCtx); } else { SArray* pBlockList = taosArrayInit(4, POINTER_BYTES); taosArrayPush(pBlockList, &pSrcBlock); @@ -5183,7 +5197,7 @@ int32_t createDataSinkParam(SDataSinkNode* pNode, void** pParam, qTaskInfo_t* pT } int32_t createExecTaskInfoImpl(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SReadHandle* pHandle, uint64_t taskId, - EOPTR_EXEC_MODEL model) { + const char* sql, EOPTR_EXEC_MODEL model) { uint64_t queryId = pPlan->id.queryId; int32_t code = TSDB_CODE_SUCCESS; @@ -5193,6 +5207,7 @@ int32_t createExecTaskInfoImpl(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SRead goto _complete; } + (*pTaskInfo)->sql = sql; (*pTaskInfo)->pRoot = createOperatorTree(pPlan->pNode, *pTaskInfo, pHandle, queryId, taskId, &(*pTaskInfo)->tableqinfoList, pPlan->pTagCond); if (NULL == (*pTaskInfo)->pRoot) { diff --git a/source/libs/qworker/inc/qwMsg.h b/source/libs/qworker/inc/qwMsg.h index 29861d87ac..ecff861f50 100644 --- a/source/libs/qworker/inc/qwMsg.h +++ b/source/libs/qworker/inc/qwMsg.h @@ -24,7 +24,7 @@ extern "C" { #include "dataSinkMgt.h" int32_t qwPrerocessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg); -int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType, int8_t explain); +int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType, int8_t explain, const char* sql); int32_t qwProcessCQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg); int32_t qwProcessReady(QW_FPARAMS_DEF, SQWMsg *qwMsg); int32_t qwProcessFetch(QW_FPARAMS_DEF, SQWMsg *qwMsg); diff --git a/source/libs/qworker/src/qwMsg.c b/source/libs/qworker/src/qwMsg.c index 848a0420ca..5635ec8fc6 100644 --- a/source/libs/qworker/src/qwMsg.c +++ b/source/libs/qworker/src/qwMsg.c @@ -308,10 +308,8 @@ int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg, int SQWMsg qwMsg = {.node = node, .msg = msg->msg + msg->sqlLen, .msgLen = msg->phyLen, .connInfo = pMsg->info}; char * sql = strndup(msg->msg, msg->sqlLen); QW_SCH_TASK_DLOG("processQuery start, node:%p, handle:%p, sql:%s", node, pMsg->info.handle, sql); - taosMemoryFreeClear(sql); - - QW_ERR_RET(qwProcessQuery(QW_FPARAMS(), &qwMsg, msg->taskType, msg->explain)); + QW_ERR_RET(qwProcessQuery(QW_FPARAMS(), &qwMsg, msg->taskType, msg->explain, sql)); QW_SCH_TASK_DLOG("processQuery end, node:%p", node); return TSDB_CODE_SUCCESS; diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 451607e7d0..57ebb89ed2 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -510,7 +510,7 @@ _return: } -int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType, int8_t explain) { +int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType, int8_t explain, const char* sql) { int32_t code = 0; bool queryRsped = false; SSubplan *plan = NULL; @@ -537,7 +537,7 @@ int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType, int8_t ex ctx->plan = plan; - code = qCreateExecTask(qwMsg->node, mgmt->nodeId, tId, plan, &pTaskInfo, &sinkHandle, OPTR_EXEC_MODEL_BATCH); + code = qCreateExecTask(qwMsg->node, mgmt->nodeId, tId, plan, &pTaskInfo, &sinkHandle, sql, OPTR_EXEC_MODEL_BATCH); if (code) { QW_TASK_ELOG("qCreateExecTask failed, code:%x - %s", code, tstrerror(code)); QW_ERR_JRET(code); @@ -938,7 +938,7 @@ int32_t qwProcessDelete(QW_FPARAMS_DEF, SQWMsg *qwMsg, SRpcMsg *pRsp, SDeleteRes ctx.plan = plan; - code = qCreateExecTask(qwMsg->node, mgmt->nodeId, tId, plan, &pTaskInfo, &sinkHandle, OPTR_EXEC_MODEL_BATCH); + code = qCreateExecTask(qwMsg->node, mgmt->nodeId, tId, plan, &pTaskInfo, &sinkHandle, NULL, OPTR_EXEC_MODEL_BATCH); if (code) { QW_TASK_ELOG("qCreateExecTask failed, code:%x - %s", code, tstrerror(code)); QW_ERR_JRET(code); From 102fdf2916aadfe0f438806fd7c188d3580a4537 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 16 Jun 2022 11:52:58 +0800 Subject: [PATCH 08/36] fix(query): fix the syntax error in unit test. --- source/libs/executor/test/executorTests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/executor/test/executorTests.cpp b/source/libs/executor/test/executorTests.cpp index 880ac6c78e..dba2fb897a 100644 --- a/source/libs/executor/test/executorTests.cpp +++ b/source/libs/executor/test/executorTests.cpp @@ -945,7 +945,7 @@ TEST(testCase, build_executor_tree_Test) { int32_t code = qStringToSubplan(msg, &plan); ASSERT_EQ(code, 0); - code = qCreateExecTask(&handle, 2, 1, plan, (void**)&pTaskInfo, &sinkHandle, OPTR_EXEC_MODEL_BATCH); + code = qCreateExecTask(&handle, 2, 1, plan, (void**)&pTaskInfo, &sinkHandle, NULL, OPTR_EXEC_MODEL_BATCH); ASSERT_EQ(code, 0); } #if 0 From ed532ed7acb5e0e4916b09e2e1e9e60308790380 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Thu, 16 Jun 2022 12:01:13 +0800 Subject: [PATCH 09/36] refactor: adjust logs --- source/dnode/mnode/impl/src/mndDnode.c | 1 + source/dnode/mnode/impl/src/mndMain.c | 1 + source/dnode/mnode/impl/src/mndMnode.c | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 53cbb9b669..6d37143fc9 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -370,6 +370,7 @@ static int32_t mndProcessStatusReq(SRpcMsg *pReq) { SMnodeObj *pObj = mndAcquireMnode(pMnode, pDnode->id); if (pObj != NULL) { if (pObj->state != statusReq.mload.syncState) { + mInfo("dnode:%d, mnode syncstate from %s to %s", pObj->id, syncStr(pObj->state), syncStr(statusReq.mload.syncState)); pObj->state = statusReq.mload.syncState; pObj->stateStartTime = taosGetTimestampMs(); } diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 00118eac1e..e141e7dfb3 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -699,6 +699,7 @@ int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgr int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) { pLoad->syncState = syncGetMyRole(pMnode->syncMgmt.sync); + mTrace("mnode current syncstate is %s", syncStr(pLoad->syncState)); return 0; } diff --git a/source/dnode/mnode/impl/src/mndMnode.c b/source/dnode/mnode/impl/src/mndMnode.c index 7ec490d52f..e82c479cdd 100644 --- a/source/dnode/mnode/impl/src/mndMnode.c +++ b/source/dnode/mnode/impl/src/mndMnode.c @@ -667,6 +667,10 @@ static int32_t mndRetrieveMnodes(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB } if (pObj->pDnode && mndIsDnodeOnline(pObj->pDnode, curMs)) { roles = syncStr(pObj->state); + if (pObj->state == TAOS_SYNC_STATE_LEADER && pObj->id != pMnode->selfDnodeId) { + roles = syncStr(TAOS_SYNC_STATE_ERROR); + mError("mnode:%d, is leader too", pObj->id); + } } char b2[12 + VARSTR_HEADER_SIZE] = {0}; STR_WITH_MAXSIZE_TO_VARSTR(b2, roles, pShow->pMeta->pSchemas[cols].bytes); From 75e0eaa53fed96fa0a766475dcb57ebdf1c7032c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 16 Jun 2022 13:53:02 +0800 Subject: [PATCH 10/36] fix(query): add null pointer check. --- source/libs/executor/src/tsort.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index daaba74d7c..5bcd58f8db 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -204,7 +204,12 @@ static int32_t sortComparInit(SMsortComparParam* cmpParam, SArray* pSources, int if (pHandle->type == SORT_SINGLESOURCE_SORT) { for (int32_t i = 0; i < cmpParam->numOfSources; ++i) { SSortSource* pSource = cmpParam->pSources[i]; - SPageInfo* pPgInfo = *(SPageInfo**)taosArrayGet(pSource->pageIdList, pSource->pageIndex); + + if (taosArrayGetSize(pSource->pageIdList) == 0) { + return TSDB_CODE_SUCCESS; + } + + SPageInfo* pPgInfo = *(SPageInfo**)taosArrayGet(pSource->pageIdList, pSource->pageIndex); void* pPage = getBufPage(pHandle->pBuf, getPageId(pPgInfo)); code = blockDataFromBuf(pSource->src.pBlock, pPage); From 2ba3b7538c50b0d7267d0c056b962e0d7f90aebe Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Thu, 16 Jun 2022 14:09:14 +0800 Subject: [PATCH 11/36] test: redistribute vgroup --- ...redistribute_vgroup_replica3_v1_leader.sim | 166 +++++------------- 1 file changed, 48 insertions(+), 118 deletions(-) diff --git a/tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_leader.sim b/tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_leader.sim index 90f7b1ad1f..7b52b51306 100644 --- a/tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_leader.sim +++ b/tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_leader.sim @@ -4,6 +4,11 @@ system sh/deploy.sh -n dnode2 -i 2 system sh/deploy.sh -n dnode3 -i 3 system sh/deploy.sh -n dnode4 -i 4 system sh/deploy.sh -n dnode5 -i 5 +system sh/cfg.sh -n dnode1 -c transPullupInterval -v 1 +system sh/cfg.sh -n dnode2 -c transPullupInterval -v 1 +system sh/cfg.sh -n dnode3 -c transPullupInterval -v 1 +system sh/cfg.sh -n dnode4 -c transPullupInterval -v 1 +system sh/cfg.sh -n dnode5 -c transPullupInterval -v 1 system sh/cfg.sh -n dnode1 -c supportVnodes -v 0 system sh/exec.sh -n dnode1 -s start system sh/exec.sh -n dnode2 -s start @@ -128,9 +133,9 @@ if $rows != 1 then endi if $data(2)[4] == leader then $leaderExist = 1 - $leaderVnode = 4 - $follower1 = 2 - $follower2 = 3 + $leaderVnode = 2 + $follower1 = 3 + $follower2 = 4 endi if $data(2)[6] == leader then $leaderExist = 1 @@ -140,9 +145,9 @@ if $data(2)[6] == leader then endi if $data(2)[8] == leader then $leaderExist = 1 - $leaderVnode = 2 - $follower1 = 3 - $follower2 = 4 + $leaderVnode = 4 + $follower1 = 2 + $follower2 = 3 endi if $leaderExist != 1 then goto step3 @@ -160,110 +165,22 @@ if $rows != 1 then return -1 endi -<<<<<<< HEAD -print =============== step32: move follower2 -print redistribute vgroup 2 dnode $leaderVnode dnode $follower2 dnode 5 -sql redistribute vgroup 2 dnode $leaderVnode dnode $follower2 dnode 5 -<<<<<<< HEAD:tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_leader.sim -======= -sql show d1.vgroups -print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 - ->>>>>>> origin/3.0:tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_follower.sim -======= print =============== step32: move leader print redistribute vgroup 2 dnode $follower1 dnode $follower2 dnode 5 sql redistribute vgroup 2 dnode $follower1 dnode $follower2 dnode 5 sql show d1.vgroups print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 ->>>>>>> origin/3.0 sql show d1.tables if $rows != 1 then return -1 endi -<<<<<<< HEAD +return + print =============== step33: move follower1 -print redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode 5 -sql redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode 5 -<<<<<<< HEAD:tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_leader.sim -======= -sql show d1.vgroups -print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 - ->>>>>>> origin/3.0:tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_follower.sim -======= -print =============== step33: move follower2 -print redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode 5 -sql redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode 5 -sql show d1.vgroups -print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 - ->>>>>>> origin/3.0 -sql show d1.tables -if $rows != 1 then - return -1 -endi - -<<<<<<< HEAD -print =============== step34: move follower2 -print redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower2 -sql redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower2 -<<<<<<< HEAD:tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_leader.sim -======= -sql show d1.vgroups -print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 - ->>>>>>> origin/3.0:tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_follower.sim -======= -print =============== step34: move follower1 -print redistribute vgroup 2 dnode $follower2 dnode 5 dnode $leaderVnode -sql redistribute vgroup 2 dnode $follower2 dnode 5 dnode $leaderVnode -sql show d1.vgroups -print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 - ->>>>>>> origin/3.0 -sql show d1.tables -if $rows != 1 then - return -1 -endi - -<<<<<<< HEAD -print =============== step35: move follower1 -print redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower1 -sql redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower1 -<<<<<<< HEAD:tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_leader.sim -sql show d1.tables -if $rows != 1 then - return -1 -endi -======= -sql show d1.vgroups -print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 ->>>>>>> origin/3.0:tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_follower.sim -======= -print =============== step35: move 5 -print redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode $follower2 -sql redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode $follower2 -sql show d1.vgroups -print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 ->>>>>>> origin/3.0 - -sql show d1.tables -if $rows != 1 then - return -1 -endi - -<<<<<<< HEAD -print =============== step36: move follower2 print redistribute vgroup 2 dnode $leaderVnode dnode $follower2 dnode 5 sql redistribute vgroup 2 dnode $leaderVnode dnode $follower2 dnode 5 -======= -print =============== step36: move follower1 -print redistribute vgroup 2 dnode $follower1 dnode $follower2 dnode 5 -sql redistribute vgroup 2 dnode $follower1 dnode $follower2 dnode 5 ->>>>>>> origin/3.0 sql show d1.vgroups print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 @@ -272,11 +189,40 @@ if $rows != 1 then return -1 endi -<<<<<<< HEAD -print =============== step37: move follower1 -======= +print =============== step34: move follower2 +print redistribute vgroup 2 dnode $follower1 dnode 5 dnode $leaderVnode +sql redistribute vgroup 2 dnode $follower1 dnode 5 dnode $leaderVnode +sql show d1.vgroups +print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 + +sql show d1.tables +if $rows != 1 then + return -1 +endi + +print =============== step35: move leader +print redistribute vgroup 2 dnode $follower2 dnode 5 dnode $follower1 +sql redistribute vgroup 2 dnode $follower2 dnode 5 dnode $follower1 +sql show d1.vgroups +print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 + +sql show d1.tables +if $rows != 1 then + return -1 +endi + +print =============== step36: move follower1 +print redistribute vgroup 2 dnode $leaderVnode dnode $follower2 dnode 5 +sql redistribute vgroup 2 dnode $leaderVnode dnode $follower2 dnode 5 +sql show d1.vgroups +print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 + +sql show d1.tables +if $rows != 1 then + return -1 +endi + print =============== step37: move follower2 ->>>>>>> origin/3.0 print redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode 5 sql redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode 5 sql show d1.vgroups @@ -287,17 +233,9 @@ if $rows != 1 then return -1 endi -<<<<<<< HEAD -<<<<<<< HEAD:tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_leader.sim -======= -print =============== step38: move follower2 -print redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower2 -sql redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower2 -======= print =============== step38: move leader print redistribute vgroup 2 dnode $follower1 dnode 5 dnode $follower2 sql redistribute vgroup 2 dnode $follower1 dnode 5 dnode $follower2 ->>>>>>> origin/3.0 sql show d1.vgroups print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 @@ -305,18 +243,10 @@ sql show d1.tables if $rows != 1 then return -1 endi -<<<<<<< HEAD ->>>>>>> origin/3.0:tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_follower.sim print =============== step39: move follower1 -print redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower1 -sql redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower1 -======= - -print =============== step39: move 5 -print redistribute vgroup 2 dnode $leaderVnode dnode $follower2 dnode $follower1 -sql redistribute vgroup 2 dnode $leaderVnode dnode $follower2 dnode $follower1 ->>>>>>> origin/3.0 +print redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower2 +sql redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower2 sql show d1.vgroups print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 From 77b03ac821c8c03d8f583d2e52203dd7021e605a Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Thu, 16 Jun 2022 14:40:42 +0800 Subject: [PATCH 12/36] refactor(sync): add restore finish when become leader again --- include/libs/sync/sync.h | 2 +- source/dnode/mnode/impl/src/mndSync.c | 12 +-- source/libs/sync/inc/syncInt.h | 3 + source/libs/sync/src/syncMain.c | 129 +++++++++++++++++++------ source/libs/sync/src/syncRaftLog.c | 7 +- source/libs/sync/src/syncRespMgr.c | 15 +-- source/libs/sync/src/syncSnapshot.c | 7 ++ source/libs/sync/test/syncTestTool.cpp | 8 +- 8 files changed, 134 insertions(+), 49 deletions(-) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 2d21b3531a..e694203563 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -24,7 +24,7 @@ extern "C" { #include "tdef.h" #include "tmsgcb.h" -#define SYNC_INDEX_BEGIN 0 +#define SYNC_INDEX_BEGIN 0 #define SYNC_INDEX_INVALID -1 typedef uint64_t SyncNodeId; diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 63708aef8a..3a243e36d3 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -221,17 +221,17 @@ void mndCleanupSync(SMnode *pMnode) { int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) { SSyncMgmt *pMgmt = &pMnode->syncMgmt; - SRpcMsg rsp = {.code = TDMT_MND_APPLY_MSG, .contLen = sdbGetRawTotalSize(pRaw)}; - rsp.pCont = rpcMallocCont(rsp.contLen); - if (rsp.pCont == NULL) return -1; - memcpy(rsp.pCont, pRaw, rsp.contLen); + SRpcMsg req = {.msgType = TDMT_MND_APPLY_MSG, .contLen = sdbGetRawTotalSize(pRaw)}; + req.pCont = rpcMallocCont(req.contLen); + if (req.pCont == NULL) return -1; + memcpy(req.pCont, pRaw, req.contLen); pMgmt->errCode = 0; pMgmt->transId = transId; mTrace("trans:%d, will be proposed", pMgmt->transId); const bool isWeak = false; - int32_t code = syncPropose(pMgmt->sync, &rsp, isWeak); + int32_t code = syncPropose(pMgmt->sync, &req, isWeak); if (code == 0) { tsem_wait(&pMgmt->syncSem); } else if (code == -1 && terrno == TSDB_CODE_SYN_NOT_LEADER) { @@ -242,7 +242,7 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) { terrno = TSDB_CODE_APP_ERROR; } - rpcFreeCont(rsp.pCont); + rpcFreeCont(req.pCont); if (code != 0) { mError("trans:%d, failed to propose, code:0x%x", pMgmt->transId, code); return code; diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 8a44dcd9bc..b00c7cbda1 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -195,6 +195,7 @@ int32_t syncNodeSendMsgByInfo(const SNodeInfo* nodeInfo, SSyncNode* pSyncNode, S cJSON* syncNode2Json(const SSyncNode* pSyncNode); char* syncNode2Str(const SSyncNode* pSyncNode); char* syncNode2SimpleStr(const SSyncNode* pSyncNode); +bool syncNodeInConfig(SSyncNode* pSyncNode, const SSyncCfg* config); void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex lastConfigChangeIndex, bool* isDrop); SSyncNode* syncNodeAcquire(int64_t rid); @@ -230,6 +231,8 @@ int32_t syncNodeGetPreIndexTerm(SSyncNode* pSyncNode, SyncIndex index, SyncInd int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag); +int32_t syncNodeUpdateNewConfigIndex(SSyncNode* ths, SSyncCfg* pNewCfg); + bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId); SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index a0d8487b63..5e3c1adf32 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -185,7 +185,9 @@ int32_t syncReconfigBuild(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg ASSERT(rid == pSyncNode->rid); int32_t ret = 0; - bool IamInNew = false; + bool IamInNew = syncNodeInConfig(pSyncNode, pNewCfg); + +#if 0 for (int i = 0; i < pNewCfg->replicaNum; ++i) { if (strcmp((pNewCfg->nodeInfo)[i].nodeFqdn, pSyncNode->myNodeInfo.nodeFqdn) == 0 && (pNewCfg->nodeInfo)[i].nodePort == pSyncNode->myNodeInfo.nodePort) { @@ -201,6 +203,7 @@ int32_t syncReconfigBuild(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg } */ } +#endif if (!IamInNew) { taosReleaseRef(tsNodeRefId, pSyncNode->rid); @@ -228,7 +231,9 @@ int32_t syncReconfig(int64_t rid, const SSyncCfg* pNewCfg) { } ASSERT(rid == pSyncNode->rid); - bool IamInNew = false; + bool IamInNew = syncNodeInConfig(pSyncNode, pNewCfg); + +#if 0 for (int i = 0; i < pNewCfg->replicaNum; ++i) { if (strcmp((pNewCfg->nodeInfo)[i].nodeFqdn, pSyncNode->myNodeInfo.nodeFqdn) == 0 && (pNewCfg->nodeInfo)[i].nodePort == pSyncNode->myNodeInfo.nodePort) { @@ -247,6 +252,7 @@ int32_t syncReconfig(int64_t rid, const SSyncCfg* pNewCfg) { } */ } +#endif if (!IamInNew) { sError("sync reconfig error, not in new config"); @@ -556,7 +562,7 @@ int32_t syncPropose(int64_t rid, const SRpcMsg* pMsg, bool isWeak) { return -1; } assert(rid == pSyncNode->rid); - sDebug("vgId:%d sync event propose msgType:%s", pSyncNode->vgId, TMSG_INFO(pMsg->msgType)); + sDebug("vgId:%d sync event propose msgType:%s,%d", pSyncNode->vgId, TMSG_INFO(pMsg->msgType), pMsg->msgType); ret = syncNodePropose(pSyncNode, pMsg, isWeak); taosReleaseRef(tsNodeRefId, pSyncNode->rid); @@ -565,7 +571,7 @@ int32_t syncPropose(int64_t rid, const SRpcMsg* pMsg, bool isWeak) { int32_t syncNodePropose(SSyncNode* pSyncNode, const SRpcMsg* pMsg, bool isWeak) { int32_t ret = 0; - sDebug("vgId:%d sync event propose msgType:%s", pSyncNode->vgId, TMSG_INFO(pMsg->msgType)); + sDebug("vgId:%d sync event propose msgType:%s,%d", pSyncNode->vgId, TMSG_INFO(pMsg->msgType), pMsg->msgType); if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { SRespStub stub; @@ -1256,9 +1262,36 @@ char* syncNode2SimpleStr(const SSyncNode* pSyncNode) { return s; } -void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex lastConfigChangeIndex, bool* isDrop) { +bool syncNodeInConfig(SSyncNode* pSyncNode, const SSyncCfg* config) { + bool b1 = false; + bool b2 = false; + + for (int i = 0; i < config->replicaNum; ++i) { + if (strcmp((config->nodeInfo)[i].nodeFqdn, pSyncNode->myNodeInfo.nodeFqdn) == 0 && + (config->nodeInfo)[i].nodePort == pSyncNode->myNodeInfo.nodePort) { + b1 = true; + break; + } + } + + for (int i = 0; i < config->replicaNum; ++i) { + SRaftId raftId; + raftId.addr = syncUtilAddr2U64((config->nodeInfo)[i].nodeFqdn, (config->nodeInfo)[i].nodePort); + raftId.vgId = pSyncNode->vgId; + + if (syncUtilSameId(&raftId, &(pSyncNode->myRaftId))) { + b2 = true; + break; + } + } + + ASSERT(b1 == b2); + return b1; +} + +void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex lastConfigChangeIndex, bool* isDrop) { SSyncCfg oldConfig = pSyncNode->pRaftCfg->cfg; - pSyncNode->pRaftCfg->cfg = *newConfig; + pSyncNode->pRaftCfg->cfg = *pNewConfig; pSyncNode->pRaftCfg->lastConfigIndex = lastConfigChangeIndex; int32_t ret = 0; @@ -1270,7 +1303,8 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex l SSyncSnapshotSender* oldSenders[TSDB_MAX_REPLICA]; for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { oldSenders[i] = (pSyncNode->senders)[i]; - sDebug("vgId:%d sync event save senders %d, %p", pSyncNode->vgId, i, oldSenders[i]); + sDebug("vgId:%d sync event save senders %d, %p, privateTerm:%lu", pSyncNode->vgId, i, oldSenders[i], + oldSenders[i]->privateTerm); if (gRaftDetailLog) { ; } @@ -1322,8 +1356,8 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex l char host[128]; uint16_t port; syncUtilU642Addr((pSyncNode->replicasId)[i].addr, host, sizeof(host), &port); - sDebug("vgId:%d sync event reset sender for %lu, newIndex:%d, %s:%d, %p", pSyncNode->vgId, - (pSyncNode->replicasId)[i].addr, i, host, port, oldSenders[j]); + sDebug("vgId:%d sync event reset sender for %lu, newIndex:%d, %s:%d, %p, privateTerm:%lu", pSyncNode->vgId, + (pSyncNode->replicasId)[i].addr, i, host, port, oldSenders[j], oldSenders[j]->privateTerm); (pSyncNode->senders)[i] = oldSenders[j]; oldSenders[j] = NULL; reset = true; @@ -1341,7 +1375,8 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex l for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { if ((pSyncNode->senders)[i] == NULL) { (pSyncNode->senders)[i] = snapshotSenderCreate(pSyncNode, i); - sDebug("vgId:%d sync event create new sender %p replicaIndex:%d", pSyncNode->vgId, (pSyncNode->senders)[i], i); + sDebug("vgId:%d sync event create new sender %p replicaIndex:%d, privateTerm:%lu", pSyncNode->vgId, + (pSyncNode->senders)[i], i, (pSyncNode->senders)[i]->privateTerm); } } @@ -1354,8 +1389,10 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex l } } - bool IamInOld = false; - bool IamInNew = false; + bool IamInOld = syncNodeInConfig(pSyncNode, &oldConfig); + bool IamInNew = syncNodeInConfig(pSyncNode, pNewConfig); + +#if 0 for (int i = 0; i < oldConfig.replicaNum; ++i) { if (strcmp((oldConfig.nodeInfo)[i].nodeFqdn, pSyncNode->myNodeInfo.nodeFqdn) == 0 && (oldConfig.nodeInfo)[i].nodePort == pSyncNode->myNodeInfo.nodePort) { @@ -1371,6 +1408,7 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex l break; } } +#endif *isDrop = true; if (IamInOld && !IamInNew) { @@ -1379,6 +1417,10 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex l *isDrop = false; } + // may be add me to a new raft group + if (IamInOld && IamInNew && oldConfig.replicaNum == 1) { + } + if (IamInNew) { pSyncNode->pRaftCfg->isStandBy = 0; // change isStandBy to normal } @@ -1404,14 +1446,17 @@ void syncNodeRelease(SSyncNode* pNode) { taosReleaseRef(tsNodeRefId, pNode->rid) void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term) { if (term > pSyncNode->pRaftStore->currentTerm) { raftStoreSetTerm(pSyncNode->pRaftStore, term); - syncNodeBecomeFollower(pSyncNode, "update term"); + char tmpBuf[64]; + snprintf(tmpBuf, sizeof(tmpBuf), "update term to %lu", term); + syncNodeBecomeFollower(pSyncNode, tmpBuf); raftStoreClearVote(pSyncNode->pRaftStore); } } void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) { - sDebug("vgId:%d sync event become follower, isStandBy:%d, replicaNum:%d, %s", pSyncNode->vgId, - pSyncNode->pRaftCfg->isStandBy, pSyncNode->replicaNum, debugStr); + sDebug("vgId:%d sync event become follower, isStandBy:%d, currentTerm:%lu, replicaNum:%d, restoreFinish:%d, %s", + pSyncNode->vgId, pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftStore->currentTerm, pSyncNode->replicaNum, + pSyncNode->restoreFinish, debugStr); // maybe clear leader cache if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { @@ -1445,8 +1490,12 @@ void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) { // /\ UNCHANGED <> // void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { - sDebug("vgId:%d sync event become leader, isStandBy:%d, replicaNum:%d %s", pSyncNode->vgId, - pSyncNode->pRaftCfg->isStandBy, pSyncNode->replicaNum, debugStr); + // reset restoreFinish + pSyncNode->restoreFinish = false; + + sDebug("vgId:%d sync event become leader, isStandBy:%d, currentTerm:%lu, replicaNum:%d, restoreFinish:%d, %s", + pSyncNode->vgId, pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftStore->currentTerm, pSyncNode->replicaNum, + pSyncNode->restoreFinish, debugStr); // state change pSyncNode->state = TAOS_SYNC_STATE_LEADER; @@ -2028,11 +2077,11 @@ static int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftE pSyncLeaderTransfer->newLeaderId.addr); */ - sDebug("vgId:%d sync event, begin leader transfer", ths->vgId); + sDebug("vgId:%d sync event begin leader transfer", ths->vgId); if (strcmp(pSyncLeaderTransfer->newNodeInfo.nodeFqdn, ths->myNodeInfo.nodeFqdn) == 0 && pSyncLeaderTransfer->newNodeInfo.nodePort == ths->myNodeInfo.nodePort) { - sDebug("vgId:%d sync event, maybe leader transfer to %s:%d %lu", ths->vgId, + sDebug("vgId:%d sync event maybe leader transfer to %s:%d %lu", ths->vgId, pSyncLeaderTransfer->newNodeInfo.nodeFqdn, pSyncLeaderTransfer->newNodeInfo.nodePort, pSyncLeaderTransfer->newLeaderId.addr); @@ -2067,6 +2116,21 @@ static int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftE return 0; } +int32_t syncNodeUpdateNewConfigIndex(SSyncNode* ths, SSyncCfg* pNewCfg) { + for (int i = 0; i < pNewCfg->replicaNum; ++i) { + SRaftId raftId; + raftId.addr = syncUtilAddr2U64((pNewCfg->nodeInfo)[i].nodeFqdn, (pNewCfg->nodeInfo)[i].nodePort); + raftId.vgId = ths->vgId; + + if (syncUtilSameId(&(ths->myRaftId), &raftId)) { + pNewCfg->myIndex = i; + return 0; + } + } + + return -1; +} + static int32_t syncNodeConfigChange(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* pEntry) { SSyncCfg oldSyncCfg = ths->pRaftCfg->cfg; @@ -2075,19 +2139,23 @@ static int32_t syncNodeConfigChange(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftE ASSERT(ret == 0); // update new config myIndex - bool IamInNew = false; - for (int i = 0; i < newSyncCfg.replicaNum; ++i) { - if (strcmp(ths->myNodeInfo.nodeFqdn, (newSyncCfg.nodeInfo)[i].nodeFqdn) == 0 && - ths->myNodeInfo.nodePort == (newSyncCfg.nodeInfo)[i].nodePort) { - newSyncCfg.myIndex = i; - IamInNew = true; - break; - } - } + syncNodeUpdateNewConfigIndex(ths, &newSyncCfg); + + bool IamInNew = syncNodeInConfig(ths, &newSyncCfg); + + /* + for (int i = 0; i < newSyncCfg.replicaNum; ++i) { + if (strcmp(ths->myNodeInfo.nodeFqdn, (newSyncCfg.nodeInfo)[i].nodeFqdn) == 0 && + ths->myNodeInfo.nodePort == (newSyncCfg.nodeInfo)[i].nodePort) { + newSyncCfg.myIndex = i; + IamInNew = true; + break; + } + } + */ bool isDrop; - // if (IamInNew || (!IamInNew && ths->state != TAOS_SYNC_STATE_LEADER)) { if (IamInNew) { syncNodeUpdateConfig(ths, &newSyncCfg, pEntry->index, &isDrop); @@ -2186,7 +2254,8 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, ths->pFsm->FpRestoreFinishCb(ths->pFsm); } ths->restoreFinish = true; - sDebug("vgId:%d sync event restore finish, index:%ld", ths->vgId, pEntry->index); + sDebug("vgId:%d sync event restore finish, %s, index:%ld", ths->vgId, syncUtilState2String(ths->state), + pEntry->index); } } diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 996cd12e4a..79ce84abe9 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -162,9 +162,10 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr walFsync(pWal, true); - sDebug("vgId:%d sync event write index:%ld, %s, isStandBy:%d, msgType:%s, originalRpcType:%s", pData->pSyncNode->vgId, - pEntry->index, syncUtilState2String(pData->pSyncNode->state), pData->pSyncNode->pRaftCfg->isStandBy, - TMSG_INFO(pEntry->msgType), TMSG_INFO(pEntry->originalRpcType)); + sDebug("vgId:%d sync event write index:%ld, %s, isStandBy:%d, msgType:%s,%d, originalRpcType:%s,%d", + pData->pSyncNode->vgId, pEntry->index, syncUtilState2String(pData->pSyncNode->state), + pData->pSyncNode->pRaftCfg->isStandBy, TMSG_INFO(pEntry->msgType), pEntry->msgType, + TMSG_INFO(pEntry->originalRpcType), pEntry->originalRpcType); return code; } diff --git a/source/libs/sync/src/syncRespMgr.c b/source/libs/sync/src/syncRespMgr.c index 5087cacd02..adc67b5d8d 100644 --- a/source/libs/sync/src/syncRespMgr.c +++ b/source/libs/sync/src/syncRespMgr.c @@ -45,8 +45,9 @@ int64_t syncRespMgrAdd(SSyncRespMgr *pObj, SRespStub *pStub) { taosHashPut(pObj->pRespHash, &keyCode, sizeof(keyCode), pStub, sizeof(SRespStub)); SSyncNode *pSyncNode = pObj->data; - sDebug("vgId:%d sync event resp mgr add, type:%s seq:%lu handle:%p", pSyncNode->vgId, - TMSG_INFO(pStub->rpcMsg.msgType), keyCode, pStub->rpcMsg.info.handle); + sDebug("vgId:%d sync event resp mgr add, msgType:%s,%d seq:%lu handle:%p ahandle:%p", pSyncNode->vgId, + TMSG_INFO(pStub->rpcMsg.msgType), pStub->rpcMsg.msgType, keyCode, pStub->rpcMsg.info.handle, + pStub->rpcMsg.info.ahandle); taosThreadMutexUnlock(&(pObj->mutex)); return keyCode; @@ -69,8 +70,9 @@ int32_t syncRespMgrGet(SSyncRespMgr *pObj, uint64_t index, SRespStub *pStub) { memcpy(pStub, pTmp, sizeof(SRespStub)); SSyncNode *pSyncNode = pObj->data; - sDebug("vgId:%d sync event resp mgr get, type:%s seq:%lu handle:%p", pSyncNode->vgId, - TMSG_INFO(pStub->rpcMsg.msgType), index, pStub->rpcMsg.info.handle); + sDebug("vgId:%d sync event resp mgr get, msgType:%s,%d seq:%lu handle:%p ahandle:%p", pSyncNode->vgId, + TMSG_INFO(pStub->rpcMsg.msgType), pStub->rpcMsg.msgType, index, pStub->rpcMsg.info.handle, + pStub->rpcMsg.info.ahandle); taosThreadMutexUnlock(&(pObj->mutex)); return 1; // get one object @@ -87,8 +89,9 @@ int32_t syncRespMgrGetAndDel(SSyncRespMgr *pObj, uint64_t index, SRespStub *pStu memcpy(pStub, pTmp, sizeof(SRespStub)); SSyncNode *pSyncNode = pObj->data; - sDebug("vgId:%d sync event resp mgr get and del, type:%s seq:%lu handle:%p", pSyncNode->vgId, - TMSG_INFO(pStub->rpcMsg.msgType), index, pStub->rpcMsg.info.handle); + sDebug("vgId:%d sync event resp mgr get and del, msgType:%s,%d seq:%lu handle:%p ahandle:%p", pSyncNode->vgId, + TMSG_INFO(pStub->rpcMsg.msgType), pStub->rpcMsg.msgType, index, pStub->rpcMsg.info.handle, + pStub->rpcMsg.info.ahandle); taosHashRemove(pObj->pRespHash, &index, sizeof(index)); taosThreadMutexUnlock(&(pObj->mutex)); diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index c92920a936..545ea953f8 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -591,6 +591,12 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { if (pMsg->lastConfigIndex >= SYNC_INDEX_BEGIN) { int32_t oldReplicaNum = pSyncNode->replicaNum; + // update new config myIndex + SSyncCfg newSyncCfg = pMsg->lastConfig; + syncNodeUpdateNewConfigIndex(pSyncNode, &newSyncCfg); + bool IamInNew = syncNodeInConfig(pSyncNode, &newSyncCfg); + +#if 0 // update new config myIndex bool IamInNew = false; SSyncCfg newSyncCfg = pMsg->lastConfig; @@ -602,6 +608,7 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { break; } } +#endif bool isDrop; if (IamInNew) { diff --git a/source/libs/sync/test/syncTestTool.cpp b/source/libs/sync/test/syncTestTool.cpp index 21049454f4..1eac372bd4 100644 --- a/source/libs/sync/test/syncTestTool.cpp +++ b/source/libs/sync/test/syncTestTool.cpp @@ -297,7 +297,7 @@ void usage(char* exe) { SRpcMsg* createRpcMsg(int i, int count, int myIndex) { SRpcMsg* pMsg = (SRpcMsg*)taosMemoryMalloc(sizeof(SRpcMsg)); memset(pMsg, 0, sizeof(SRpcMsg)); - pMsg->msgType = 9999; + pMsg->msgType = TDMT_VND_SUBMIT; pMsg->contLen = 256; pMsg->pCont = rpcMallocCont(pMsg->contLen); snprintf((char*)(pMsg->pCont), pMsg->contLen, "value-myIndex:%u-%d-%d-%ld", myIndex, i, count, taosGetTimestampMs()); @@ -384,8 +384,10 @@ int main(int argc, char** argv) { leaderTransferWait++; if (leaderTransferWait == 7) { - sTrace("begin leader transfer ..."); - int32_t ret = syncLeaderTransfer(rid); + if (leaderTransfer) { + sTrace("begin leader transfer ..."); + int32_t ret = syncLeaderTransfer(rid); + } } if (alreadySend < writeRecordNum) { From 54921550097c78bd3e769b5797897a4aa720cd8d Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 16 Jun 2022 14:49:37 +0800 Subject: [PATCH 13/36] fix(query): fix syntax error on windows platform. --- source/libs/executor/src/sortoperator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index 98a1aacbb6..2f2080d5fe 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -439,7 +439,7 @@ SOperatorInfo* createMultiwaySortMergeOperatorInfo(SOperatorInfo** downStreams, pInfo->bufPageSize = getProperSortPageSize(rowSize); uint32_t numOfSources = taosArrayGetSize(pSortInfo); - numOfSources = MAX(2, numOfSources); + numOfSources = TMAX(2, numOfSources); pInfo->sortBufSize = numOfSources * pInfo->bufPageSize; From d41efb2cfb284be46997466601b929b0fc4527db Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 16 Jun 2022 15:02:57 +0800 Subject: [PATCH 14/36] fix rc issue --- source/libs/scheduler/src/schJob.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/libs/scheduler/src/schJob.c b/source/libs/scheduler/src/schJob.c index 3e3f3639d7..31536f413d 100644 --- a/source/libs/scheduler/src/schJob.c +++ b/source/libs/scheduler/src/schJob.c @@ -1488,13 +1488,15 @@ int32_t schExecJobImpl(SSchedulerReq *pReq, int64_t *job, SQueryResult* pRes, bo qDebug("QID:0x%" PRIx64 " job refId 0x%"PRIx64 " started", pReq->pDag->queryId, pJob->refId); *job = pJob->refId; + if (!sync) { + pJob->userCb = SCH_EXEC_CB; + } + SCH_ERR_JRET(schLaunchJob(pJob)); if (sync) { SCH_JOB_DLOG("will wait for rsp now, job status:%s", SCH_GET_JOB_STATUS_STR(pJob)); tsem_wait(&pJob->rspSem); - } else { - pJob->userCb = SCH_EXEC_CB; } SCH_JOB_DLOG("job exec done, job status:%s, jobId:0x%"PRIx64, SCH_GET_JOB_STATUS_STR(pJob), pJob->refId); From 7aa087a48d4c5ae48e597f27c35f06b9964e5d44 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Thu, 16 Jun 2022 15:20:46 +0800 Subject: [PATCH 15/36] feat: redistribute vgroup --- include/util/taoserror.h | 8 +- source/dnode/mnode/impl/src/mndMain.c | 2 +- source/dnode/mnode/impl/src/mndSync.c | 8 -- source/dnode/vnode/src/vnd/vnodeSvr.c | 102 --------------- source/dnode/vnode/src/vnd/vnodeSync.c | 166 +++++++++++++++++++++---- source/libs/sync/src/syncMain.c | 2 +- source/util/src/terror.c | 2 +- 7 files changed, 152 insertions(+), 138 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 4f93ce9e4a..fcbe5cd878 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -410,10 +410,10 @@ int32_t* taosGetErrno(); #define TSDB_CODE_SYN_INVALID_CHECKSUM TAOS_DEF_ERROR_CODE(0, 0x0908) #define TSDB_CODE_SYN_INVALID_MSGLEN TAOS_DEF_ERROR_CODE(0, 0x0909) #define TSDB_CODE_SYN_INVALID_MSGTYPE TAOS_DEF_ERROR_CODE(0, 0x090A) - -#define TSDB_CODE_SYN_NOT_LEADER TAOS_DEF_ERROR_CODE(0, 0x0910) -#define TSDB_CODE_SYN_ONE_REPLICA TAOS_DEF_ERROR_CODE(0, 0x0911) -#define TSDB_CODE_SYN_NOT_IN_NEW_CONFIG TAOS_DEF_ERROR_CODE(0, 0x0912) +#define TSDB_CODE_SYN_IS_LEADER TAOS_DEF_ERROR_CODE(0, 0x090B) +#define TSDB_CODE_SYN_NOT_LEADER TAOS_DEF_ERROR_CODE(0, 0x090C) +#define TSDB_CODE_SYN_ONE_REPLICA TAOS_DEF_ERROR_CODE(0, 0x090D) +#define TSDB_CODE_SYN_NOT_IN_NEW_CONFIG TAOS_DEF_ERROR_CODE(0, 0x090E) #define TSDB_CODE_SYN_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x09FF) // tq diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index e141e7dfb3..f277a5169d 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -397,7 +397,7 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { char logBuf[512] = {0}; char *syncNodeStr = sync2SimpleStr(pMgmt->sync); - snprintf(logBuf, sizeof(logBuf), "==vnodeProcessSyncReq== msgType:%d, syncNode: %s", pMsg->msgType, syncNodeStr); + snprintf(logBuf, sizeof(logBuf), "==mndProcessSyncMsg== msgType:%d, syncNode: %s", pMsg->msgType, syncNodeStr); static int64_t mndTick = 0; if (++mndTick % 10 == 1) { mTrace("sync trace msg:%s, %s", TMSG_INFO(pMsg->msgType), syncNodeStr); diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 63708aef8a..e7b4489ed9 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -256,14 +256,6 @@ void mndSyncStart(SMnode *pMnode) { syncSetMsgCb(pMgmt->sync, &pMnode->msgCb); syncStart(pMgmt->sync); mDebug("mnode sync started, id:%" PRId64 " standby:%d", pMgmt->sync, pMgmt->standby); - - /* - if (pMgmt->standby) { - syncStartStandBy(pMgmt->sync); - } else { - syncStart(pMgmt->sync); - } - */ } void mndSyncStop(SMnode *pMnode) {} diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index c981f6cb99..d170931e7e 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -300,108 +300,6 @@ void vnodeUpdateMetaRsp(SVnode *pVnode, STableMetaRsp *pMetaRsp) { pMetaRsp->precision = pVnode->config.tsdbCfg.precision; } -int32_t vnodeProcessSyncReq(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) { - int32_t ret = 0; - - if (syncEnvIsStart()) { - SSyncNode *pSyncNode = syncNodeAcquire(pVnode->sync); - assert(pSyncNode != NULL); - - ESyncState state = syncGetMyRole(pVnode->sync); - SyncTerm currentTerm = syncGetMyTerm(pVnode->sync); - - SMsgHead *pHead = pMsg->pCont; - - char logBuf[512] = {0}; - char *syncNodeStr = sync2SimpleStr(pVnode->sync); - snprintf(logBuf, sizeof(logBuf), "==vnodeProcessSyncReq== msgType:%d, syncNode: %s", pMsg->msgType, syncNodeStr); - static int64_t vndTick = 0; - if (++vndTick % 10 == 1) { - vTrace("sync trace msg:%s, %s", TMSG_INFO(pMsg->msgType), syncNodeStr); - } - syncRpcMsgLog2(logBuf, pMsg); - taosMemoryFree(syncNodeStr); - - SRpcMsg *pRpcMsg = pMsg; - - if (pRpcMsg->msgType == TDMT_SYNC_TIMEOUT) { - SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pRpcMsg); - assert(pSyncMsg != NULL); - - ret = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg); - syncTimeoutDestroy(pSyncMsg); - - } else if (pRpcMsg->msgType == TDMT_SYNC_PING) { - SyncPing *pSyncMsg = syncPingFromRpcMsg2(pRpcMsg); - assert(pSyncMsg != NULL); - - ret = syncNodeOnPingCb(pSyncNode, pSyncMsg); - syncPingDestroy(pSyncMsg); - - } else if (pRpcMsg->msgType == TDMT_SYNC_PING_REPLY) { - SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pRpcMsg); - assert(pSyncMsg != NULL); - - ret = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); - syncPingReplyDestroy(pSyncMsg); - - } else if (pRpcMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { - SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pRpcMsg); - assert(pSyncMsg != NULL); - - ret = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg); - syncClientRequestDestroy(pSyncMsg); - - } else if (pRpcMsg->msgType == TDMT_SYNC_REQUEST_VOTE) { - SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pRpcMsg); - assert(pSyncMsg != NULL); - - ret = syncNodeOnRequestVoteCb(pSyncNode, pSyncMsg); - syncRequestVoteDestroy(pSyncMsg); - - } else if (pRpcMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { - SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pRpcMsg); - assert(pSyncMsg != NULL); - - ret = syncNodeOnRequestVoteReplyCb(pSyncNode, pSyncMsg); - syncRequestVoteReplyDestroy(pSyncMsg); - - } else if (pRpcMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) { - SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pRpcMsg); - assert(pSyncMsg != NULL); - - ret = syncNodeOnAppendEntriesCb(pSyncNode, pSyncMsg); - syncAppendEntriesDestroy(pSyncMsg); - - } else if (pRpcMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { - SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pRpcMsg); - assert(pSyncMsg != NULL); - - ret = syncNodeOnAppendEntriesReplyCb(pSyncNode, pSyncMsg); - syncAppendEntriesReplyDestroy(pSyncMsg); - - } else if (pRpcMsg->msgType == TDMT_SYNC_SET_VNODE_STANDBY) { - ret = syncSetStandby(pVnode->sync); - vInfo("vgId:%d, set standby result:0x%x rid:%" PRId64, pVnode->config.vgId, ret, pVnode->sync); - SRpcMsg rsp = {.code = ret, .info = pMsg->info}; - tmsgSendRsp(&rsp); - } else { - vError("==vnodeProcessSyncReq== error msg type:%d", pRpcMsg->msgType); - ret = -1; - } - - syncNodeRelease(pSyncNode); - } else { - vError("==vnodeProcessSyncReq== error syncEnv stop"); - ret = -1; - } - - if (ret != 0) { - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - } - return ret; -} - static int32_t vnodeProcessCreateStbReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp) { SVCreateStbReq req = {0}; SDecoder coder; diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 25661d2e69..8662c8369b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -50,6 +50,33 @@ static inline void vnodePostBlockMsg(SVnode *pVnode, tmsg_t type) { } } +static int32_t vnodeSetStandBy(SVnode *pVnode) { + vInfo("vgId:%d, start to set standby", TD_VID(pVnode)); + + if (syncSetStandby(pVnode->sync) == 0) { + vInfo("vgId:%d, set standby success", TD_VID(pVnode)); + return 0; + } else if (terrno != TSDB_CODE_SYN_IS_LEADER) { + vError("vgId:%d, failed to set standby since %s", TD_VID(pVnode), terrstr()); + return -1; + } + + if (syncLeaderTransfer(pVnode->sync) != 0) { + vError("vgId:%d, failed to transfer leader since:%s", TD_VID(pVnode), terrstr()); + return -1; + } else { + vInfo("vgId:%d, transfer leader success", TD_VID(pVnode)); + } + + if (syncSetStandby(pVnode->sync) == 0) { + vInfo("vgId:%d, set standby success", TD_VID(pVnode)); + return 0; + } else { + vError("vgId:%d, failed to set standby since %s", TD_VID(pVnode), terrstr()); + return -1; + } +} + static int32_t vnodeProcessAlterReplicaReq(SVnode *pVnode, SRpcMsg *pMsg) { SAlterVnodeReq req = {0}; if (tDeserializeSAlterVnodeReq((char *)pMsg->pCont + sizeof(SMsgHead), pMsg->contLen - sizeof(SMsgHead), &req) != 0) { @@ -74,14 +101,19 @@ static int32_t vnodeProcessAlterReplicaReq(SVnode *pVnode, SRpcMsg *pMsg) { int32_t code = syncPropose(pVnode->sync, &rpcMsg, false); if (code != 0) { - vDebug("vgId:%d, failed to propose reconfig msg since %s", TD_VID(pVnode), terrstr()); - if (syncLeaderTransfer(pVnode->sync) != 0) { - vError("vgId:%d, failed to transfer leader since %s", TD_VID(pVnode), terrstr()); - } else { - vDebug("vgId:%d, transfer leader success, propose reconfig config again", TD_VID(pVnode)); + if (terrno != 0) code = terrno; + + vInfo("vgId:%d, failed to propose reconfig msg since %s", TD_VID(pVnode), terrstr()); + if (terrno == TSDB_CODE_SYN_IS_LEADER) { + if (syncLeaderTransfer(pVnode->sync) != 0) { + vError("vgId:%d, failed to transfer leader since %s", TD_VID(pVnode), terrstr()); + } else { + vInfo("vgId:%d, transfer leader success", TD_VID(pVnode)); + } } } + terrno = code; return code; } @@ -108,7 +140,6 @@ void vnodeProposeMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { if (code == 0) { vnodeAccumBlockMsg(pVnode, pMsg->msgType); - } else if (code == -1 && terrno == TSDB_CODE_SYN_NOT_LEADER) { SEpSet newEpSet = {0}; syncGetEpSet(pVnode->sync, &newEpSet); @@ -170,6 +201,108 @@ void vnodeApplyMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { } } +int32_t vnodeProcessSyncReq(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) { + int32_t ret = 0; + + if (syncEnvIsStart()) { + SSyncNode *pSyncNode = syncNodeAcquire(pVnode->sync); + assert(pSyncNode != NULL); + + ESyncState state = syncGetMyRole(pVnode->sync); + SyncTerm currentTerm = syncGetMyTerm(pVnode->sync); + + SMsgHead *pHead = pMsg->pCont; + + char logBuf[512] = {0}; + char *syncNodeStr = sync2SimpleStr(pVnode->sync); + snprintf(logBuf, sizeof(logBuf), "==vnodeProcessSyncReq== msgType:%d, syncNode: %s", pMsg->msgType, syncNodeStr); + static int64_t vndTick = 0; + if (++vndTick % 10 == 1) { + vTrace("sync trace msg:%s, %s", TMSG_INFO(pMsg->msgType), syncNodeStr); + } + syncRpcMsgLog2(logBuf, pMsg); + taosMemoryFree(syncNodeStr); + + SRpcMsg *pRpcMsg = pMsg; + + if (pRpcMsg->msgType == TDMT_SYNC_TIMEOUT) { + SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg); + syncTimeoutDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_SYNC_PING) { + SyncPing *pSyncMsg = syncPingFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnPingCb(pSyncNode, pSyncMsg); + syncPingDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_SYNC_PING_REPLY) { + SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); + syncPingReplyDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { + SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg); + syncClientRequestDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_SYNC_REQUEST_VOTE) { + SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnRequestVoteCb(pSyncNode, pSyncMsg); + syncRequestVoteDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { + SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnRequestVoteReplyCb(pSyncNode, pSyncMsg); + syncRequestVoteReplyDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) { + SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnAppendEntriesCb(pSyncNode, pSyncMsg); + syncAppendEntriesDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { + SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnAppendEntriesReplyCb(pSyncNode, pSyncMsg); + syncAppendEntriesReplyDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_SYNC_SET_VNODE_STANDBY) { + ret = vnodeSetStandBy(pVnode); + if (ret != 0 && terrno != 0) ret = terrno; + SRpcMsg rsp = {.code = ret, .info = pMsg->info}; + tmsgSendRsp(&rsp); + } else { + vError("==vnodeProcessSyncReq== error msg type:%d", pRpcMsg->msgType); + ret = -1; + } + + syncNodeRelease(pSyncNode); + } else { + vError("==vnodeProcessSyncReq== error syncEnv stop"); + ret = -1; + } + + if (ret != 0) { + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + } + return ret; +} + static int32_t vnodeSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { int32_t code = tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); if (code != 0) { @@ -258,17 +391,17 @@ static void vnodeSyncRollBackMsg(SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta syncRpcMsgLog2(logBuf, (SRpcMsg *)pMsg); } -int32_t vnodeSnapshotStartRead(struct SSyncFSM *pFsm, void **ppReader) { return 0; } +static int32_t vnodeSnapshotStartRead(struct SSyncFSM *pFsm, void **ppReader) { return 0; } -int32_t vnodeSnapshotStopRead(struct SSyncFSM *pFsm, void *pReader) { return 0; } +static int32_t vnodeSnapshotStopRead(struct SSyncFSM *pFsm, void *pReader) { return 0; } -int32_t vnodeSnapshotDoRead(struct SSyncFSM *pFsm, void *pReader, void **ppBuf, int32_t *len) { return 0; } +static int32_t vnodeSnapshotDoRead(struct SSyncFSM *pFsm, void *pReader, void **ppBuf, int32_t *len) { return 0; } -int32_t vnodeSnapshotStartWrite(struct SSyncFSM *pFsm, void **ppWriter) { return 0; } +static int32_t vnodeSnapshotStartWrite(struct SSyncFSM *pFsm, void **ppWriter) { return 0; } -int32_t vnodeSnapshotStopWrite(struct SSyncFSM *pFsm, void *pWriter, bool isApply) { return 0; } +static int32_t vnodeSnapshotStopWrite(struct SSyncFSM *pFsm, void *pWriter, bool isApply) { return 0; } -int32_t vnodeSnapshotDoWrite(struct SSyncFSM *pFsm, void *pWriter, void *pBuf, int32_t len) { return 0; } +static int32_t vnodeSnapshotDoWrite(struct SSyncFSM *pFsm, void *pWriter, void *pBuf, int32_t len) { return 0; } static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) { SSyncFSM *pFsm = taosMemoryCalloc(1, sizeof(SSyncFSM)); @@ -279,7 +412,6 @@ static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) { pFsm->FpGetSnapshot = vnodeSyncGetSnapshot; pFsm->FpRestoreFinishCb = NULL; pFsm->FpReConfigCb = vnodeSyncReconfig; - pFsm->FpSnapshotStartRead = vnodeSnapshotStartRead; pFsm->FpSnapshotStopRead = vnodeSnapshotStopRead; pFsm->FpSnapshotDoRead = vnodeSnapshotDoRead; @@ -292,7 +424,6 @@ static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) { int32_t vnodeSyncOpen(SVnode *pVnode, char *path) { SSyncInfo syncInfo = { - .isStandBy = false, .snapshotEnable = false, .vgId = pVnode->config.vgId, .isStandBy = pVnode->config.standby, @@ -321,13 +452,6 @@ int32_t vnodeSyncOpen(SVnode *pVnode, char *path) { void vnodeSyncStart(SVnode *pVnode) { syncSetMsgCb(pVnode->sync, &pVnode->msgCb); syncStart(pVnode->sync); - /* - if (pVnode->config.standby) { - syncStartStandBy(pVnode->sync); - } else { - syncStart(pVnode->sync); - } - */ } void vnodeSyncClose(SVnode *pVnode) { syncStop(pVnode->sync); } diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 85bdf18ab4..03a4f7b2b3 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -156,7 +156,7 @@ int32_t syncSetStandby(int64_t rid) { if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { taosReleaseRef(tsNodeRefId, pSyncNode->rid); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + terrno = TSDB_CODE_SYN_IS_LEADER; sError("failed to set standby since it is leader, rid:%" PRId64, rid); return -1; } diff --git a/source/util/src/terror.c b/source/util/src/terror.c index fe040df6fc..596aab0e09 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -412,7 +412,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_SYN_MISMATCHED_SIGNATURE, "Mismatched signature" TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INVALID_CHECKSUM, "Invalid msg checksum") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INVALID_MSGLEN, "Invalid msg length") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INVALID_MSGTYPE, "Invalid msg type") - +TAOS_DEFINE_ERROR(TSDB_CODE_SYN_IS_LEADER, "Sync is leader") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_NOT_LEADER, "Sync not leader") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_ONE_REPLICA, "Sync one replica") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_NOT_IN_NEW_CONFIG, "Sync not in new config") From 855f1842c2760e038c8f3cda43b1ef1455e7a12c Mon Sep 17 00:00:00 2001 From: tangfangzhi Date: Thu, 16 Jun 2022 15:28:52 +0800 Subject: [PATCH 16/36] ci: allow to run specified cases in windows full test --- tests/system-test/test-all.bat | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/system-test/test-all.bat b/tests/system-test/test-all.bat index 0929b1fc6e..275cbeebbb 100644 --- a/tests/system-test/test-all.bat +++ b/tests/system-test/test-all.bat @@ -5,7 +5,12 @@ set /a a=0 if %1 == full ( echo Windows Taosd Full Test set /a exitNum=0 - for /F "usebackq tokens=*" %%i in (fulltest.bat) do ( + del /Q /F failed.txt + set caseFile="fulltest.bat" + if not "%2" == "" ( + set caseFile="%2" + ) + for /F "usebackq tokens=*" %%i in (!caseFile!) do ( for /f "tokens=1* delims= " %%a in ("%%i") do if not "%%a" == "@REM" ( set /a a+=1 echo !a! Processing %%i @@ -13,7 +18,7 @@ if %1 == full ( set time1=!_timeTemp! echo Start at !time! call %%i ARG1 > result_!a!.txt 2>error_!a!.txt - if errorlevel 1 ( call :colorEcho 0c "failed" &echo. && set /a exitNum=8 ) else ( call :colorEcho 0a "Success" &echo. ) + if errorlevel 1 ( call :colorEcho 0c "failed" &echo. && set /a exitNum=8 && echo %%i >>failed.txt ) else ( call :colorEcho 0a "Success" &echo. ) ) ) exit !exitNum! @@ -77,4 +82,4 @@ for %%a in (%tt%) do ( set /a index=index+1 ) set /a _timeTemp=(%hh%*60+%mm%)*60+%ss% -goto :eof \ No newline at end of file +goto :eof From dbef3dfc55d19e1d3e182ef61f23629fe7261eff Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Thu, 16 Jun 2022 15:34:33 +0800 Subject: [PATCH 17/36] fix: not submit for rsma without rollup --- source/common/src/tdatablock.c | 42 ++++++++++++++++---------- source/dnode/vnode/src/inc/vnodeInt.h | 1 + source/dnode/vnode/src/sma/smaRollup.c | 14 +++++++-- 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 5a2aaed74e..3ec1b98b16 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1583,6 +1583,11 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SArray* pDataBlocks int32_t rowSize = pDataBlock->info.rowSize; int64_t groupId = pDataBlock->info.groupId; + if (colNum <= 1) { + // invalid if only with TS col + continue; + } + if (rb.nCols != colNum) { tdSRowSetTpInfo(&rb, colNum, pTSchema->flen); } @@ -1679,23 +1684,28 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SArray* pDataBlocks msgLen += pSubmitBlk->dataLen; } - (*pReq)->length = msgLen; + if (numOfBlks > 0) { + (*pReq)->length = msgLen; - (*pReq)->header.vgId = htonl(vgId); - (*pReq)->header.contLen = htonl(msgLen); - (*pReq)->length = (*pReq)->header.contLen; - (*pReq)->numOfBlocks = htonl(numOfBlks); - SSubmitBlk* blk = (SSubmitBlk*)((*pReq) + 1); - while (numOfBlks--) { - int32_t dataLen = blk->dataLen; - blk->uid = htobe64(blk->uid); - blk->suid = htobe64(blk->suid); - blk->padding = htonl(blk->padding); - blk->sversion = htonl(blk->sversion); - blk->dataLen = htonl(blk->dataLen); - blk->schemaLen = htonl(blk->schemaLen); - blk->numOfRows = htons(blk->numOfRows); - blk = (SSubmitBlk*)(blk->data + dataLen); + (*pReq)->header.vgId = htonl(vgId); + (*pReq)->header.contLen = htonl(msgLen); + (*pReq)->length = (*pReq)->header.contLen; + (*pReq)->numOfBlocks = htonl(numOfBlks); + SSubmitBlk* blk = (SSubmitBlk*)((*pReq) + 1); + while (numOfBlks--) { + int32_t dataLen = blk->dataLen; + blk->uid = htobe64(blk->uid); + blk->suid = htobe64(blk->suid); + blk->padding = htonl(blk->padding); + blk->sversion = htonl(blk->sversion); + blk->dataLen = htonl(blk->dataLen); + blk->schemaLen = htonl(blk->schemaLen); + blk->numOfRows = htons(blk->numOfRows); + blk = (SSubmitBlk*)(blk->data + dataLen); + } + } else { + // no valid rows + taosMemoryFreeClear(*pReq); } return TSDB_CODE_SUCCESS; diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 2fd0815181..553c6a40ab 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -260,6 +260,7 @@ struct SSma { #define SMA_CFG(s) (&(s)->pVnode->config) #define SMA_TSDB_CFG(s) (&(s)->pVnode->config.tsdbCfg) +#define SMA_RETENTION(s) ((SRetention *)&(s)->pVnode->config.tsdbCfg.retentions) #define SMA_LOCKED(s) ((s)->locked) #define SMA_META(s) ((s)->pVnode->pMeta) #define SMA_VID(s) TD_VID((s)->pVnode) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index b2dcce8f4c..0c372dfa70 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -400,22 +400,24 @@ static FORCE_INLINE int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int3 } if (taosArrayGetSize(pResult) > 0) { -#if 1 +#if 0 char flag[10] = {0}; snprintf(flag, 10, "level %" PRIi8, level); blockDebugShowData(pResult, flag); #endif STsdb *sinkTsdb = (level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb1 : pSma->pRSmaTsdb2); SSubmitReq *pReq = NULL; - if (buildSubmitReqFromDataBlock(&pReq, pResult, pTSchema, SMA_VID(pSma), suid) != 0) { + if (buildSubmitReqFromDataBlock(&pReq, pResult, pTSchema, SMA_VID(pSma), suid) < 0) { taosArrayDestroy(pResult); return TSDB_CODE_FAILED; } - if (tdProcessSubmitReq(sinkTsdb, INT64_MAX, pReq) != 0) { + + if (pReq && tdProcessSubmitReq(sinkTsdb, INT64_MAX, pReq) < 0) { taosArrayDestroy(pResult); taosMemoryFreeClear(pReq); return TSDB_CODE_FAILED; } + taosMemoryFreeClear(pReq); } else { smaDebug("vgId:%d, no rsma % " PRIi8 " data generated since %s", SMA_VID(pSma), level, tstrerror(terrno)); @@ -469,6 +471,12 @@ int32_t tdProcessRSmaSubmit(SSma *pSma, void *pMsg, int32_t inputType) { return TSDB_CODE_SUCCESS; } + SRetention *pRetention = SMA_RETENTION(pSma); + if (!RETENTION_VALID(pRetention + 1)) { + // return directly if retention level 1 is invalid + return TSDB_CODE_SUCCESS; + } + if (inputType == STREAM_DATA_TYPE_SUBMIT_BLOCK) { STbUidStore uidStore = {0}; tdFetchSubmitReqSuids(pMsg, &uidStore); From 9a9200d28b93ae286f19b530688c3d28dfce94e6 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Thu, 16 Jun 2022 15:38:40 +0800 Subject: [PATCH 18/36] refactor(sync): add restore finish when become leader again --- source/libs/sync/src/syncAppendEntries.c | 7 +- source/libs/sync/src/syncAppendEntriesReply.c | 13 +- source/libs/sync/src/syncCommit.c | 5 +- source/libs/sync/src/syncMain.c | 70 +++++----- source/libs/sync/src/syncRaftLog.c | 16 ++- source/libs/sync/src/syncRespMgr.c | 19 +-- source/libs/sync/src/syncSnapshot.c | 131 ++++++++++-------- 7 files changed, 147 insertions(+), 114 deletions(-) diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 08a4081ad3..89b4761212 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -713,7 +713,8 @@ static int32_t syncNodeMakeLogSame(SSyncNode* ths, SyncAppendEntries* pMsg) { // delete confict entries code = ths->pLogStore->syncLogTruncate(ths->pLogStore, delBegin); ASSERT(code == 0); - sDebug("vgId:%d sync event log truncate, from %ld to %ld", ths->vgId, delBegin, delEnd); + sDebug("vgId:%d sync event currentTerm:%lu log truncate, from %ld to %ld", ths->vgId, ths->pRaftStore->currentTerm, + delBegin, delEnd); logStoreSimpleLog2("after syncNodeMakeLogSame", ths->pLogStore); return code; @@ -994,8 +995,8 @@ int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMs SyncIndex commitEnd = snapshot.lastApplyIndex; ths->commitIndex = snapshot.lastApplyIndex; - sDebug("vgId:%d sync event commit by snapshot from index:%ld to index:%ld, %s", ths->vgId, commitBegin, - commitEnd, syncUtilState2String(ths->state)); + sDebug("vgId:%d sync event currentTerm:%lu commit by snapshot from index:%ld to index:%ld, %s", ths->vgId, + ths->pRaftStore->currentTerm, commitBegin, commitEnd, syncUtilState2String(ths->state)); } SyncIndex beginIndex = ths->commitIndex + 1; diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index fbe3319675..5290d7d28e 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -190,18 +190,19 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries if (gRaftDetailLog) { char* s = snapshotSender2Str(pSender); sDebug( - "vgId:%d sync event snapshot send to %s:%d start sender first time, lastApplyIndex:%ld lastApplyTerm:%lu " + "vgId:%d sync event currentTerm:%lu snapshot send to %s:%d start sender first time, lastApplyIndex:%ld " + "lastApplyTerm:%lu " "lastConfigIndex:%ld privateTerm:%lu " "sender:%s", - ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, - pSender->snapshot.lastConfigIndex, pSender->privateTerm, s); + ths->vgId, ths->pRaftStore->currentTerm, host, port, pSender->snapshot.lastApplyIndex, + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->privateTerm, s); taosMemoryFree(s); } else { sDebug( - "vgId:%d sync event snapshot send to %s:%d start sender first time, lastApplyIndex:%ld " + "vgId:%d sync event currentTerm:%lu snapshot send to %s:%d start sender first time, lastApplyIndex:%ld " "lastApplyTerm:%lu lastConfigIndex:%ld privateTerm:%lu", - ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, - pSender->snapshot.lastConfigIndex, pSender->privateTerm); + ths->vgId, ths->pRaftStore->currentTerm, host, port, pSender->snapshot.lastApplyIndex, + pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->privateTerm); } } diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 3424fac5e7..efdb5b24ce 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -56,8 +56,9 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { SyncIndex commitEnd = snapshot.lastApplyIndex; pSyncNode->commitIndex = snapshot.lastApplyIndex; - sDebug("vgId:%d sync event commit by snapshot from index:%ld to index:%ld, %s", pSyncNode->vgId, - pSyncNode->commitIndex, snapshot.lastApplyIndex, syncUtilState2String(pSyncNode->state)); + sDebug("vgId:%d sync event currentTerm:%lu commit by snapshot from index:%ld to index:%ld, %s", pSyncNode->vgId, + pSyncNode->pRaftStore->currentTerm, pSyncNode->commitIndex, snapshot.lastApplyIndex, + syncUtilState2String(pSyncNode->state)); } // update commit index diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 5e3c1adf32..05a41daa9e 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -562,7 +562,8 @@ int32_t syncPropose(int64_t rid, const SRpcMsg* pMsg, bool isWeak) { return -1; } assert(rid == pSyncNode->rid); - sDebug("vgId:%d sync event propose msgType:%s,%d", pSyncNode->vgId, TMSG_INFO(pMsg->msgType), pMsg->msgType); + sDebug("vgId:%d sync event currentTerm:%lu propose msgType:%s,%d", pSyncNode->vgId, + pSyncNode->pRaftStore->currentTerm, TMSG_INFO(pMsg->msgType), pMsg->msgType); ret = syncNodePropose(pSyncNode, pMsg, isWeak); taosReleaseRef(tsNodeRefId, pSyncNode->rid); @@ -571,7 +572,8 @@ int32_t syncPropose(int64_t rid, const SRpcMsg* pMsg, bool isWeak) { int32_t syncNodePropose(SSyncNode* pSyncNode, const SRpcMsg* pMsg, bool isWeak) { int32_t ret = 0; - sDebug("vgId:%d sync event propose msgType:%s,%d", pSyncNode->vgId, TMSG_INFO(pMsg->msgType), pMsg->msgType); + sDebug("vgId:%d sync event currentTerm:%lu propose msgType:%s,%d", pSyncNode->vgId, + pSyncNode->pRaftStore->currentTerm, TMSG_INFO(pMsg->msgType), pMsg->msgType); if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { SRespStub stub; @@ -604,8 +606,6 @@ int32_t syncNodePropose(SSyncNode* pSyncNode, const SRpcMsg* pMsg, bool isWeak) SSyncNode* syncNodeOpen(const SSyncInfo* pOldSyncInfo) { SSyncInfo* pSyncInfo = (SSyncInfo*)pOldSyncInfo; - sDebug("vgId:%d sync event sync open", pSyncInfo->vgId); - SSyncNode* pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(SSyncNode)); assert(pSyncNode != NULL); memset(pSyncNode, 0, sizeof(SSyncNode)); @@ -819,6 +819,8 @@ SSyncNode* syncNodeOpen(const SSyncInfo* pOldSyncInfo) { // snapshot meta // pSyncNode->sMeta.lastConfigIndex = -1; + sDebug("vgId:%d sync event currentTerm:%lu sync open", pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm); + return pSyncNode; } @@ -864,7 +866,7 @@ void syncNodeStartStandBy(SSyncNode* pSyncNode) { } void syncNodeClose(SSyncNode* pSyncNode) { - sDebug("vgId:%d sync event sync close", pSyncNode->vgId); + sDebug("vgId:%d sync event currentTerm:%lu sync close", pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm); int32_t ret; assert(pSyncNode != NULL); @@ -1303,8 +1305,8 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex SSyncSnapshotSender* oldSenders[TSDB_MAX_REPLICA]; for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { oldSenders[i] = (pSyncNode->senders)[i]; - sDebug("vgId:%d sync event save senders %d, %p, privateTerm:%lu", pSyncNode->vgId, i, oldSenders[i], - oldSenders[i]->privateTerm); + sDebug("vgId:%d sync event currentTerm:%lu save senders %d, %p, privateTerm:%lu", pSyncNode->vgId, + pSyncNode->pRaftStore->currentTerm, i, oldSenders[i], oldSenders[i]->privateTerm); if (gRaftDetailLog) { ; } @@ -1356,8 +1358,9 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex char host[128]; uint16_t port; syncUtilU642Addr((pSyncNode->replicasId)[i].addr, host, sizeof(host), &port); - sDebug("vgId:%d sync event reset sender for %lu, newIndex:%d, %s:%d, %p, privateTerm:%lu", pSyncNode->vgId, - (pSyncNode->replicasId)[i].addr, i, host, port, oldSenders[j], oldSenders[j]->privateTerm); + sDebug("vgId:%d sync event currentTerm:%lu reset sender for %lu, newIndex:%d, %s:%d, %p, privateTerm:%lu", + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, (pSyncNode->replicasId)[i].addr, i, host, port, + oldSenders[j], oldSenders[j]->privateTerm); (pSyncNode->senders)[i] = oldSenders[j]; oldSenders[j] = NULL; reset = true; @@ -1365,8 +1368,9 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex // reset replicaIndex int32_t oldreplicaIndex = (pSyncNode->senders)[i]->replicaIndex; (pSyncNode->senders)[i]->replicaIndex = i; - sDebug("vgId:%d sync event udpate replicaIndex from %d to %d, %s:%d, %p, reset:%d", pSyncNode->vgId, - oldreplicaIndex, i, host, port, (pSyncNode->senders)[i], reset); + sDebug("vgId:%d sync event currentTerm:%lu udpate replicaIndex from %d to %d, %s:%d, %p, reset:%d", + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, oldreplicaIndex, i, host, port, + (pSyncNode->senders)[i], reset); } } } @@ -1375,8 +1379,9 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { if ((pSyncNode->senders)[i] == NULL) { (pSyncNode->senders)[i] = snapshotSenderCreate(pSyncNode, i); - sDebug("vgId:%d sync event create new sender %p replicaIndex:%d, privateTerm:%lu", pSyncNode->vgId, - (pSyncNode->senders)[i], i, (pSyncNode->senders)[i]->privateTerm); + sDebug("vgId:%d sync event currentTerm:%lu create new sender %p replicaIndex:%d, privateTerm:%lu", + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, (pSyncNode->senders)[i], i, + (pSyncNode->senders)[i]->privateTerm); } } @@ -1384,7 +1389,8 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { if (oldSenders[i] != NULL) { snapshotSenderDestroy(oldSenders[i]); - sDebug("vgId:%d sync event delete old sender %p replicaIndex:%d", pSyncNode->vgId, oldSenders[i], i); + sDebug("vgId:%d sync event currentTerm:%lu delete old sender %p replicaIndex:%d", pSyncNode->vgId, + pSyncNode->pRaftStore->currentTerm, oldSenders[i], i); oldSenders[i] = NULL; } } @@ -1454,9 +1460,11 @@ void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term) { } void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) { - sDebug("vgId:%d sync event become follower, isStandBy:%d, currentTerm:%lu, replicaNum:%d, restoreFinish:%d, %s", - pSyncNode->vgId, pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftStore->currentTerm, pSyncNode->replicaNum, - pSyncNode->restoreFinish, debugStr); + sDebug( + "vgId:%d sync event currentTerm:%lu become follower, isStandBy:%d, replicaNum:%d, " + "restoreFinish:%d, %s", + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, pSyncNode->pRaftCfg->isStandBy, pSyncNode->replicaNum, + pSyncNode->restoreFinish, debugStr); // maybe clear leader cache if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { @@ -1493,8 +1501,8 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { // reset restoreFinish pSyncNode->restoreFinish = false; - sDebug("vgId:%d sync event become leader, isStandBy:%d, currentTerm:%lu, replicaNum:%d, restoreFinish:%d, %s", - pSyncNode->vgId, pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftStore->currentTerm, pSyncNode->replicaNum, + sDebug("vgId:%d sync event currentTerm:%lu become leader, isStandBy:%d, replicaNum:%d, restoreFinish:%d, %s", + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, pSyncNode->pRaftCfg->isStandBy, pSyncNode->replicaNum, pSyncNode->restoreFinish, debugStr); // state change @@ -2069,21 +2077,13 @@ const char* syncStr(ESyncState state) { static int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* pEntry) { SyncLeaderTransfer* pSyncLeaderTransfer = syncLeaderTransferFromRpcMsg2(pRpcMsg); - /* - char host[128]; - uint16_t port; - syncUtilU642Addr(pSyncLeaderTransfer->newLeaderId.addr, host, sizeof(host), &port); - sDebug("vgId:%d sync event, maybe leader transfer to %s:%d %lu", ths->vgId, host, port, - pSyncLeaderTransfer->newLeaderId.addr); - */ - - sDebug("vgId:%d sync event begin leader transfer", ths->vgId); + sDebug("vgId:%d sync event currentTerm:%lu begin leader transfer", ths->vgId, ths->pRaftStore->currentTerm); if (strcmp(pSyncLeaderTransfer->newNodeInfo.nodeFqdn, ths->myNodeInfo.nodeFqdn) == 0 && pSyncLeaderTransfer->newNodeInfo.nodePort == ths->myNodeInfo.nodePort) { - sDebug("vgId:%d sync event maybe leader transfer to %s:%d %lu", ths->vgId, - pSyncLeaderTransfer->newNodeInfo.nodeFqdn, pSyncLeaderTransfer->newNodeInfo.nodePort, - pSyncLeaderTransfer->newLeaderId.addr); + sDebug("vgId:%d sync event currentTerm:%lu maybe leader transfer to %s:%d %lu", ths->vgId, + ths->pRaftStore->currentTerm, pSyncLeaderTransfer->newNodeInfo.nodeFqdn, + pSyncLeaderTransfer->newNodeInfo.nodePort, pSyncLeaderTransfer->newLeaderId.addr); // reset elect timer now! int32_t electMS = 1; @@ -2205,8 +2205,8 @@ static int32_t syncNodeConfigChange(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftE int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) { int32_t code = 0; ESyncState state = flag; - sDebug("vgId:%d sync event commit by wal from index:%" PRId64 " to index:%" PRId64 ", %s", ths->vgId, beginIndex, - endIndex, syncUtilState2String(state)); + sDebug("vgId:%d sync event currentTerm:%lu commit by wal from index:%" PRId64 " to index:%" PRId64 ", %s", ths->vgId, + ths->pRaftStore->currentTerm, beginIndex, endIndex, syncUtilState2String(state)); // execute fsm if (ths->pFsm != NULL) { @@ -2254,8 +2254,8 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, ths->pFsm->FpRestoreFinishCb(ths->pFsm); } ths->restoreFinish = true; - sDebug("vgId:%d sync event restore finish, %s, index:%ld", ths->vgId, syncUtilState2String(ths->state), - pEntry->index); + sDebug("vgId:%d sync event currentTerm:%lu restore finish, %s, index:%ld", ths->vgId, + ths->pRaftStore->currentTerm, syncUtilState2String(ths->state), pEntry->index); } } diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 79ce84abe9..31ac44fa81 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -15,6 +15,7 @@ #include "syncRaftLog.h" #include "syncRaftCfg.h" +#include "syncRaftStore.h" #include "wal.h" // refactor, log[0 .. n] ==> log[m .. n] @@ -162,10 +163,10 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr walFsync(pWal, true); - sDebug("vgId:%d sync event write index:%ld, %s, isStandBy:%d, msgType:%s,%d, originalRpcType:%s,%d", - pData->pSyncNode->vgId, pEntry->index, syncUtilState2String(pData->pSyncNode->state), - pData->pSyncNode->pRaftCfg->isStandBy, TMSG_INFO(pEntry->msgType), pEntry->msgType, - TMSG_INFO(pEntry->originalRpcType), pEntry->originalRpcType); + sDebug("vgId:%d sync event currentTerm:%lu write index:%ld, %s, isStandBy:%d, msgType:%s,%d, originalRpcType:%s,%d", + pData->pSyncNode->vgId, pData->pSyncNode->pRaftStore->currentTerm, pEntry->index, + syncUtilState2String(pData->pSyncNode->state), pData->pSyncNode->pRaftCfg->isStandBy, + TMSG_INFO(pEntry->msgType), pEntry->msgType, TMSG_INFO(pEntry->originalRpcType), pEntry->originalRpcType); return code; } @@ -321,7 +322,12 @@ int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { walFsync(pWal, true); - sDebug("sync event old write wal: %ld", pEntry->index); + sDebug( + "vgId:%d sync event currentTerm:%lu old write index:%ld, %s, isStandBy:%d, msgType:%s,%d, originalRpcType:%s,%d", + pData->pSyncNode->vgId, pData->pSyncNode->pRaftStore->currentTerm, pEntry->index, + syncUtilState2String(pData->pSyncNode->state), pData->pSyncNode->pRaftCfg->isStandBy, TMSG_INFO(pEntry->msgType), + pEntry->msgType, TMSG_INFO(pEntry->originalRpcType), pEntry->originalRpcType); + return code; } diff --git a/source/libs/sync/src/syncRespMgr.c b/source/libs/sync/src/syncRespMgr.c index adc67b5d8d..5b127793d6 100644 --- a/source/libs/sync/src/syncRespMgr.c +++ b/source/libs/sync/src/syncRespMgr.c @@ -14,6 +14,7 @@ */ #include "syncRespMgr.h" +#include "syncRaftStore.h" SSyncRespMgr *syncRespMgrCreate(void *data, int64_t ttl) { SSyncRespMgr *pObj = (SSyncRespMgr *)taosMemoryMalloc(sizeof(SSyncRespMgr)); @@ -45,9 +46,9 @@ int64_t syncRespMgrAdd(SSyncRespMgr *pObj, SRespStub *pStub) { taosHashPut(pObj->pRespHash, &keyCode, sizeof(keyCode), pStub, sizeof(SRespStub)); SSyncNode *pSyncNode = pObj->data; - sDebug("vgId:%d sync event resp mgr add, msgType:%s,%d seq:%lu handle:%p ahandle:%p", pSyncNode->vgId, - TMSG_INFO(pStub->rpcMsg.msgType), pStub->rpcMsg.msgType, keyCode, pStub->rpcMsg.info.handle, - pStub->rpcMsg.info.ahandle); + sDebug("vgId:%d sync event currentTerm:%lu resp mgr add, msgType:%s,%d seq:%lu handle:%p ahandle:%p", pSyncNode->vgId, + pSyncNode->pRaftStore->currentTerm, TMSG_INFO(pStub->rpcMsg.msgType), pStub->rpcMsg.msgType, keyCode, + pStub->rpcMsg.info.handle, pStub->rpcMsg.info.ahandle); taosThreadMutexUnlock(&(pObj->mutex)); return keyCode; @@ -70,9 +71,9 @@ int32_t syncRespMgrGet(SSyncRespMgr *pObj, uint64_t index, SRespStub *pStub) { memcpy(pStub, pTmp, sizeof(SRespStub)); SSyncNode *pSyncNode = pObj->data; - sDebug("vgId:%d sync event resp mgr get, msgType:%s,%d seq:%lu handle:%p ahandle:%p", pSyncNode->vgId, - TMSG_INFO(pStub->rpcMsg.msgType), pStub->rpcMsg.msgType, index, pStub->rpcMsg.info.handle, - pStub->rpcMsg.info.ahandle); + sDebug("vgId:%d sync event currentTerm:%lu resp mgr get, msgType:%s,%d seq:%lu handle:%p ahandle:%p", + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, TMSG_INFO(pStub->rpcMsg.msgType), pStub->rpcMsg.msgType, + index, pStub->rpcMsg.info.handle, pStub->rpcMsg.info.ahandle); taosThreadMutexUnlock(&(pObj->mutex)); return 1; // get one object @@ -89,9 +90,9 @@ int32_t syncRespMgrGetAndDel(SSyncRespMgr *pObj, uint64_t index, SRespStub *pStu memcpy(pStub, pTmp, sizeof(SRespStub)); SSyncNode *pSyncNode = pObj->data; - sDebug("vgId:%d sync event resp mgr get and del, msgType:%s,%d seq:%lu handle:%p ahandle:%p", pSyncNode->vgId, - TMSG_INFO(pStub->rpcMsg.msgType), pStub->rpcMsg.msgType, index, pStub->rpcMsg.info.handle, - pStub->rpcMsg.info.ahandle); + sDebug("vgId:%d sync event currentTerm:%lu resp mgr get and del, msgType:%s,%d seq:%lu handle:%p ahandle:%p", + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, TMSG_INFO(pStub->rpcMsg.msgType), pStub->rpcMsg.msgType, + index, pStub->rpcMsg.info.handle, pStub->rpcMsg.info.ahandle); taosHashRemove(pObj->pRespHash, &index, sizeof(index)); taosThreadMutexUnlock(&(pObj->mutex)); diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 545ea953f8..d3785060d4 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -141,18 +141,22 @@ void snapshotSenderStart(SSyncSnapshotSender *pSender) { if (gRaftDetailLog) { char *msgStr = syncSnapshotSend2Str(pMsg); sDebug( - "vgId:%d sync event snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " + "vgId:%d sync event currentTerm:%lu snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld " + "lastApplyTerm:%lu " "lastConfigIndex:%ld privateTerm:%lu send " "msg:%s", - pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->privateTerm, msgStr); + pSender->pSyncNode->vgId, pSender->pSyncNode->pRaftStore->currentTerm, host, port, pSender->seq, pSender->ack, + pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, + pSender->privateTerm, msgStr); taosMemoryFree(msgStr); } else { sDebug( - "vgId:%d sync event snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " + "vgId:%d sync event currentTerm:%lu snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld " + "lastApplyTerm:%lu " "lastConfigIndex:%ld privateTerm:%lu", - pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->privateTerm); + pSender->pSyncNode->vgId, pSender->pSyncNode->pRaftStore->currentTerm, host, port, pSender->seq, pSender->ack, + pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, + pSender->privateTerm); } syncSnapshotSendDestroy(pMsg); @@ -279,25 +283,31 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { if (gRaftDetailLog) { char *msgStr = syncSnapshotSend2Str(pMsg); sDebug( - "vgId:%d sync event snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " + "vgId:%d sync event currentTerm:%lu snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld " + "lastApplyTerm:%lu " "lastConfigIndex:%ld privateTerm:%lu send " "msg:%s", - pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->privateTerm, msgStr); + pSender->pSyncNode->vgId, pSender->pSyncNode->pRaftStore->currentTerm, host, port, pSender->seq, pSender->ack, + pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, + pSender->privateTerm, msgStr); taosMemoryFree(msgStr); } else { sDebug( - "vgId:%d sync event snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " + "vgId:%d sync event currentTerm:%lu snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld " + "lastApplyTerm:%lu " "lastConfigIndex:%ld privateTerm:%lu", - pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->privateTerm); + pSender->pSyncNode->vgId, pSender->pSyncNode->pRaftStore->currentTerm, host, port, pSender->seq, pSender->ack, + pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, + pSender->privateTerm); } } else { sDebug( - "vgId:%d sync event snapshot send to %s:%d sending seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu " + "vgId:%d sync event currentTerm:%lu snapshot send to %s:%d sending seq:%d ack:%d lastApplyIndex:%ld " + "lastApplyTerm:%lu " "lastConfigIndex:%ld privateTerm:%lu", - pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->privateTerm); + pSender->pSyncNode->vgId, pSender->pSyncNode->pRaftStore->currentTerm, host, port, pSender->seq, pSender->ack, + pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, + pSender->privateTerm); } syncSnapshotSendDestroy(pMsg); @@ -328,12 +338,15 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) { if (gRaftDetailLog) { char *msgStr = syncSnapshotSend2Str(pMsg); - sDebug("vgId:%d sync event snapshot send to %s:%d resend seq:%d ack:%d privateTerm:%lu send msg:%s", - pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->privateTerm, msgStr); + sDebug( + "vgId:%d sync event currentTerm:%lu snapshot send to %s:%d resend seq:%d ack:%d privateTerm:%lu send msg:%s", + pSender->pSyncNode->vgId, pSender->pSyncNode->pRaftStore->currentTerm, host, port, pSender->seq, pSender->ack, + pSender->privateTerm, msgStr); taosMemoryFree(msgStr); } else { - sDebug("vgId:%d sync event snapshot send to %s:%d resend seq:%d ack:%d privateTerm:%lu", pSender->pSyncNode->vgId, - host, port, pSender->seq, pSender->ack, pSender->privateTerm); + sDebug("vgId:%d sync event currentTerm:%lu snapshot send to %s:%d resend seq:%d ack:%d privateTerm:%lu", + pSender->pSyncNode->vgId, pSender->pSyncNode->pRaftStore->currentTerm, host, port, pSender->seq, + pSender->ack, pSender->privateTerm); } syncSnapshotSendDestroy(pMsg); @@ -485,7 +498,7 @@ void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver, bool apply) { pReceiver->start = false; if (apply) { - ++(pReceiver->privateTerm); + // ++(pReceiver->privateTerm); } if (gRaftDetailLog) { @@ -566,17 +579,17 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { if (gRaftDetailLog) { char *msgStr = syncSnapshotSend2Str(pMsg); sDebug( - "vgId:%d sync event snapshot recv from %s:%d begin ack:%d, lastIndex:%ld, lastTerm:%lu, " + "vgId:%d sync event currentTerm:%lu snapshot recv from %s:%d begin ack:%d, lastIndex:%ld, lastTerm:%lu, " "lastConfigIndex:%ld, privateTerm:%lu, recv msg:%s", - pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex, - pReceiver->privateTerm, msgStr); + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, host, port, pReceiver->ack, pMsg->lastIndex, + pMsg->lastTerm, pMsg->lastConfigIndex, pReceiver->privateTerm, msgStr); taosMemoryFree(msgStr); } else { sDebug( - "vgId:%d sync event snapshot recv from %s:%d begin ack:%d, lastIndex:%ld, lastTerm:%lu, " + "vgId:%d sync event currentTerm:%lu snapshot recv from %s:%d begin ack:%d, lastIndex:%ld, lastTerm:%lu, " "lastConfigIndex:%ld privateTerm:%lu", - pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex, - pReceiver->privateTerm); + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, host, port, pReceiver->ack, pMsg->lastIndex, + pMsg->lastTerm, pMsg->lastConfigIndex, pReceiver->privateTerm); } } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_END) { @@ -612,14 +625,19 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { bool isDrop; if (IamInNew) { - sDebug("vgId:%d sync event update config by snapshot, lastIndex:%ld, lastTerm:%lu, lastConfigIndex:%ld ", - pSyncNode->vgId, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex); + sDebug( + "vgId:%d sync event currentTerm:%lu update config by snapshot, lastIndex:%ld, lastTerm:%lu, " + "lastConfigIndex:%ld ", + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, pMsg->lastIndex, pMsg->lastTerm, + pMsg->lastConfigIndex); syncNodeUpdateConfig(pSyncNode, &newSyncCfg, pMsg->lastConfigIndex, &isDrop); } else { sDebug( - "vgId:%d sync event do not update config by snapshot, I am not in newCfg, lastIndex:%ld, lastTerm:%lu, " + "vgId:%d sync event currentTerm:%lu do not update config by snapshot, I am not in newCfg, " + "lastIndex:%ld, lastTerm:%lu, " "lastConfigIndex:%ld ", - pSyncNode->vgId, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex); + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, pMsg->lastIndex, pMsg->lastTerm, + pMsg->lastConfigIndex); } // change isStandBy to normal @@ -644,19 +662,20 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { if (gRaftDetailLog) { char *logSimpleStr = logStoreSimple2Str(pSyncNode->pLogStore); sDebug( - "vgId:%d sync event snapshot recv from %s:%d finish, update log begin index:%ld, " + "vgId:%d sync event currentTerm:%lu snapshot recv from %s:%d finish, update log begin index:%ld, " "snapshot.lastApplyIndex:%ld, " "snapshot.lastApplyTerm:%lu, snapshot.lastConfigIndex:%ld, privateTerm:%lu, raft log:%s", - pSyncNode->vgId, host, port, pMsg->lastIndex + 1, snapshot.lastApplyIndex, snapshot.lastApplyTerm, - snapshot.lastConfigIndex, pReceiver->privateTerm, logSimpleStr); + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, host, port, pMsg->lastIndex + 1, + snapshot.lastApplyIndex, snapshot.lastApplyTerm, snapshot.lastConfigIndex, pReceiver->privateTerm, + logSimpleStr); taosMemoryFree(logSimpleStr); } else { sDebug( - "vgId:%d sync event snapshot recv from %s:%d finish, update log begin index:%ld, " + "vgId:%d sync event currentTerm:%lu snapshot recv from %s:%d finish, update log begin index:%ld, " "snapshot.lastApplyIndex:%ld, " "snapshot.lastApplyTerm:%lu, snapshot.lastConfigIndex:%ld, privateTerm:%lu", - pSyncNode->vgId, host, port, pMsg->lastIndex + 1, snapshot.lastApplyIndex, snapshot.lastApplyTerm, - snapshot.lastConfigIndex, pReceiver->privateTerm); + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, host, port, pMsg->lastIndex + 1, + snapshot.lastApplyIndex, snapshot.lastApplyTerm, snapshot.lastConfigIndex, pReceiver->privateTerm); } pReceiver->pWriter = NULL; @@ -667,17 +686,17 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { if (gRaftDetailLog) { char *msgStr = syncSnapshotSend2Str(pMsg); sDebug( - "vgId:%d sync event snapshot recv from %s:%d end ack:%d, lastIndex:%ld, lastTerm:%lu, " + "vgId:%d sync event currentTerm:%lu snapshot recv from %s:%d end ack:%d, lastIndex:%ld, lastTerm:%lu, " "lastConfigIndex:%ld, privateTerm:%lu, recv msg:%s", - pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, - pMsg->lastConfigIndex, pReceiver->privateTerm, msgStr); + pReceiver->pSyncNode->vgId, pReceiver->pSyncNode->pRaftStore->currentTerm, host, port, pReceiver->ack, + pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex, pReceiver->privateTerm, msgStr); taosMemoryFree(msgStr); } else { sDebug( - "vgId:%d sync event snapshot recv from %s:%d end ack:%d, lastIndex:%ld, lastTerm:%lu, " + "vgId:%d sync event currentTerm:%lu snapshot recv from %s:%d end ack:%d, lastIndex:%ld, lastTerm:%lu, " "lastConfigIndex:%ld, privateTerm:%lu", - pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, - pMsg->lastConfigIndex, pReceiver->privateTerm); + pReceiver->pSyncNode->vgId, pReceiver->pSyncNode->pRaftStore->currentTerm, host, port, pReceiver->ack, + pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex, pReceiver->privateTerm); } } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_FORCE_CLOSE) { @@ -692,18 +711,20 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { if (gRaftDetailLog) { char *msgStr = syncSnapshotSend2Str(pMsg); sDebug( - "vgId:%d sync event snapshot recv from %s:%d force close ack:%d, lastIndex:%ld, lastTerm:%lu, " + "vgId:%d sync event currentTerm:%lu snapshot recv from %s:%d force close ack:%d, lastIndex:%ld, " + "lastTerm:%lu, " "lastConfigIndex:%ld, privateTerm:%lu, recv " "msg:%s", - pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, - pMsg->lastConfigIndex, pReceiver->privateTerm, msgStr); + pReceiver->pSyncNode->vgId, pReceiver->pSyncNode->pRaftStore->currentTerm, host, port, pReceiver->ack, + pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex, pReceiver->privateTerm, msgStr); taosMemoryFree(msgStr); } else { sDebug( - "vgId:%d sync event snapshot recv from %s:%d force close ack:%d, lastIndex:%ld, lastTerm:%lu, " + "vgId:%d sync event currentTerm:%lu snapshot recv from %s:%d force close ack:%d, lastIndex:%ld, " + "lastTerm:%lu, " "lastConfigIndex:%ld, privateTerm:%lu", - pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, - pMsg->lastConfigIndex, pReceiver->privateTerm); + pReceiver->pSyncNode->vgId, pReceiver->pSyncNode->pRaftStore->currentTerm, host, port, pReceiver->ack, + pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex, pReceiver->privateTerm); } } else if (pMsg->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->seq < SYNC_SNAPSHOT_SEQ_END) { @@ -723,17 +744,19 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { if (gRaftDetailLog) { char *msgStr = syncSnapshotSend2Str(pMsg); sDebug( - "vgId:%d sync event snapshot recv from %s:%d receiving ack:%d, lastIndex:%ld, lastTerm:%lu, " + "vgId:%d sync event currentTerm:%lu snapshot recv from %s:%d receiving ack:%d, lastIndex:%ld, " + "lastTerm:%lu, " "lastConfigIndex:%ld, privateTerm:%lu, recv msg:%s", - pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex, - pReceiver->privateTerm, msgStr); + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, host, port, pReceiver->ack, pMsg->lastIndex, + pMsg->lastTerm, pMsg->lastConfigIndex, pReceiver->privateTerm, msgStr); taosMemoryFree(msgStr); } else { sDebug( - "vgId:%d sync event snapshot recv from %s:%d receiving ack:%d, lastIndex:%ld, lastTerm:%lu, " + "vgId:%d sync event currentTerm:%lu snapshot recv from %s:%d receiving ack:%d, lastIndex:%ld, " + "lastTerm:%lu, " "lastConfigIndex:%ld, privateTerm:%lu", - pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex, - pReceiver->privateTerm); + pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, host, port, pReceiver->ack, pMsg->lastIndex, + pMsg->lastTerm, pMsg->lastConfigIndex, pReceiver->privateTerm); } } else { From 528d7d7248cbafb734aec558368cacbcaa832551 Mon Sep 17 00:00:00 2001 From: afwerar <1296468573@qq.com> Date: Thu, 16 Jun 2022 15:46:51 +0800 Subject: [PATCH 19/36] os: add dll check --- include/os/osSysinfo.h | 1 + source/client/src/clientHb.c | 4 +++- source/libs/catalog/src/ctgCache.c | 4 +++- source/os/src/osSysinfo.c | 16 ++++++++++++++++ source/util/src/tcache.c | 6 +++++- 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/include/os/osSysinfo.h b/include/os/osSysinfo.h index c009bcf350..4ec2e2884e 100644 --- a/include/os/osSysinfo.h +++ b/include/os/osSysinfo.h @@ -68,6 +68,7 @@ typedef struct { } SysNameInfo; SysNameInfo taosGetSysNameInfo(); +bool taosCheckCurrentInDll(); #ifdef __cplusplus } diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 5db27ae438..c450e9351f 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -588,7 +588,9 @@ void hbThreadFuncUnexpectedStopped(void) { static void *hbThreadFunc(void *param) { setThreadName("hb"); #ifdef WINDOWS - atexit(hbThreadFuncUnexpectedStopped); + if (taosCheckCurrentInDll()) { + atexit(hbThreadFuncUnexpectedStopped); + } #endif while (1) { int8_t threadStop = atomic_val_compare_exchange_8(&clientHbMgr.threadStop, 1, 2); diff --git a/source/libs/catalog/src/ctgCache.c b/source/libs/catalog/src/ctgCache.c index 277516686b..0948c01270 100644 --- a/source/libs/catalog/src/ctgCache.c +++ b/source/libs/catalog/src/ctgCache.c @@ -1938,7 +1938,9 @@ void ctgCleanupCacheQueue(void) { void* ctgUpdateThreadFunc(void* param) { setThreadName("catalog"); #ifdef WINDOWS - atexit(ctgUpdateThreadUnexpectedStopped); + if (taosCheckCurrentInDll()) { + atexit(ctgUpdateThreadUnexpectedStopped); + } #endif qInfo("catalog update thread started"); diff --git a/source/os/src/osSysinfo.c b/source/os/src/osSysinfo.c index ace870853f..4981f7dc26 100644 --- a/source/os/src/osSysinfo.c +++ b/source/os/src/osSysinfo.c @@ -945,3 +945,19 @@ SysNameInfo taosGetSysNameInfo() { return info; #endif } + + +bool taosCheckCurrentInDll() { +#ifdef WINDOWS + MEMORY_BASIC_INFORMATION mbi; + char path[PATH_MAX] = {0}; + GetModuleFileName(((VirtualQuery(taosCheckCurrentInDll,&mbi,sizeof(mbi)) != 0) ? (HMODULE)mbi.AllocationBase : NULL), path, PATH_MAX); + int strLastIndex = strlen(path); + if ((path[strLastIndex-3] == 'd' || path[strLastIndex-3] == 'D') && (path[strLastIndex-2] == 'l' || path[strLastIndex-2] == 'L') && (path[strLastIndex-1] == 'l' || path[strLastIndex-1] == 'L')) { + return true; + } + return false; +#else + return false; +#endif +} \ No newline at end of file diff --git a/source/util/src/tcache.c b/source/util/src/tcache.c index 10a5475555..1a716752a9 100644 --- a/source/util/src/tcache.c +++ b/source/util/src/tcache.c @@ -829,7 +829,11 @@ void *taosCacheTimedRefresh(void *handle) { const int32_t SLEEP_DURATION = 500; // 500 ms int64_t count = 0; - atexit(taosCacheRefreshWorkerUnexpectedStopped); +#ifdef WINDOWS + if (taosCheckCurrentInDll()) { + atexit(taosCacheRefreshWorkerUnexpectedStopped); + } +#endif while (1) { taosMsleep(SLEEP_DURATION); From 14b6523bd34981a1cd7b86dbddc711e278101a32 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Thu, 16 Jun 2022 15:56:25 +0800 Subject: [PATCH 20/36] refactor: show rpc response code --- source/libs/transport/src/transCli.c | 4 ++-- source/libs/transport/src/transSvr.c | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index fbf4d6aa9f..3d570e0860 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -324,9 +324,9 @@ void cliHandleResp(SCliConn* conn) { tDebug("%s cli conn %p ref by app", CONN_GET_INST_LABEL(conn), conn); } - tDebug("%s cli conn %p %s received from %s:%d, local info: %s:%d, msg size: %d", pTransInst->label, conn, + tDebug("%s cli conn %p %s received from %s:%d, local info:%s:%d, msg size:%d code:0x%x", pTransInst->label, conn, TMSG_INFO(pHead->msgType), taosInetNtoa(conn->addr.sin_addr), ntohs(conn->addr.sin_port), - taosInetNtoa(conn->localAddr.sin_addr), ntohs(conn->localAddr.sin_port), transMsg.contLen); + taosInetNtoa(conn->localAddr.sin_addr), ntohs(conn->localAddr.sin_port), transMsg.contLen, transMsg.code); if (pCtx == NULL && CONN_NO_PERSIST_BY_APP(conn)) { tTrace("except, server continue send while cli ignore it"); diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index 020435d076..b6e3fd2676 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -285,13 +285,14 @@ static void uvHandleReq(SSvrConn* pConn) { } if (pConn->status == ConnNormal && pHead->noResp == 0) { transRefSrvHandle(pConn); - tDebug("server conn %p %s received from %s:%d, local info: %s:%d, msg size: %d", pConn, TMSG_INFO(transMsg.msgType), - taosInetNtoa(pConn->addr.sin_addr), ntohs(pConn->addr.sin_port), taosInetNtoa(pConn->localAddr.sin_addr), - ntohs(pConn->localAddr.sin_port), transMsg.contLen); - } else { - tDebug("server conn %p %s received from %s:%d, local info: %s:%d, msg size: %d, resp:%d ", pConn, + tDebug("server conn %p %s received from %s:%d, local info:%s:%d, msg size:%d code:0x%x", pConn, TMSG_INFO(transMsg.msgType), taosInetNtoa(pConn->addr.sin_addr), ntohs(pConn->addr.sin_port), - taosInetNtoa(pConn->localAddr.sin_addr), ntohs(pConn->localAddr.sin_port), transMsg.contLen, pHead->noResp); + taosInetNtoa(pConn->localAddr.sin_addr), ntohs(pConn->localAddr.sin_port), transMsg.contLen, transMsg.code); + } else { + tDebug("server conn %p %s received from %s:%d, local info:%s:%d, msg size:%d, resp:%d code:0x%x", pConn, + TMSG_INFO(transMsg.msgType), taosInetNtoa(pConn->addr.sin_addr), ntohs(pConn->addr.sin_port), + taosInetNtoa(pConn->localAddr.sin_addr), ntohs(pConn->localAddr.sin_port), transMsg.contLen, pHead->noResp, + transMsg.code); // no ref here } From 928f45709dfce0946ce9dc76b7162ba7cd5718df Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Thu, 16 Jun 2022 16:01:22 +0800 Subject: [PATCH 21/36] feat(stream): drop stream --- include/common/tmsg.h | 4 +- include/util/tdef.h | 2 +- source/dnode/mnode/impl/inc/mndDef.h | 51 +++-- source/dnode/mnode/impl/inc/mndStb.h | 6 +- source/dnode/mnode/impl/inc/mndStream.h | 2 + source/dnode/mnode/impl/src/mndDef.c | 39 +--- source/dnode/mnode/impl/src/mndScheduler.c | 12 +- source/dnode/mnode/impl/src/mndSma.c | 4 +- source/dnode/mnode/impl/src/mndStb.c | 31 ++- source/dnode/mnode/impl/src/mndStream.c | 244 +++++++++++++++++++-- 10 files changed, 295 insertions(+), 100 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 7c0e8206e1..01d99ac705 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1519,7 +1519,7 @@ typedef struct { #define STREAM_TRIGGER_MAX_DELAY 3 typedef struct { - char name[TSDB_TABLE_FNAME_LEN]; + char name[TSDB_STREAM_FNAME_LEN]; char sourceDB[TSDB_DB_FNAME_LEN]; char targetStbFullName[TSDB_TABLE_FNAME_LEN]; int8_t igExists; @@ -1539,7 +1539,7 @@ int32_t tDeserializeSCMCreateStreamReq(void* buf, int32_t bufLen, SCMCreateStrea void tFreeSCMCreateStreamReq(SCMCreateStreamReq* pReq); typedef struct { - char name[TSDB_TOPIC_FNAME_LEN]; + char name[TSDB_STREAM_FNAME_LEN]; int64_t streamId; char* sql; char* executorMsg; diff --git a/include/util/tdef.h b/include/util/tdef.h index 4798a9375c..a48e823bad 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -210,7 +210,7 @@ typedef enum ELogicConditionType { #define TSDB_TYPE_STR_MAX_LEN 32 #define TSDB_TABLE_FNAME_LEN (TSDB_DB_FNAME_LEN + TSDB_TABLE_NAME_LEN + TSDB_NAME_DELIMITER_LEN) #define TSDB_TOPIC_FNAME_LEN (TSDB_ACCT_ID_LEN + TSDB_TABLE_NAME_LEN + TSDB_NAME_DELIMITER_LEN) -#define TSDB_STREAM_FNAME_LEN TSDB_TABLE_FNAME_LEN +#define TSDB_STREAM_FNAME_LEN (TSDB_ACCT_ID_LEN + TSDB_TABLE_NAME_LEN + TSDB_NAME_DELIMITER_LEN) #define TSDB_SUBSCRIBE_KEY_LEN (TSDB_CGROUP_LEN + TSDB_TOPIC_FNAME_LEN + 2) #define TSDB_PARTITION_KEY_LEN (TSDB_SUBSCRIBE_KEY_LEN + 20) #define TSDB_COL_NAME_LEN 65 diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 35cbf178ec..db18f87830 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -539,27 +539,38 @@ typedef struct { } SMqRebOutputObj; typedef struct { - char name[TSDB_STREAM_FNAME_LEN]; - char sourceDb[TSDB_DB_FNAME_LEN]; - char targetDb[TSDB_DB_FNAME_LEN]; - char targetSTbName[TSDB_TABLE_FNAME_LEN]; - int64_t targetStbUid; - int64_t createTime; - int64_t updateTime; - int64_t uid; - int64_t dbUid; - int32_t version; - int32_t vgNum; - SRWLatch lock; - int8_t status; - int8_t createdBy; // STREAM_CREATED_BY__USER or SMA - int32_t fixedSinkVgId; // 0 for shuffle - SVgObj fixedSinkVg; - int64_t smaId; // 0 for unused - int8_t trigger; - int64_t triggerParam; - int64_t watermark; + char name[TSDB_STREAM_FNAME_LEN]; + // ctl + SRWLatch lock; + // create info + int64_t createTime; + int64_t updateTime; + int32_t version; + // TODO remove it + int64_t smaId; // 0 for unused + // info + int64_t uid; + int8_t status; + // TODO remove it + int32_t vgNum; + // config + int8_t dropPolicy; + int8_t trigger; + int64_t triggerParam; + int64_t watermark; + // source and target + int64_t sourceDbUid; + int64_t targetDbUid; + char sourceDb[TSDB_DB_FNAME_LEN]; + char targetDb[TSDB_DB_FNAME_LEN]; + char targetSTbName[TSDB_TABLE_FNAME_LEN]; + int64_t targetStbUid; + // assigned when scheduling + int32_t fixedSinkVgId; // 0 for shuffle + SVgObj fixedSinkVg; + // transformation char* sql; + char* ast; char* physicalPlan; SArray* tasks; // SArray> SSchemaWrapper outputSchema; diff --git a/source/dnode/mnode/impl/inc/mndStb.h b/source/dnode/mnode/impl/inc/mndStb.h index d3de4e3b3f..44a7fdadde 100644 --- a/source/dnode/mnode/impl/inc/mndStb.h +++ b/source/dnode/mnode/impl/inc/mndStb.h @@ -27,8 +27,7 @@ void mndCleanupStb(SMnode *pMnode); SStbObj *mndAcquireStb(SMnode *pMnode, char *stbName); void mndReleaseStb(SMnode *pMnode, SStbObj *pStb); SSdbRaw *mndStbActionEncode(SStbObj *pStb); -int32_t mndValidateStbInfo(SMnode *pMnode, SSTableVersion *pStbs, int32_t numOfStbs, void **ppRsp, - int32_t *pRspLen); +int32_t mndValidateStbInfo(SMnode *pMnode, SSTableVersion *pStbs, int32_t numOfStbs, void **ppRsp, int32_t *pRspLen); int32_t mndGetNumOfStbs(SMnode *pMnode, char *dbName, int32_t *pNumOfStbs); int32_t mndCheckCreateStbReq(SMCreateStbReq *pCreate); @@ -36,6 +35,9 @@ SDbObj *mndAcquireDbByStb(SMnode *pMnode, const char *stbName); int32_t mndBuildStbFromReq(SMnode *pMnode, SStbObj *pDst, SMCreateStbReq *pCreate, SDbObj *pDb); int32_t mndAddStbToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SStbObj *pStb); +void mndExtractDbNameFromStbFullName(const char *stbFullName, char *dst); +void mndExtractTbNameFromStbFullName(const char *stbFullName, char *dst, int32_t dstSize); + #ifdef __cplusplus } #endif diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index b5d22cb7a5..65017027dc 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -33,6 +33,8 @@ SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw); int32_t mndAddStreamToTrans(SMnode *pMnode, SStreamObj *pStream, const char *ast, STrans *pTrans); +int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); + #ifdef __cplusplus } #endif diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index 6eac91bb43..b9bd1e4298 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -19,7 +19,6 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) { int32_t sz = 0; - /*int32_t outputNameSz = 0;*/ if (tEncodeCStr(pEncoder, pObj->name) < 0) return -1; if (tEncodeCStr(pEncoder, pObj->sourceDb) < 0) return -1; if (tEncodeCStr(pEncoder, pObj->targetDb) < 0) return -1; @@ -28,10 +27,12 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) { if (tEncodeI64(pEncoder, pObj->createTime) < 0) return -1; if (tEncodeI64(pEncoder, pObj->updateTime) < 0) return -1; if (tEncodeI64(pEncoder, pObj->uid) < 0) return -1; - if (tEncodeI64(pEncoder, pObj->dbUid) < 0) return -1; + if (tEncodeI64(pEncoder, pObj->sourceDbUid) < 0) return -1; + if (tEncodeI64(pEncoder, pObj->targetDbUid) < 0) return -1; if (tEncodeI32(pEncoder, pObj->version) < 0) return -1; + if (tEncodeI32(pEncoder, pObj->vgNum) < 0) return -1; + if (tEncodeI8(pEncoder, pObj->dropPolicy) < 0) return -1; if (tEncodeI8(pEncoder, pObj->status) < 0) return -1; - if (tEncodeI8(pEncoder, pObj->createdBy) < 0) return -1; if (tEncodeI8(pEncoder, pObj->trigger) < 0) return -1; if (tEncodeI64(pEncoder, pObj->triggerParam) < 0) return -1; if (tEncodeI64(pEncoder, pObj->watermark) < 0) return -1; @@ -57,17 +58,6 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) { } if (tEncodeSSchemaWrapper(pEncoder, &pObj->outputSchema) < 0) return -1; - -#if 0 - if (pObj->ColAlias != NULL) { - outputNameSz = taosArrayGetSize(pObj->ColAlias); - } - if (tEncodeI32(pEncoder, outputNameSz) < 0) return -1; - for (int32_t i = 0; i < outputNameSz; i++) { - char *name = taosArrayGetP(pObj->ColAlias, i); - if (tEncodeCStr(pEncoder, name) < 0) return -1; - } -#endif return pEncoder->pos; } @@ -80,10 +70,12 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj) { if (tDecodeI64(pDecoder, &pObj->createTime) < 0) return -1; if (tDecodeI64(pDecoder, &pObj->updateTime) < 0) return -1; if (tDecodeI64(pDecoder, &pObj->uid) < 0) return -1; - if (tDecodeI64(pDecoder, &pObj->dbUid) < 0) return -1; + if (tDecodeI64(pDecoder, &pObj->sourceDbUid) < 0) return -1; + if (tDecodeI64(pDecoder, &pObj->targetDbUid) < 0) return -1; if (tDecodeI32(pDecoder, &pObj->version) < 0) return -1; + if (tDecodeI32(pDecoder, &pObj->vgNum) < 0) return -1; + if (tDecodeI8(pDecoder, &pObj->dropPolicy) < 0) return -1; if (tDecodeI8(pDecoder, &pObj->status) < 0) return -1; - if (tDecodeI8(pDecoder, &pObj->createdBy) < 0) return -1; if (tDecodeI8(pDecoder, &pObj->trigger) < 0) return -1; if (tDecodeI64(pDecoder, &pObj->triggerParam) < 0) return -1; if (tDecodeI64(pDecoder, &pObj->watermark) < 0) return -1; @@ -112,21 +104,6 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj) { } if (tDecodeSSchemaWrapper(pDecoder, &pObj->outputSchema) < 0) return -1; -#if 0 - int32_t outputNameSz; - if (tDecodeI32(pDecoder, &outputNameSz) < 0) return -1; - if (outputNameSz != 0) { - pObj->ColAlias = taosArrayInit(outputNameSz, sizeof(void *)); - if (pObj->ColAlias == NULL) { - return -1; - } - } - for (int32_t i = 0; i < outputNameSz; i++) { - char *name; - if (tDecodeCStrAlloc(pDecoder, &name) < 0) return -1; - taosArrayPush(pObj->ColAlias, &name); - } -#endif return 0; } diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index ffddab3a91..c53e97cd53 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -131,7 +131,7 @@ int32_t mndPersistTaskDeployReq(STrans* pTrans, SStreamTask* pTask, const SEpSet int32_t mndAddSinkToTask(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream, SStreamTask* pTask) { pTask->dispatchType = TASK_DISPATCH__NONE; // sink - if (pStream->createdBy == STREAM_CREATED_BY__SMA) { + if (pStream->smaId != 0) { pTask->sinkType = TASK_SINK__SMA; pTask->smaSink.smaId = pStream->smaId; } else { @@ -275,7 +275,7 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, STrans* pTrans, SStreamOb pTask->execType = TASK_EXEC__NONE; // sink - if (pStream->createdBy == STREAM_CREATED_BY__SMA) { + if (pStream->smaId != 0) { pTask->sinkType = TASK_SINK__SMA; pTask->smaSink.smaId = pStream->smaId; } else { @@ -321,7 +321,7 @@ int32_t mndAddFixedSinkTaskToStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pTask->execType = TASK_EXEC__NONE; // sink - if (pStream->createdBy == STREAM_CREATED_BY__SMA) { + if (pStream->smaId != 0) { pTask->sinkType = TASK_SINK__SMA; pTask->smaSink.smaId = pStream->smaId; } else { @@ -399,7 +399,7 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) { // exec pFinalTask->execType = TASK_EXEC__PIPE; - SVgObj* pVgroup = mndSchedFetchOneVg(pMnode, pStream->dbUid); + SVgObj* pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid); if (mndAssignTaskToVg(pMnode, pTrans, pFinalTask, plan, pVgroup) < 0) { sdbRelease(pSdb, pVgroup); qDestroyQueryPlan(pPlan); @@ -420,7 +420,7 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) { SVgObj* pVgroup; pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup); if (pIter == NULL) break; - if (pVgroup->dbUid != pStream->dbUid) { + if (pVgroup->dbUid != pStream->sourceDbUid) { sdbRelease(pSdb, pVgroup); continue; } @@ -463,7 +463,7 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) { SVgObj* pVgroup; pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup); if (pIter == NULL) break; - if (pVgroup->dbUid != pStream->dbUid) { + if (pVgroup->dbUid != pStream->sourceDbUid) { sdbRelease(pSdb, pVgroup); continue; } diff --git a/source/dnode/mnode/impl/src/mndSma.c b/source/dnode/mnode/impl/src/mndSma.c index 0f6bb74d9a..54fd0fe867 100644 --- a/source/dnode/mnode/impl/src/mndSma.c +++ b/source/dnode/mnode/impl/src/mndSma.c @@ -520,6 +520,7 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea memcpy(smaObj.db, pDb->name, TSDB_DB_FNAME_LEN); smaObj.createdTime = taosGetTimestampMs(); smaObj.uid = mndGenerateUid(pCreate->name, TSDB_TABLE_FNAME_LEN); + ASSERT(smaObj.uid != 0); char resultTbName[TSDB_TABLE_FNAME_LEN + 16] = {0}; snprintf(resultTbName, TSDB_TABLE_FNAME_LEN + 16, "td.tsma.rst.tb.%s", pCreate->name); memcpy(smaObj.dstTbName, resultTbName, TSDB_TABLE_FNAME_LEN); @@ -556,10 +557,9 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea streamObj.createTime = taosGetTimestampMs(); streamObj.updateTime = streamObj.createTime; streamObj.uid = mndGenerateUid(pCreate->name, strlen(pCreate->name)); - streamObj.dbUid = pDb->uid; + streamObj.sourceDbUid = pDb->uid; streamObj.version = 1; streamObj.sql = pCreate->sql; - streamObj.createdBy = STREAM_CREATED_BY__SMA; streamObj.smaId = smaObj.uid; streamObj.watermark = 0; streamObj.trigger = STREAM_TRIGGER_AT_ONCE; diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index d4a38dd17b..b83aa34a3a 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -28,7 +28,6 @@ #include "mndTrans.h" #include "mndUser.h" #include "mndVgroup.h" -#include "mndSma.h" #include "tname.h" #define STB_VER_NUMBER 1 @@ -1271,7 +1270,8 @@ static int32_t mndBuildStbSchemaImp(SDbObj *pDb, SStbObj *pStb, const char *tbNa return 0; } -static int32_t mndBuildStbSchema(SMnode *pMnode, const char *dbFName, const char *tbName, STableMetaRsp *pRsp, int32_t *smaVer) { +static int32_t mndBuildStbSchema(SMnode *pMnode, const char *dbFName, const char *tbName, STableMetaRsp *pRsp, + int32_t *smaVer) { char tbFName[TSDB_TABLE_FNAME_LEN] = {0}; snprintf(tbFName, sizeof(tbFName), "%s.%s", dbFName, tbName); @@ -1672,7 +1672,7 @@ int32_t mndValidateStbInfo(SMnode *pMnode, SSTableVersion *pStbVersions, int32_t terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - + for (int32_t i = 0; i < numOfStbs; ++i) { SSTableVersion *pStbVersion = &pStbVersions[i]; pStbVersion->suid = be64toh(pStbVersion->suid); @@ -1681,7 +1681,7 @@ int32_t mndValidateStbInfo(SMnode *pMnode, SSTableVersion *pStbVersions, int32_t pStbVersion->smaVer = ntohl(pStbVersion->smaVer); STableMetaRsp metaRsp = {0}; - int32_t smaVer = 0; + int32_t smaVer = 0; mDebug("stb:%s.%s, start to retrieve meta", pStbVersion->dbFName, pStbVersion->stbName); if (mndBuildStbSchema(pMnode, pStbVersion->dbFName, pStbVersion->stbName, &metaRsp, &smaVer) != 0) { metaRsp.numOfColumns = -1; @@ -1697,15 +1697,15 @@ int32_t mndValidateStbInfo(SMnode *pMnode, SSTableVersion *pStbVersions, int32_t } if (pStbVersion->smaVer && pStbVersion->smaVer != smaVer) { - bool exist = false; - char tbFName[TSDB_TABLE_FNAME_LEN]; + bool exist = false; + char tbFName[TSDB_TABLE_FNAME_LEN]; STableIndexRsp indexRsp = {0}; indexRsp.pIndex = taosArrayInit(10, sizeof(STableIndexInfo)); if (NULL == indexRsp.pIndex) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - + sprintf(tbFName, "%s.%s", pStbVersion->dbFName, pStbVersion->stbName); int32_t code = mndGetTableSma(pMnode, tbFName, &indexRsp, &exist); if (code || !exist) { @@ -1769,16 +1769,23 @@ int32_t mndGetNumOfStbs(SMnode *pMnode, char *dbName, int32_t *pNumOfStbs) { return 0; } -static void mndExtractTableName(char *tableId, char *name) { +void mndExtractDbNameFromStbFullName(const char *stbFullName, char *dst) { + SName name = {0}; + tNameFromString(&name, stbFullName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); + + tNameGetFullDbName(&name, dst); +} + +void mndExtractTbNameFromStbFullName(const char *stbFullName, char *dst, int32_t dstSize) { int32_t pos = -1; int32_t num = 0; - for (pos = 0; tableId[pos] != 0; ++pos) { - if (tableId[pos] == TS_PATH_DELIMITER[0]) num++; + for (pos = 0; stbFullName[pos] != 0; ++pos) { + if (stbFullName[pos] == TS_PATH_DELIMITER[0]) num++; if (num == 2) break; } if (num == 2) { - strcpy(name, tableId + pos + 1); + tstrncpy(dst, stbFullName + pos + 1, dstSize); } } @@ -1808,7 +1815,7 @@ static int32_t mndRetrieveStb(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBloc SName name = {0}; char stbName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; - mndExtractTableName(pStb->name, &stbName[VARSTR_HEADER_SIZE]); + mndExtractTbNameFromStbFullName(pStb->name, &stbName[VARSTR_HEADER_SIZE], TSDB_TABLE_NAME_LEN); varDataSetLen(stbName, strlen(&stbName[VARSTR_HEADER_SIZE])); SColumnInfoData *pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 491835fc15..de0b25b1dc 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -196,7 +196,8 @@ void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream) { } static int32_t mndCheckCreateStreamReq(SCMCreateStreamReq *pCreate) { - if (pCreate->name[0] == 0 || pCreate->sql == NULL || pCreate->sql[0] == 0) { + if (pCreate->name[0] == 0 || pCreate->sql == NULL || pCreate->sql[0] == 0 || pCreate->sourceDB[0] == 0 || + pCreate->targetStbFullName[0] == 0) { terrno = TSDB_CODE_MND_INVALID_STREAM_OPTION; return -1; } @@ -232,6 +233,114 @@ static int32_t mndStreamGetPlanString(const char *ast, int8_t triggerType, int64 return code; } +static int32_t mndBuildStreamObjFromCreateReq(SMnode *pMnode, SStreamObj *pObj, SCMCreateStreamReq *pCreate) { + SNode *pAst = NULL; + + mDebug("stream:%s to create", pCreate->name); + memcpy(pObj->name, pCreate->name, TSDB_STREAM_FNAME_LEN); + pObj->createTime = taosGetTimestampMs(); + pObj->updateTime = pObj->createTime; + pObj->version = 1; + pObj->smaId = 0; + + pObj->uid = mndGenerateUid(pObj->name, strlen(pObj->name)); + pObj->status = 0; + + // TODO + pObj->dropPolicy = 0; + pObj->trigger = pCreate->triggerType; + pObj->triggerParam = pCreate->maxDelay; + pObj->watermark = pCreate->watermark; + + memcpy(pObj->sourceDb, pCreate->sourceDB, TSDB_DB_FNAME_LEN); + SDbObj *pSourceDb = mndAcquireDb(pMnode, pCreate->sourceDB); + if (pSourceDb == NULL) { + /*ASSERT(0);*/ + mDebug("stream:%s failed to create, source db %s not exist", pCreate->name, pObj->sourceDb); + terrno = TSDB_CODE_MND_DB_NOT_EXIST; + return -1; + } + pObj->sourceDbUid = pSourceDb->uid; + + memcpy(pObj->targetSTbName, pCreate->targetStbFullName, TSDB_TABLE_FNAME_LEN); + + SDbObj *pTargetDb = mndAcquireDbByStb(pMnode, pObj->targetSTbName); + if (pTargetDb == NULL) { + mDebug("stream:%s failed to create, target db %s not exist", pCreate->name, pObj->targetDb); + terrno = TSDB_CODE_MND_DB_NOT_EXIST; + return -1; + } + tstrncpy(pObj->targetDb, pTargetDb->name, TSDB_DB_FNAME_LEN); + + pObj->targetStbUid = mndGenerateUid(pObj->targetSTbName, TSDB_TABLE_FNAME_LEN); + pObj->targetDbUid = pTargetDb->uid; + + pObj->sql = pCreate->sql; + pObj->ast = pCreate->ast; + + pCreate->sql = NULL; + pCreate->ast = NULL; + + // deserialize ast + if (nodesStringToNode(pObj->ast, &pAst) < 0) { + /*ASSERT(0);*/ + goto FAIL; + } + + // extract output schema from ast + if (qExtractResultSchema(pAst, (int32_t *)&pObj->outputSchema.nCols, &pObj->outputSchema.pSchema) != 0) { + /*ASSERT(0);*/ + goto FAIL; + } + + SQueryPlan *pPlan = NULL; + SPlanContext cxt = { + .pAstRoot = pAst, + .topicQuery = false, + .streamQuery = true, + .triggerType = pObj->trigger == STREAM_TRIGGER_MAX_DELAY ? STREAM_TRIGGER_WINDOW_CLOSE : pObj->trigger, + .watermark = pObj->watermark, + }; + + // using ast and param to build physical plan + if (qCreateQueryPlan(&cxt, &pPlan, NULL) < 0) { + /*ASSERT(0);*/ + goto FAIL; + } + + // save physcial plan + if (nodesNodeToString((SNode *)pPlan, false, &pObj->physicalPlan, NULL) != 0) { + /*ASSERT(0);*/ + goto FAIL; + } + +FAIL: + if (pAst != NULL) nodesDestroyNode(pAst); + return 0; +} + +int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) { + SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); + mndTransDrop(pTrans); + return -1; + } + sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); + return 0; +} + +int32_t mndPersistDropStreamLog(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) { + SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); + mndTransDrop(pTrans); + return -1; + } + sdbSetRawStatus(pCommitRaw, SDB_STATUS_DROPPED); + return 0; +} + int32_t mndAddStreamToTrans(SMnode *pMnode, SStreamObj *pStream, const char *ast, STrans *pTrans) { SNode *pAst = NULL; @@ -356,24 +465,24 @@ static int32_t mndPersistTaskDropReq(STrans *pTrans, SStreamTask *pTask) { ASSERT(pTask->nodeId != 0); // vnode - if (pTask->nodeId > 0) { - SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); - if (pReq == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - pReq->head.vgId = htonl(pTask->nodeId); - pReq->taskId = pTask->taskId; - STransAction action = {0}; - memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet)); - action.pCont = pReq; - action.contLen = sizeof(SVDropStreamTaskReq); - action.msgType = TDMT_VND_STREAM_TASK_DROP; - if (mndTransAppendRedoAction(pTrans, &action) != 0) { - taosMemoryFree(pReq); - return -1; - } + /*if (pTask->nodeId > 0) {*/ + SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); + if (pReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; } + pReq->head.vgId = htonl(pTask->nodeId); + pReq->taskId = pTask->taskId; + STransAction action = {0}; + memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet)); + action.pCont = pReq; + action.contLen = sizeof(SVDropStreamTaskReq); + action.msgType = TDMT_VND_STREAM_TASK_DROP; + if (mndTransAppendRedoAction(pTrans, &action) != 0) { + taosMemoryFree(pReq); + return -1; + } + /*}*/ return 0; } @@ -403,10 +512,9 @@ static int32_t mndCreateStream(SMnode *pMnode, SRpcMsg *pReq, SCMCreateStreamReq streamObj.updateTime = streamObj.createTime; streamObj.uid = mndGenerateUid(pCreate->name, strlen(pCreate->name)); streamObj.targetStbUid = mndGenerateUid(pCreate->targetStbFullName, TSDB_TABLE_FNAME_LEN); - streamObj.dbUid = pDb->uid; + streamObj.sourceDbUid = pDb->uid; streamObj.version = 1; streamObj.sql = pCreate->sql; - streamObj.createdBy = STREAM_CREATED_BY__USER; // TODO streamObj.fixedSinkVgId = 0; streamObj.smaId = 0; @@ -486,7 +594,8 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } - // TODO check auth + // TODO check read auth for source and write auth for target +#if 0 pDb = mndAcquireDb(pMnode, createStreamReq.sourceDB); if (pDb == NULL) { terrno = TSDB_CODE_MND_DB_NOT_SELECTED; @@ -496,9 +605,52 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { if (mndCheckDbAuth(pMnode, pReq->conn.user, MND_OPER_WRITE_DB, pDb) != 0) { goto _OVER; } +#endif - code = mndCreateStream(pMnode, pReq, &createStreamReq, pDb); - if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; + // build stream obj from request + // + // schedule stream task for stream obj + // + SStreamObj streamObj = {0}; + if (mndBuildStreamObjFromCreateReq(pMnode, &streamObj, &createStreamReq) < 0) { + ASSERT(0); + mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr()); + goto _OVER; + } + + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pReq); + if (pTrans == NULL) { + mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr()); + goto _OVER; + } + mndTransSetDbName(pTrans, streamObj.sourceDb); + // TODO + /*mndTransSetDbName(pTrans, streamObj.targetDb);*/ + mDebug("trans:%d, used to create stream:%s", pTrans->id, createStreamReq.name); + + if (mndCreateStbForStream(pMnode, pTrans, &streamObj, pReq->conn.user) < 0) { + mError("trans:%d, failed to create stb for stream %s since %s", pTrans->id, createStreamReq.name, terrstr()); + mndTransDrop(pTrans); + goto _OVER; + } + + if (mndScheduleStream(pMnode, pTrans, &streamObj) < 0) { + mError("stream:%s, failed to schedule since %s", createStreamReq.name, terrstr()); + mndTransDrop(pTrans); + goto _OVER; + } + + if (mndTransPrepare(pMnode, pTrans) != 0) { + mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); + mndTransDrop(pTrans); + goto _OVER; + } + + mndTransDrop(pTrans); + + /*code = mndCreateStream(pMnode, pReq, &createStreamReq, pDb);*/ + /*if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS;*/ + code = TSDB_CODE_ACTION_IN_PROGRESS; _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -555,10 +707,54 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { return code; } + // drop stream + if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) { + sdbRelease(pMnode->pSdb, pStream); + return -1; + } + DROP_STREAM_OVER: return code; } +int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { + SSdb *pSdb = pMnode->pSdb; + + void *pIter = NULL; + SStreamObj *pStream = NULL; + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) break; + + if (pStream->sourceDbUid == pDb->uid || pStream->targetDbUid == pDb->uid) { + if (pStream->sourceDbUid != pStream->targetDbUid) { + sdbRelease(pSdb, pStream); + return -1; + } else { + // TODO drop all task on snode + if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) { + sdbRelease(pSdb, pStream); + return -1; + } + } + } else { + sdbRelease(pSdb, pStream); + continue; + } + +#if 0 + if (mndSetDropOffsetStreamLogs(pMnode, pTrans, pStream) < 0) { + sdbRelease(pSdb, pStream); + goto END; + } +#endif + + sdbRelease(pSdb, pStream); + } + + return 0; +} + static int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams) { SSdb *pSdb = pMnode->pSdb; SDbObj *pDb = mndAcquireDb(pMnode, dbName); @@ -574,7 +770,7 @@ static int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfS pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); if (pIter == NULL) break; - if (pStream->dbUid == pDb->uid) { + if (pStream->sourceDbUid == pDb->uid) { numOfStreams++; } From 2f81a41f98ed0a49d7329eb561753971b6a4faba Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Thu, 16 Jun 2022 16:16:40 +0800 Subject: [PATCH 22/36] refactor(sync): add syncIsReady --- include/libs/sync/sync.h | 1 + source/libs/sync/src/syncMain.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index e694203563..fc9d419b1f 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -182,6 +182,7 @@ void syncStart(int64_t rid); void syncStop(int64_t rid); int32_t syncSetStandby(int64_t rid); ESyncState syncGetMyRole(int64_t rid); +bool syncIsReady(int64_t rid); const char* syncGetMyRoleStr(int64_t rid); SyncTerm syncGetMyTerm(int64_t rid); void syncGetEpSet(int64_t rid, SEpSet* pEpSet); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 05a41daa9e..cc2c82fa5f 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -381,6 +381,17 @@ ESyncState syncGetMyRole(int64_t rid) { return state; } +bool syncIsReady(int64_t rid) { + SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid); + if (pSyncNode == NULL) { + return false; + } + assert(rid == pSyncNode->rid); + bool b = (pSyncNode->state == TAOS_SYNC_STATE_LEADER) && pSyncNode->restoreFinish; + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return b; +} + bool syncIsRestoreFinish(int64_t rid) { SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid); if (pSyncNode == NULL) { From 87300b2ab047d2fecdffb0c4b542c8379b3af8d4 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 16 Jun 2022 16:21:01 +0800 Subject: [PATCH 23/36] fix(query): set correct error code. --- source/libs/executor/src/executorimpl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 9ce22431e1..bcd7b7f86f 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -4532,12 +4532,14 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo tsdbReaderT pDataReader = doCreateDataReader(pTableScanNode, pHandle, pTableListInfo, (uint64_t)queryId, taskId, pTagCond); if (pDataReader == NULL && terrno != 0) { + pTaskInfo->code = terrno; return NULL; } int32_t code = extractTableSchemaVersion(pHandle, pTableScanNode->scan.uid, pTaskInfo); if (code) { tsdbCleanupReadHandle(pDataReader); + pTaskInfo->code = terrno; return NULL; } @@ -4546,6 +4548,7 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo taosArrayDestroy(groupKeys); if (code) { tsdbCleanupReadHandle(pDataReader); + pTaskInfo->code = terrno; return NULL; } From 32d8e60ce249dc3cc571736bf2c7eba0327a2613 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Thu, 16 Jun 2022 16:22:58 +0800 Subject: [PATCH 24/36] refactor(sma): create --- source/dnode/mnode/impl/inc/mndDef.h | 8 +-- source/dnode/mnode/impl/inc/mndScheduler.h | 2 + source/dnode/mnode/impl/inc/mndStream.h | 2 +- source/dnode/mnode/impl/src/mndDef.c | 79 ++++++++++++---------- source/dnode/mnode/impl/src/mndScheduler.c | 2 - source/dnode/mnode/impl/src/mndSma.c | 38 ++++++++++- source/dnode/mnode/impl/src/mndStream.c | 8 ++- 7 files changed, 93 insertions(+), 46 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index db18f87830..4daeeaa9bf 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -546,13 +546,10 @@ typedef struct { int64_t createTime; int64_t updateTime; int32_t version; - // TODO remove it int64_t smaId; // 0 for unused // info int64_t uid; int8_t status; - // TODO remove it - int32_t vgNum; // config int8_t dropPolicy; int8_t trigger; @@ -565,9 +562,10 @@ typedef struct { char targetDb[TSDB_DB_FNAME_LEN]; char targetSTbName[TSDB_TABLE_FNAME_LEN]; int64_t targetStbUid; - // assigned when scheduling int32_t fixedSinkVgId; // 0 for shuffle - SVgObj fixedSinkVg; + // fixedSinkVg is not applicable for encode and decode + SVgObj fixedSinkVg; + // transformation char* sql; char* ast; diff --git a/source/dnode/mnode/impl/inc/mndScheduler.h b/source/dnode/mnode/impl/inc/mndScheduler.h index 80fe472c56..8e816d2dd6 100644 --- a/source/dnode/mnode/impl/inc/mndScheduler.h +++ b/source/dnode/mnode/impl/inc/mndScheduler.h @@ -32,6 +32,8 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream); int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType, int64_t watermark, double filesFactor); +int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream); + #ifdef __cplusplus } #endif diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index 65017027dc..6616e84488 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -31,7 +31,7 @@ void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream); SSdbRaw *mndStreamActionEncode(SStreamObj *pStream); SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw); -int32_t mndAddStreamToTrans(SMnode *pMnode, SStreamObj *pStream, const char *ast, STrans *pTrans); +int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index b9bd1e4298..20ba71992e 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -18,35 +18,35 @@ #include "mndConsumer.h" int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) { - int32_t sz = 0; if (tEncodeCStr(pEncoder, pObj->name) < 0) return -1; + + if (tEncodeI64(pEncoder, pObj->createTime) < 0) return -1; + if (tEncodeI64(pEncoder, pObj->updateTime) < 0) return -1; + if (tEncodeI32(pEncoder, pObj->version) < 0) return -1; + if (tEncodeI64(pEncoder, pObj->smaId) < 0) return -1; + + if (tEncodeI64(pEncoder, pObj->uid) < 0) return -1; + if (tEncodeI8(pEncoder, pObj->status) < 0) return -1; + + if (tEncodeI8(pEncoder, pObj->dropPolicy) < 0) return -1; + if (tEncodeI8(pEncoder, pObj->trigger) < 0) return -1; + if (tEncodeI64(pEncoder, pObj->triggerParam) < 0) return -1; + if (tEncodeI64(pEncoder, pObj->watermark) < 0) return -1; + + if (tEncodeI64(pEncoder, pObj->sourceDbUid) < 0) return -1; + if (tEncodeI64(pEncoder, pObj->targetDbUid) < 0) return -1; if (tEncodeCStr(pEncoder, pObj->sourceDb) < 0) return -1; if (tEncodeCStr(pEncoder, pObj->targetDb) < 0) return -1; if (tEncodeCStr(pEncoder, pObj->targetSTbName) < 0) return -1; if (tEncodeI64(pEncoder, pObj->targetStbUid) < 0) return -1; - if (tEncodeI64(pEncoder, pObj->createTime) < 0) return -1; - if (tEncodeI64(pEncoder, pObj->updateTime) < 0) return -1; - if (tEncodeI64(pEncoder, pObj->uid) < 0) return -1; - if (tEncodeI64(pEncoder, pObj->sourceDbUid) < 0) return -1; - if (tEncodeI64(pEncoder, pObj->targetDbUid) < 0) return -1; - if (tEncodeI32(pEncoder, pObj->version) < 0) return -1; - if (tEncodeI32(pEncoder, pObj->vgNum) < 0) return -1; - if (tEncodeI8(pEncoder, pObj->dropPolicy) < 0) return -1; - if (tEncodeI8(pEncoder, pObj->status) < 0) return -1; - if (tEncodeI8(pEncoder, pObj->trigger) < 0) return -1; - if (tEncodeI64(pEncoder, pObj->triggerParam) < 0) return -1; - if (tEncodeI64(pEncoder, pObj->watermark) < 0) return -1; if (tEncodeI32(pEncoder, pObj->fixedSinkVgId) < 0) return -1; - if (tEncodeI64(pEncoder, pObj->smaId) < 0) return -1; - if (tEncodeCStr(pEncoder, pObj->sql) < 0) return -1; - /*if (tEncodeCStr(pEncoder, pObj->logicalPlan) < 0) return -1;*/ - if (tEncodeCStr(pEncoder, pObj->physicalPlan) < 0) return -1; - // TODO encode tasks - if (pObj->tasks) { - sz = taosArrayGetSize(pObj->tasks); - } - if (tEncodeI32(pEncoder, sz) < 0) return -1; + if (tEncodeCStr(pEncoder, pObj->sql) < 0) return -1; + if (tEncodeCStr(pEncoder, pObj->ast) < 0) return -1; + if (tEncodeCStr(pEncoder, pObj->physicalPlan) < 0) return -1; + + int32_t sz = taosArrayGetSize(pObj->tasks); + if (tEncodeI32(pEncoder, sz) < 0) return -1; for (int32_t i = 0; i < sz; i++) { SArray *pArray = taosArrayGetP(pObj->tasks, i); int32_t innerSz = taosArrayGetSize(pArray); @@ -58,32 +58,38 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) { } if (tEncodeSSchemaWrapper(pEncoder, &pObj->outputSchema) < 0) return -1; + return pEncoder->pos; } int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj) { if (tDecodeCStrTo(pDecoder, pObj->name) < 0) return -1; + + if (tDecodeI64(pDecoder, &pObj->createTime) < 0) return -1; + if (tDecodeI64(pDecoder, &pObj->updateTime) < 0) return -1; + if (tDecodeI32(pDecoder, &pObj->version) < 0) return -1; + if (tDecodeI64(pDecoder, &pObj->smaId) < 0) return -1; + + if (tDecodeI64(pDecoder, &pObj->uid) < 0) return -1; + if (tDecodeI8(pDecoder, &pObj->status) < 0) return -1; + + if (tDecodeI8(pDecoder, &pObj->dropPolicy) < 0) return -1; + if (tDecodeI8(pDecoder, &pObj->trigger) < 0) return -1; + if (tDecodeI64(pDecoder, &pObj->triggerParam) < 0) return -1; + if (tDecodeI64(pDecoder, &pObj->watermark) < 0) return -1; + + if (tDecodeI64(pDecoder, &pObj->sourceDbUid) < 0) return -1; + if (tDecodeI64(pDecoder, &pObj->targetDbUid) < 0) return -1; if (tDecodeCStrTo(pDecoder, pObj->sourceDb) < 0) return -1; if (tDecodeCStrTo(pDecoder, pObj->targetDb) < 0) return -1; if (tDecodeCStrTo(pDecoder, pObj->targetSTbName) < 0) return -1; if (tDecodeI64(pDecoder, &pObj->targetStbUid) < 0) return -1; - if (tDecodeI64(pDecoder, &pObj->createTime) < 0) return -1; - if (tDecodeI64(pDecoder, &pObj->updateTime) < 0) return -1; - if (tDecodeI64(pDecoder, &pObj->uid) < 0) return -1; - if (tDecodeI64(pDecoder, &pObj->sourceDbUid) < 0) return -1; - if (tDecodeI64(pDecoder, &pObj->targetDbUid) < 0) return -1; - if (tDecodeI32(pDecoder, &pObj->version) < 0) return -1; - if (tDecodeI32(pDecoder, &pObj->vgNum) < 0) return -1; - if (tDecodeI8(pDecoder, &pObj->dropPolicy) < 0) return -1; - if (tDecodeI8(pDecoder, &pObj->status) < 0) return -1; - if (tDecodeI8(pDecoder, &pObj->trigger) < 0) return -1; - if (tDecodeI64(pDecoder, &pObj->triggerParam) < 0) return -1; - if (tDecodeI64(pDecoder, &pObj->watermark) < 0) return -1; if (tDecodeI32(pDecoder, &pObj->fixedSinkVgId) < 0) return -1; - if (tDecodeI64(pDecoder, &pObj->smaId) < 0) return -1; + if (tDecodeCStrAlloc(pDecoder, &pObj->sql) < 0) return -1; - /*if (tDecodeCStrAlloc(pDecoder, &pObj->logicalPlan) < 0) return -1;*/ + if (tDecodeCStrAlloc(pDecoder, &pObj->ast) < 0) return -1; if (tDecodeCStrAlloc(pDecoder, &pObj->physicalPlan) < 0) return -1; + pObj->tasks = NULL; int32_t sz; if (tDecodeI32(pDecoder, &sz) < 0) return -1; @@ -104,6 +110,7 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj) { } if (tDecodeSSchemaWrapper(pDecoder, &pObj->outputSchema) < 0) return -1; + return 0; } diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index c53e97cd53..6f8fc748c2 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -346,8 +346,6 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) { terrno = TSDB_CODE_QRY_INVALID_INPUT; return -1; } - ASSERT(pStream->vgNum == 0); - int32_t totLevel = LIST_LENGTH(pPlan->pSubplans); ASSERT(totLevel <= 2); pStream->tasks = taosArrayInit(totLevel, sizeof(void*)); diff --git a/source/dnode/mnode/impl/src/mndSma.c b/source/dnode/mnode/impl/src/mndSma.c index 54fd0fe867..3eb5b03efd 100644 --- a/source/dnode/mnode/impl/src/mndSma.c +++ b/source/dnode/mnode/impl/src/mndSma.c @@ -20,6 +20,7 @@ #include "mndDnode.h" #include "mndInfoSchema.h" #include "mndMnode.h" +#include "mndScheduler.h" #include "mndShow.h" #include "mndStb.h" #include "mndStream.h" @@ -563,6 +564,7 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea streamObj.smaId = smaObj.uid; streamObj.watermark = 0; streamObj.trigger = STREAM_TRIGGER_AT_ONCE; + streamObj.ast = strdup(smaObj.ast); if (mndAllocSmaVgroup(pMnode, pDb, &streamObj.fixedSinkVg) != 0) { mError("sma:%s, failed to create since %s", smaObj.name, terrstr()); @@ -571,6 +573,39 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea smaObj.dstVgId = streamObj.fixedSinkVg.vgId; streamObj.fixedSinkVgId = smaObj.dstVgId; + SNode *pAst = NULL; + if (nodesStringToNode(streamObj.ast, &pAst) < 0) { + ASSERT(0); + return -1; + } + + // extract output schema from ast + if (qExtractResultSchema(pAst, (int32_t *)&streamObj.outputSchema.nCols, &streamObj.outputSchema.pSchema) != 0) { + ASSERT(0); + return -1; + } + + SQueryPlan *pPlan = NULL; + SPlanContext cxt = { + .pAstRoot = pAst, + .topicQuery = false, + .streamQuery = true, + .triggerType = streamObj.trigger, + .watermark = streamObj.watermark, + }; + + if (qCreateQueryPlan(&cxt, &pPlan, NULL) < 0) { + ASSERT(0); + return -1; + } + + // save physcial plan + if (nodesNodeToString((SNode *)pPlan, false, &streamObj.physicalPlan, NULL) != 0) { + ASSERT(0); + return -1; + } + if (pAst != NULL) nodesDestroyNode(pAst); + int32_t code = -1; STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB, pReq); if (pTrans == NULL) goto _OVER; @@ -585,7 +620,8 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea if (mndSetUpdateSmaStbCommitLogs(pMnode, pTrans, pStb) != 0) goto _OVER; // if (mndSetCreateSmaRedoActions(pMnode, pTrans, pDb, &smaObj) != 0) goto _OVER; if (mndSetCreateSmaVgroupRedoActions(pMnode, pTrans, pDb, &streamObj.fixedSinkVg, &smaObj) != 0) goto _OVER; - if (mndAddStreamToTrans(pMnode, &streamObj, pCreate->ast, pTrans) != 0) goto _OVER; + if (mndScheduleStream(pMnode, pTrans, &streamObj) != 0) goto _OVER; + if (mndPersistStream(pMnode, pTrans, &streamObj) != 0) goto _OVER; if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; code = 0; diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index de0b25b1dc..c1df9dd68c 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -640,6 +640,12 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } + if (mndPersistStream(pMnode, pTrans, &streamObj) < 0) { + mError("stream:%s, failed to schedule since %s", createStreamReq.name, terrstr()); + mndTransDrop(pTrans); + goto _OVER; + } + if (mndTransPrepare(pMnode, pTrans) != 0) { mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); mndTransDrop(pTrans); @@ -707,7 +713,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { return code; } - // drop stream + // drop stream if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) { sdbRelease(pMnode->pSdb, pStream); return -1; From 44e733645cf8fd75b5fc291e960d39b152405062 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Thu, 16 Jun 2022 16:35:55 +0800 Subject: [PATCH 25/36] ci: remove unstable case --- source/dnode/mnode/impl/inc/mndStream.h | 3 +-- source/dnode/mnode/impl/src/mndStream.c | 28 +++++++++++++------------ tests/system-test/fulltest.sh | 2 +- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index 6616e84488..69385c3a46 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -31,9 +31,8 @@ void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream); SSdbRaw *mndStreamActionEncode(SStreamObj *pStream); SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw); -int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); - int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); +int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index c1df9dd68c..0d5bc9b021 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -607,10 +607,18 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { } #endif + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pReq); + if (pTrans == NULL) { + mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr()); + goto _OVER; + } + + mndTransSetDbName(pTrans, createStreamReq.sourceDB); + // TODO + /*mndTransSetDbName(pTrans, streamObj.targetDb);*/ + mDebug("trans:%d, used to create stream:%s", pTrans->id, createStreamReq.name); + // build stream obj from request - // - // schedule stream task for stream obj - // SStreamObj streamObj = {0}; if (mndBuildStreamObjFromCreateReq(pMnode, &streamObj, &createStreamReq) < 0) { ASSERT(0); @@ -618,34 +626,28 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pReq); - if (pTrans == NULL) { - mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr()); - goto _OVER; - } - mndTransSetDbName(pTrans, streamObj.sourceDb); - // TODO - /*mndTransSetDbName(pTrans, streamObj.targetDb);*/ - mDebug("trans:%d, used to create stream:%s", pTrans->id, createStreamReq.name); - + // create stb for stream if (mndCreateStbForStream(pMnode, pTrans, &streamObj, pReq->conn.user) < 0) { mError("trans:%d, failed to create stb for stream %s since %s", pTrans->id, createStreamReq.name, terrstr()); mndTransDrop(pTrans); goto _OVER; } + // schedule stream task for stream obj if (mndScheduleStream(pMnode, pTrans, &streamObj) < 0) { mError("stream:%s, failed to schedule since %s", createStreamReq.name, terrstr()); mndTransDrop(pTrans); goto _OVER; } + // add stream to trans if (mndPersistStream(pMnode, pTrans, &streamObj) < 0) { mError("stream:%s, failed to schedule since %s", createStreamReq.name, terrstr()); mndTransDrop(pTrans); goto _OVER; } + // execute creation if (mndTransPrepare(pMnode, pTrans) != 0) { mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); mndTransDrop(pTrans); diff --git a/tests/system-test/fulltest.sh b/tests/system-test/fulltest.sh index 552ed26f34..b9f7e0f2eb 100755 --- a/tests/system-test/fulltest.sh +++ b/tests/system-test/fulltest.sh @@ -101,7 +101,7 @@ python3 ./test.py -f 2-query/tail.py python3 ./test.py -f 6-cluster/5dnode1mnode.py python3 ./test.py -f 6-cluster/5dnode2mnode.py -python3 ./test.py -f 6-cluster/5dnode3mnodeStop.py +#python3 ./test.py -f 6-cluster/5dnode3mnodeStop.py python3 ./test.py -f 6-cluster/5dnode3mnodeDrop.py # BUG python3 ./test.py -f 6-cluster/5dnode3mnodeStopInsert.py From 07288e26b2ffeb04aadb943cc67f28b64c2113ce Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Thu, 16 Jun 2022 17:07:31 +0800 Subject: [PATCH 26/36] refactor(stream): change msg name --- include/common/tmsg.h | 4 ++-- source/dnode/mnode/impl/src/mndStream.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 01d99ac705..ab0203adf8 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2259,11 +2259,11 @@ typedef struct { typedef struct { char name[TSDB_STREAM_FNAME_LEN]; int8_t igNotExists; -} SMDropStreamTaskReq; +} SMDropStreamReq; typedef struct { int8_t reserved; -} SMDropStreamTaskRsp; +} SMDropStreamRsp; typedef struct { SMsgHead head; diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 0d5bc9b021..3a42c694c8 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -679,7 +679,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { /*SDbObj *pDb = NULL;*/ /*SUserObj *pUser = NULL;*/ - SMDropStreamTaskReq dropReq = *(SMDropStreamTaskReq *)pReq->pCont; + SMDropStreamReq dropReq = *(SMDropStreamReq *)pReq->pCont; pStream = mndAcquireStream(pMnode, dropReq.name); From bd518f3f71557f86b9d5b5273603ca6d14e066a3 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 16 Jun 2022 17:39:33 +0800 Subject: [PATCH 27/36] fix(query): remove group info during sort. --- source/libs/executor/inc/executorimpl.h | 8 --- source/libs/executor/inc/tsort.h | 6 --- source/libs/executor/src/executorimpl.c | 31 +++-------- source/libs/executor/src/scanoperator.c | 14 +---- source/libs/executor/src/sortoperator.c | 70 +++++-------------------- source/libs/executor/src/tsort.c | 57 ++++++-------------- 6 files changed, 37 insertions(+), 149 deletions(-) diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index b435446513..034e2893df 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -696,10 +696,6 @@ typedef struct SSortedMergeOperatorInfo { int32_t numOfResPerPage; char** groupVal; SArray *groupInfo; - - bool hasGroupId; - uint64_t groupId; - STupleHandle* prefetchedTuple; } SSortedMergeOperatorInfo; typedef struct SSortOperatorInfo { @@ -712,10 +708,6 @@ typedef struct SSortOperatorInfo { int64_t startTs; // sort start time uint64_t sortElapsed; // sort elapsed time, time to flush to disk not included. - - STupleHandle *prefetchedTuple; - bool hasGroupId; - uint64_t groupId; } SSortOperatorInfo; typedef struct STagFilterOperatorInfo { diff --git a/source/libs/executor/inc/tsort.h b/source/libs/executor/inc/tsort.h index e731c55a7d..86ee841cc2 100644 --- a/source/libs/executor/inc/tsort.h +++ b/source/libs/executor/inc/tsort.h @@ -130,12 +130,6 @@ bool tsortIsNullVal(STupleHandle* pVHandle, int32_t colId); */ void* tsortGetValue(STupleHandle* pVHandle, int32_t colId); -/** - * - * @param pVHandle - * @return - */ -uint64_t tsortGetGroupId(STupleHandle* pVHandle); /** * * @param pSortHandle diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index bcd7b7f86f..daee71b666 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -3030,31 +3030,12 @@ SSDataBlock* getSortedMergeBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlo blockDataEnsureCapacity(p, capacity); while (1) { - STupleHandle* pTupleHandle = NULL; - if (pInfo->prefetchedTuple == NULL) { - pTupleHandle = tsortNextTuple(pHandle); - } else { - pTupleHandle = pInfo->prefetchedTuple; - pInfo->groupId = tsortGetGroupId(pTupleHandle); - pInfo->prefetchedTuple = NULL; - } - + STupleHandle* pTupleHandle = tsortNextTuple(pHandle); if (pTupleHandle == NULL) { break; } - uint64_t tupleGroupId = tsortGetGroupId(pTupleHandle); - if (!pInfo->hasGroupId) { - pInfo->groupId = tupleGroupId; - pInfo->hasGroupId = true; - appendOneRowToDataBlock(p, pTupleHandle); - } else if (pInfo->groupId == tupleGroupId) { - appendOneRowToDataBlock(p, pTupleHandle); - } else { - pInfo->prefetchedTuple = pTupleHandle; - break; - } - + appendOneRowToDataBlock(p, pTupleHandle); if (p->info.rows >= capacity) { break; } @@ -3073,7 +3054,6 @@ SSDataBlock* getSortedMergeBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlo pDataBlock->info.rows = p->info.rows; pDataBlock->info.capacity = p->info.rows; - pDataBlock->info.groupId = pInfo->groupId; } blockDataDestroy(p); @@ -3339,7 +3319,7 @@ static SSDataBlock* getAggregateResult(SOperatorInfo* pOperator) { doSetOperatorCompleted(pOperator); } - size_t rows = blockDataGetNumOfRows(pInfo->pRes); // pInfo->pRes : NULL; + size_t rows = blockDataGetNumOfRows(pInfo->pRes); pOperator->resultInfo.totalRows += rows; return (rows == 0) ? NULL : pInfo->pRes; @@ -4920,7 +4900,10 @@ SArray* extractColumnInfo(SNodeList* pNodeList) { } SArray* extractPartitionColInfo(SNodeList* pNodeList) { - if(!pNodeList) return NULL; + if(!pNodeList) { + return NULL; + } + size_t numOfCols = LIST_LENGTH(pNodeList); SArray* pList = taosArrayInit(numOfCols, sizeof(SColumn)); if (pList == NULL) { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 1ffb1529d7..ddebba1c9e 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2175,25 +2175,13 @@ SSDataBlock* getSortedTableMergeScanBlockData(SSortHandle* pHandle, int32_t capa pTupleHandle = tsortNextTuple(pHandle); } else { pTupleHandle = pInfo->prefetchedTuple; - pInfo->groupId = tsortGetGroupId(pTupleHandle); - pInfo->prefetchedTuple = NULL; } if (pTupleHandle == NULL) { break; } - uint64_t tupleGroupId = tsortGetGroupId(pTupleHandle); - if (!pInfo->hasGroupId) { - pInfo->groupId = tupleGroupId; - pInfo->hasGroupId = true; - appendOneRowToDataBlock(p, pTupleHandle); - } else if (pInfo->groupId == tupleGroupId) { - appendOneRowToDataBlock(p, pTupleHandle); - } else { - pInfo->prefetchedTuple = pTupleHandle; - break; - } + appendOneRowToDataBlock(p, pTupleHandle); if (p->info.rows >= capacity) { break; diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index 2f2080d5fe..81899b68cd 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -40,15 +40,13 @@ SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSDataBlock* pR initResultSizeInfo(pOperator, 1024); - pInfo->pSortInfo = pSortInfo; - pInfo->pColMatchInfo = pColMatchColInfo; - pInfo->hasGroupId = false; - pInfo->prefetchedTuple = NULL; - pOperator->name = "SortOperator"; + pInfo->pSortInfo = pSortInfo; + pInfo->pColMatchInfo = pColMatchColInfo; + pOperator->name = "SortOperator"; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_SORT; - pOperator->blocking = true; - pOperator->status = OP_NOT_OPENED; - pOperator->info = pInfo; + pOperator->blocking = true; + pOperator->status = OP_NOT_OPENED; + pOperator->info = pInfo; // lazy evaluation for the following parameter since the input datablock is not known till now. // pInfo->bufPageSize = rowSize < 1024 ? 1024 * 2 : rowSize * 2; // there are headers, so pageSize = rowSize + @@ -97,31 +95,12 @@ SSDataBlock* getSortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, i blockDataEnsureCapacity(p, capacity); while (1) { - STupleHandle* pTupleHandle = NULL; - if (pInfo->prefetchedTuple == NULL) { - pTupleHandle = tsortNextTuple(pHandle); - } else { - pTupleHandle = pInfo->prefetchedTuple; - pInfo->groupId = tsortGetGroupId(pTupleHandle); - pInfo->prefetchedTuple = NULL; - } - + STupleHandle* pTupleHandle = tsortNextTuple(pHandle); if (pTupleHandle == NULL) { break; } - uint64_t tupleGroupId = tsortGetGroupId(pTupleHandle); - if (!pInfo->hasGroupId) { - pInfo->groupId = tupleGroupId; - pInfo->hasGroupId = true; - appendOneRowToDataBlock(p, pTupleHandle); - } else if (pInfo->groupId == tupleGroupId) { - appendOneRowToDataBlock(p, pTupleHandle); - } else { - pInfo->prefetchedTuple = pTupleHandle; - break; - } - + appendOneRowToDataBlock(p, pTupleHandle); if (p->info.rows >= capacity) { break; } @@ -140,7 +119,6 @@ SSDataBlock* getSortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, i pDataBlock->info.rows = p->info.rows; pDataBlock->info.capacity = p->info.rows; - pDataBlock->info.groupId = pInfo->groupId; } blockDataDestroy(p); @@ -255,10 +233,7 @@ typedef struct SMultiwaySortMergeOperatorInfo { SSDataBlock* pInputBlock; int64_t startTs; // sort start time - - bool hasGroupId; - uint64_t groupId; - STupleHandle* prefetchedTuple; + uint64_t groupId; } SMultiwaySortMergeOperatorInfo; int32_t doOpenMultiwaySortMergeOperator(SOperatorInfo* pOperator) { @@ -312,31 +287,12 @@ SSDataBlock* getMultiwaySortedBlockData(SSortHandle* pHandle, SSDataBlock* pData blockDataEnsureCapacity(p, capacity); while (1) { - STupleHandle* pTupleHandle = NULL; - if (pInfo->prefetchedTuple == NULL) { - pTupleHandle = tsortNextTuple(pHandle); - } else { - pTupleHandle = pInfo->prefetchedTuple; - pInfo->groupId = tsortGetGroupId(pTupleHandle); - pInfo->prefetchedTuple = NULL; - } - + STupleHandle* pTupleHandle = tsortNextTuple(pHandle); if (pTupleHandle == NULL) { break; } - uint64_t tupleGroupId = tsortGetGroupId(pTupleHandle); - if (!pInfo->hasGroupId) { - pInfo->groupId = tupleGroupId; - pInfo->hasGroupId = true; - appendOneRowToDataBlock(p, pTupleHandle); - } else if (pInfo->groupId == tupleGroupId) { - appendOneRowToDataBlock(p, pTupleHandle); - } else { - pInfo->prefetchedTuple = pTupleHandle; - break; - } - + appendOneRowToDataBlock(p, pTupleHandle); if (p->info.rows >= capacity) { break; } @@ -432,14 +388,12 @@ SOperatorInfo* createMultiwaySortMergeOperatorInfo(SOperatorInfo** downStreams, pOperator->blocking = false; pOperator->status = OP_NOT_OPENED; pOperator->info = pInfo; - pInfo->hasGroupId = false; - pInfo->prefetchedTuple = NULL; pOperator->pTaskInfo = pTaskInfo; pInfo->bufPageSize = getProperSortPageSize(rowSize); uint32_t numOfSources = taosArrayGetSize(pSortInfo); - numOfSources = TMAX(2, numOfSources); + numOfSources = TMAX(4, numOfSources); pInfo->sortBufSize = numOfSources * pInfo->bufPageSize; diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 5bcd58f8db..1502387360 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -557,59 +557,40 @@ static int32_t createInitialSources(SSortHandle* pHandle) { SSortSource* source = taosArrayGetP(pHandle->pOrderedSource, 0); taosArrayClear(pHandle->pOrderedSource); - bool hasGroupId = false; - SSDataBlock* prefetchedDataBlock = NULL; - while (1) { - SSDataBlock* pBlock = NULL; - if (prefetchedDataBlock == NULL) { - pBlock = pHandle->fetchfp(source->param); - } else { - pBlock = prefetchedDataBlock; - prefetchedDataBlock = NULL; - } - + SSDataBlock* pBlock = pHandle->fetchfp(source->param); if (pBlock == NULL) { break; } - if (!hasGroupId) { - // calculate the buffer pages according to the total available buffers. + if (pHandle->pDataBlock == NULL) { pHandle->pageSize = getProperSortPageSize(blockDataGetRowSize(pBlock)); // todo, number of pages are set according to the total available sort buffer pHandle->numOfPages = 1024; sortBufSize = pHandle->numOfPages * pHandle->pageSize; - - hasGroupId = true; pHandle->pDataBlock = createOneDataBlock(pBlock, false); } - if (pHandle->pDataBlock->info.groupId == pBlock->info.groupId) { - // perform the scalar function calculation before apply the sort - if (pHandle->beforeFp != NULL) { - pHandle->beforeFp(pBlock, pHandle->param); - } + if (pHandle->beforeFp != NULL) { + pHandle->beforeFp(pBlock, pHandle->param); + } - int32_t code = blockDataMerge(pHandle->pDataBlock, pBlock); - if (code != 0) { - return code; - } + int32_t code = blockDataMerge(pHandle->pDataBlock, pBlock); + if (code != 0) { + return code; + } - size_t size = blockDataGetSize(pHandle->pDataBlock); - if (size > sortBufSize) { - // Perform the in-memory sort and then flush data in the buffer into disk. - int64_t p = taosGetTimestampUs(); - blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); + size_t size = blockDataGetSize(pHandle->pDataBlock); + if (size > sortBufSize) { + // Perform the in-memory sort and then flush data in the buffer into disk. + int64_t p = taosGetTimestampUs(); + blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); - int64_t el = taosGetTimestampUs() - p; - pHandle->sortElapsed += el; + int64_t el = taosGetTimestampUs() - p; + pHandle->sortElapsed += el; - doAddToBuf(pHandle->pDataBlock, pHandle); - } - } else { - prefetchedDataBlock = pBlock; - pHandle->pDataBlock = createOneDataBlock(pBlock, false); + doAddToBuf(pHandle->pDataBlock, pHandle); } } @@ -758,10 +739,6 @@ void* tsortGetValue(STupleHandle* pVHandle, int32_t colIndex) { } } -uint64_t tsortGetGroupId(STupleHandle* pVHandle) { - return pVHandle->pBlock->info.groupId; -} - SSortExecInfo tsortGetSortExecInfo(SSortHandle* pHandle) { SSortExecInfo info = {0}; From 7b98430906e03208282be22ac084585172ac0a01 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao@163.com> Date: Thu, 16 Jun 2022 17:26:52 +0800 Subject: [PATCH 28/36] feat(stream): stream partition by --- source/libs/executor/src/timewindowoperator.c | 170 ++++++++++-------- .../tsim/stream/distributeInterval0.sim | 37 +++- tests/script/tsim/stream/partitionby.sim | 62 ++++++- 3 files changed, 195 insertions(+), 74 deletions(-) diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index f8972a02a1..d7ae823522 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -1257,6 +1257,10 @@ static int32_t getAllIntervalWindow(SHashObj* pHashMap, SArray* resWins) { return TSDB_CODE_SUCCESS; } +bool isCloseWindow(STimeWindow *pWin, STimeWindowAggSupp* pSup) { + return pWin->ekey < pSup->maxTs - pSup->waterMark; +} + static int32_t closeIntervalWindow(SHashObj* pHashMap, STimeWindowAggSupp* pSup, SInterval* pInterval, SArray* closeWins) { void* pIte = NULL; @@ -1269,7 +1273,7 @@ static int32_t closeIntervalWindow(SHashObj* pHashMap, STimeWindowAggSupp* pSup, SResultRowInfo dumyInfo; dumyInfo.cur.pageId = -1; STimeWindow win = getActiveTimeWindow(NULL, &dumyInfo, ts, pInterval, pInterval->precision, NULL); - if (win.ekey < pSup->maxTs - pSup->waterMark) { + if (isCloseWindow(&win, pSup)) { char keyBuf[GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY))]; SET_RES_WINDOW_KEY(keyBuf, &ts, sizeof(TSKEY), groupId); taosHashRemove(pHashMap, keyBuf, keyLen); @@ -2036,59 +2040,6 @@ _error: return NULL; } -static void doHashInterval(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBlock, int32_t tableGroupId, - SArray* pUpdated) { - SStreamFinalIntervalOperatorInfo* pInfo = (SStreamFinalIntervalOperatorInfo*)pOperatorInfo->info; - SResultRowInfo* pResultRowInfo = &(pInfo->binfo.resultRowInfo); - SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; - int32_t numOfOutput = pOperatorInfo->numOfExprs; - int32_t step = 1; - bool ascScan = true; - TSKEY* tsCols = NULL; - SResultRow* pResult = NULL; - int32_t forwardRows = 0; - - if (pSDataBlock->pDataBlock != NULL) { - SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); - tsCols = (int64_t*)pColDataInfo->pData; - } else { - return; - } - - int32_t startPos = ascScan ? 0 : (pSDataBlock->info.rows - 1); - TSKEY ts = getStartTsKey(&pSDataBlock->info.window, tsCols); - STimeWindow nextWin = getActiveTimeWindow(pInfo->aggSup.pResultBuf, pResultRowInfo, ts, &pInfo->interval, - pInfo->interval.precision, NULL); - while (1) { - int32_t code = setTimeWindowOutputBuf(pResultRowInfo, &nextWin, true, &pResult, tableGroupId, pInfo->binfo.pCtx, - numOfOutput, pInfo->binfo.rowCellInfoOffset, &pInfo->aggSup, pTaskInfo); - if (code != TSDB_CODE_SUCCESS || pResult == NULL) { - longjmp(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); - } - SResKeyPos* pos = taosMemoryMalloc(sizeof(SResKeyPos) + sizeof(uint64_t)); - pos->groupId = tableGroupId; - pos->pos = (SResultRowPosition){.pageId = pResult->pageId, .offset = pResult->offset}; - *(int64_t*)pos->key = pResult->win.skey; - forwardRows = getNumOfRowsInTimeWindow(&pSDataBlock->info, tsCols, startPos, nextWin.ekey, binarySearchForKey, NULL, - TSDB_ORDER_ASC); - if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pUpdated) { - saveResultRow(pResult, tableGroupId, pUpdated); - } - // window start(end) key interpolation - // doWindowBorderInterpolation(pInfo, pSDataBlock, numOfOutput, pInfo->binfo.pCtx, pResult, &nextWin, startPos, - // forwardRows); - updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &nextWin, true); - doApplyFunctions(pTaskInfo, pInfo->binfo.pCtx, &nextWin, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, - tsCols, pSDataBlock->info.rows, numOfOutput, TSDB_ORDER_ASC); - int32_t prevEndPos = (forwardRows - 1) * step + startPos; - ASSERT(pSDataBlock->info.window.skey > 0 && pSDataBlock->info.window.ekey > 0); - startPos = getNextQualifiedWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos, pInfo->order); - if (startPos < 0) { - break; - } - } -} - bool isFinalInterval(SStreamFinalIntervalOperatorInfo* pInfo) { return pInfo->pChildren != NULL; } void compactFunctions(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx, int32_t numOfOutput, @@ -2130,6 +2081,74 @@ static void rebuildIntervalWindow(SStreamFinalIntervalOperatorInfo* pInfo, SArra } } +bool isDeletedWindow(STimeWindow* pWin, uint64_t groupId, SAggSupporter* pSup) { + SET_RES_WINDOW_KEY(pSup->keyBuf, &pWin->skey, sizeof(int64_t), groupId); + SResultRowPosition* p1 = (SResultRowPosition*)taosHashGet(pSup->pResultRowHashTable, + pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(sizeof(int64_t))); + return p1 == NULL; +} + +static void doHashInterval(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBlock, uint64_t tableGroupId, + SArray* pUpdated) { + SStreamFinalIntervalOperatorInfo* pInfo = (SStreamFinalIntervalOperatorInfo*)pOperatorInfo->info; + SResultRowInfo* pResultRowInfo = &(pInfo->binfo.resultRowInfo); + SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; + int32_t numOfOutput = pOperatorInfo->numOfExprs; + int32_t step = 1; + bool ascScan = true; + TSKEY* tsCols = NULL; + SResultRow* pResult = NULL; + int32_t forwardRows = 0; + + if (pSDataBlock->pDataBlock != NULL) { + SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); + tsCols = (int64_t*)pColDataInfo->pData; + } else { + return; + } + + int32_t startPos = ascScan ? 0 : (pSDataBlock->info.rows - 1); + TSKEY ts = getStartTsKey(&pSDataBlock->info.window, tsCols); + STimeWindow nextWin = getActiveTimeWindow(pInfo->aggSup.pResultBuf, pResultRowInfo, ts, &pInfo->interval, + pInfo->interval.precision, NULL); + while (1) { + if (isFinalInterval(pInfo) && isCloseWindow(&nextWin, &pInfo->twAggSup) && + isDeletedWindow(&nextWin, tableGroupId, &pInfo->aggSup)) { + SArray* pUpWins = taosArrayInit(8, sizeof(STimeWindow)); + taosArrayPush(pUpWins, &nextWin); + rebuildIntervalWindow(pInfo, pUpWins, pInfo->binfo.pRes->info.groupId, + pOperatorInfo->numOfExprs, pOperatorInfo->pTaskInfo); + taosArrayDestroy(pUpWins); + } + int32_t code = setTimeWindowOutputBuf(pResultRowInfo, &nextWin, true, &pResult, tableGroupId, pInfo->binfo.pCtx, + numOfOutput, pInfo->binfo.rowCellInfoOffset, &pInfo->aggSup, pTaskInfo); + if (code != TSDB_CODE_SUCCESS || pResult == NULL) { + longjmp(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); + } + SResKeyPos* pos = taosMemoryMalloc(sizeof(SResKeyPos) + sizeof(uint64_t)); + pos->groupId = tableGroupId; + pos->pos = (SResultRowPosition){.pageId = pResult->pageId, .offset = pResult->offset}; + *(int64_t*)pos->key = pResult->win.skey; + forwardRows = getNumOfRowsInTimeWindow(&pSDataBlock->info, tsCols, startPos, nextWin.ekey, binarySearchForKey, NULL, + TSDB_ORDER_ASC); + if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pUpdated) { + saveResultRow(pResult, tableGroupId, pUpdated); + } + // window start(end) key interpolation + // doWindowBorderInterpolation(pInfo, pSDataBlock, numOfOutput, pInfo->binfo.pCtx, pResult, &nextWin, startPos, + // forwardRows); + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &nextWin, true); + doApplyFunctions(pTaskInfo, pInfo->binfo.pCtx, &nextWin, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, + tsCols, pSDataBlock->info.rows, numOfOutput, TSDB_ORDER_ASC); + int32_t prevEndPos = (forwardRows - 1) * step + startPos; + ASSERT(pSDataBlock->info.window.skey > 0 && pSDataBlock->info.window.ekey > 0); + startPos = getNextQualifiedWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos, pInfo->order); + if (startPos < 0) { + break; + } + } +} + static void clearStreamIntervalOperator(SStreamFinalIntervalOperatorInfo* pInfo) { taosHashClear(pInfo->aggSup.pResultRowHashTable); clearDiskbasedBuf(pInfo->aggSup.pResultBuf); @@ -2169,6 +2188,7 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { SStreamFinalIntervalOperatorInfo* pInfo = pOperator->info; SOperatorInfo* downstream = pOperator->pDownstream[0]; SArray* pUpdated = taosArrayInit(4, POINTER_BYTES); + TSKEY maxTs = INT64_MIN; if (pOperator->status == OP_EXEC_DONE) { return NULL; @@ -2222,6 +2242,7 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { } setInputDataBlock(pOperator, pInfo->binfo.pCtx, pBlock, pInfo->order, MAIN_SCAN, true); + doHashInterval(pOperator, pBlock, pBlock->info.groupId, pUpdated); if (isFinalInterval(pInfo)) { int32_t chIndex = getChildIndex(pBlock); int32_t size = taosArrayGetSize(pInfo->pChildren); @@ -2238,10 +2259,10 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { setInputDataBlock(pChildOp, pChInfo->binfo.pCtx, pBlock, pChInfo->order, MAIN_SCAN, true); doHashInterval(pChildOp, pBlock, pBlock->info.groupId, NULL); } - doHashInterval(pOperator, pBlock, pBlock->info.groupId, pUpdated); - pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); + maxTs = TMAX(maxTs, pBlock->info.window.ekey); } + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, maxTs); if (isFinalInterval(pInfo)) { closeIntervalWindow(pInfo->aggSup.pResultRowHashTable, &pInfo->twAggSup, &pInfo->interval, pUpdated); } @@ -2564,7 +2585,7 @@ int32_t updateSessionWindowInfo(SResultWindowInfo* pWinInfo, TSKEY* pTs, int32_t } static int32_t setWindowOutputBuf(SResultWindowInfo* pWinInfo, SResultRow** pResult, SqlFunctionCtx* pCtx, - int32_t groupId, int32_t numOfOutput, int32_t* rowCellInfoOffset, + uint64_t groupId, int32_t numOfOutput, int32_t* rowCellInfoOffset, SStreamAggSupporter* pAggSup, SExecTaskInfo* pTaskInfo) { assert(pWinInfo->win.skey <= pWinInfo->win.ekey); // too many time window in query @@ -2642,7 +2663,7 @@ int32_t getNumCompactWindow(SArray* pWinInfos, int32_t startIndex, int64_t gap) return size - startIndex - 1; } -void compactTimeWindow(SStreamSessionAggOperatorInfo* pInfo, int32_t startIndex, int32_t num, int32_t groupId, +void compactTimeWindow(SStreamSessionAggOperatorInfo* pInfo, int32_t startIndex, int32_t num, uint64_t groupId, int32_t numOfOutput, SExecTaskInfo* pTaskInfo, SHashObj* pStUpdated, SHashObj* pStDeleted) { SResultWindowInfo* pCurWin = taosArrayGet(pInfo->streamAggSup.pCurWins, startIndex); SResultRow* pCurResult = NULL; @@ -2667,13 +2688,18 @@ void compactTimeWindow(SStreamSessionAggOperatorInfo* pInfo, int32_t startIndex, } } +typedef struct SWinRes { + TSKEY ts; + uint64_t groupId; +} SWinRes; + static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBlock, SHashObj* pStUpdated, SHashObj* pStDeleted) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SStreamSessionAggOperatorInfo* pInfo = pOperator->info; bool masterScan = true; int32_t numOfOutput = pOperator->numOfExprs; - int64_t groupId = pSDataBlock->info.groupId; + uint64_t groupId = pSDataBlock->info.groupId; int64_t gap = pInfo->gap; int64_t code = TSDB_CODE_SUCCESS; @@ -2693,7 +2719,7 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; for (int32_t i = 0; i < pSDataBlock->info.rows;) { int32_t winIndex = 0; - SResultWindowInfo* pCurWin = getSessionTimeWindow(pAggSup, tsCols[i], pSDataBlock->info.groupId, gap, &winIndex); + SResultWindowInfo* pCurWin = getSessionTimeWindow(pAggSup, tsCols[i], groupId, gap, &winIndex); winRows = updateSessionWindowInfo(pCurWin, tsCols, pSDataBlock->info.rows, i, pInfo->gap, pStDeleted); code = doOneWindowAgg(pInfo, pSDataBlock, pCurWin, &pResult, i, winRows, numOfOutput, pTaskInfo); if (code != TSDB_CODE_SUCCESS || pResult == NULL) { @@ -2709,7 +2735,8 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData } pCurWin->isClosed = false; if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { - code = taosHashPut(pStUpdated, &pCurWin->pos, sizeof(SResultRowPosition), &(pCurWin->win.skey), sizeof(TSKEY)); + SWinRes value = {.ts = pCurWin->win.skey, .groupId = groupId}; + code = taosHashPut(pStUpdated, &pCurWin->pos, sizeof(SResultRowPosition), &value, sizeof(SWinRes)); if (code != TSDB_CODE_SUCCESS) { longjmp(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); } @@ -2736,7 +2763,7 @@ static void doClearSessionWindows(SStreamAggSupporter* pAggSup, SOptrBasicInfo* } } -static int32_t copyUpdateResult(SHashObj* pStUpdated, SArray* pUpdated, int32_t groupId) { +static int32_t copyUpdateResult(SHashObj* pStUpdated, SArray* pUpdated) { void* pData = NULL; size_t keyLen = 0; while ((pData = taosHashIterate(pStUpdated, pData)) != NULL) { @@ -2746,9 +2773,9 @@ static int32_t copyUpdateResult(SHashObj* pStUpdated, SArray* pUpdated, int32_t if (pos == NULL) { return TSDB_CODE_QRY_OUT_OF_MEMORY; } - pos->groupId = groupId; + pos->groupId = ((SWinRes*)pData)->groupId; pos->pos = *(SResultRowPosition*)key; - *(int64_t*)pos->key = *(uint64_t*)pData; + *(int64_t*)pos->key = ((SWinRes*)pData)->ts; taosArrayPush(pUpdated, &pos); } return TSDB_CODE_SUCCESS; @@ -2815,7 +2842,9 @@ int32_t closeSessionWindow(SHashObj* pHashMap, STimeWindowAggSupp* pTwSup, SArra __get_win_info_ fn) { // Todo(liuyao) save window to tdb void **pIte = NULL; + size_t keyLen = 0; while ((pIte = taosHashIterate(pHashMap, pIte)) != NULL) { + uint64_t* pGroupId = taosHashGetKey(pIte, &keyLen); SArray *pWins = (SArray *) (*pIte); int32_t size = taosArrayGetSize(pWins); for (int32_t i = 0; i < size; i++) { @@ -2825,7 +2854,7 @@ int32_t closeSessionWindow(SHashObj* pHashMap, STimeWindowAggSupp* pTwSup, SArra if (!pSeWin->isClosed) { pSeWin->isClosed = true; if (pTwSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { - int32_t code = saveResult(pSeWin->win.skey, pSeWin->pos.pageId, pSeWin->pos.offset, 0, pClosed); + int32_t code = saveResult(pSeWin->win.skey, pSeWin->pos.pageId, pSeWin->pos.offset, *pGroupId, pClosed); pSeWin->isOutput = true; } } @@ -2892,7 +2921,7 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { SStreamSessionAggOperatorInfo* pChildInfo = pChildOp->info; doClearSessionWindows(&pChildInfo->streamAggSup, &pChildInfo->binfo, pBlock, 0, pChildOp->numOfExprs, pChildInfo->gap, NULL); - rebuildTimeWindow(pInfo, pWins, pInfo->binfo.pRes->info.groupId, pOperator->numOfExprs, pOperator->pTaskInfo); + rebuildTimeWindow(pInfo, pWins, pBlock->info.groupId, pOperator->numOfExprs, pOperator->pTaskInfo); } taosArrayDestroy(pWins); continue; @@ -2916,7 +2945,7 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { closeSessionWindow(pInfo->streamAggSup.pResultRows, &pInfo->twAggSup, pUpdated, getSessionWinInfo); - copyUpdateResult(pStUpdated, pUpdated, pBInfo->pRes->info.groupId); + copyUpdateResult(pStUpdated, pUpdated); taosHashCleanup(pStUpdated); finalizeUpdatedResult(pOperator->numOfExprs, pInfo->streamAggSup.pResultBuf, pUpdated, @@ -3216,8 +3245,9 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl } pCurWin->winInfo.isClosed = false; if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { - code = taosHashPut(pSeUpdated, &pCurWin->winInfo.pos, sizeof(SResultRowPosition), &(pCurWin->winInfo.win.skey), - sizeof(TSKEY)); + SWinRes value = {.ts = pCurWin->winInfo.win.skey, .groupId = groupId}; + code = taosHashPut(pSeUpdated, &pCurWin->winInfo.pos, sizeof(SResultRowPosition), + &value, sizeof(SWinRes)); if (code != TSDB_CODE_SUCCESS) { longjmp(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); } @@ -3274,7 +3304,7 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { closeSessionWindow(pInfo->streamAggSup.pResultRows, &pInfo->twAggSup, pUpdated, getStateWinInfo); - copyUpdateResult(pSeUpdated, pUpdated, pBInfo->pRes->info.groupId); + copyUpdateResult(pSeUpdated, pUpdated); taosHashCleanup(pSeUpdated); finalizeUpdatedResult(pOperator->numOfExprs, pInfo->streamAggSup.pResultBuf, pUpdated, diff --git a/tests/script/tsim/stream/distributeInterval0.sim b/tests/script/tsim/stream/distributeInterval0.sim index f4f3e04f0a..b720272116 100644 --- a/tests/script/tsim/stream/distributeInterval0.sim +++ b/tests/script/tsim/stream/distributeInterval0.sim @@ -173,4 +173,39 @@ endi sql select _wstartts, count(*) c1, count(d) c2 , sum(a) c3 , max(b) c4, min(c) c5, avg(d) from st interval(10s); -system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file + +sql create database test1 vgroups 1; +sql use test1; +sql create stable st(ts timestamp, a int, b int , c int) tags(ta int,tb int,tc int); +sql create table ts1 using st tags(1,1,1); +sql create table ts2 using st tags(2,2,2); +sql create stream stream_t2 trigger at_once into streamtST1 as select _wstartts, count(*) c1, count(a) c2 , sum(a) c3 , max(b) c5, min(c) c6 from st interval(10s) ; + +sql insert into ts1 values(1648791211000,1,2,3); +sql insert into ts1 values(1648791222001,2,2,3); +sql insert into ts2 values(1648791211000,1,2,3); +sql insert into ts2 values(1648791222001,2,2,3); + +$loop_count = 0 +loop2: +sql select * from streamtST1; + +sleep 300 +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +# row 0 +if $data01 != 2 then + print =====data01=$data01 + goto loop2 +endi + +#rows 1 +if $data11 != 2 then + print =====data11=$data11 + goto loop2 +endi + +system sh/stop_dnodes.sh \ No newline at end of file diff --git a/tests/script/tsim/stream/partitionby.sim b/tests/script/tsim/stream/partitionby.sim index df1e096551..b84a01eb4a 100644 --- a/tests/script/tsim/stream/partitionby.sim +++ b/tests/script/tsim/stream/partitionby.sim @@ -34,6 +34,7 @@ print =====rows=$rows goto loop0 endi +print =====loop0 sql create database test1 vgroups 1; sql use test1; @@ -51,7 +52,7 @@ sql insert into ts2 values(1648791211000,1,2,3); $loop_count = 0 -loop0: +loop1: sleep 300 sql select * from streamt; @@ -62,7 +63,62 @@ endi if $rows != 2 then print =====rows=$rows -goto loop0 +goto loop1 endi -system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file +print =====loop1 + +sql create database test2 vgroups 1; +sql use test2; +sql create stable st(ts timestamp,a int,b int,c int,id int) tags(ta int,tb int,tc int); +sql create table ts1 using st tags(1,1,1); +sql create table ts2 using st tags(2,2,2); + +sql create stream stream_t2 trigger at_once into streamtST as select _wstartts, count(*) c1, count(a) c2 , sum(a) c3 , max(b) c5, min(c) c6, max(id) c7 from st partition by ta interval(10s) ; +sql insert into ts1 values(1648791211000,1,2,3,1); +sql insert into ts1 values(1648791222001,2,2,3,2); +sql insert into ts2 values(1648791211000,1,2,3,3); +sql insert into ts2 values(1648791222001,2,2,3,4); + +sql insert into ts2 values(1648791222002,2,2,3,5); +sql insert into ts2 values(1648791222002,2,2,3,6); + +sql insert into ts1 values(1648791211000,1,2,3,1); +sql insert into ts1 values(1648791222001,2,2,3,2); +sql insert into ts2 values(1648791211000,1,2,3,3); +sql insert into ts2 values(1648791222001,2,2,3,4); + +$loop_count = 0 + +loop2: +sleep 300 +sql select * from streamtST; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $data01 != 1 then +print =====data01=$data01 +goto loop2 +endi + +if $data02 != 1 then +print =====data02=$data02 +goto loop2 +endi + +if $data03 != 1 then +print =====data03=$data03 +goto loop2 +endi + +if $data04 != 2 then +print =====data04=$data04 +goto loop2 +endi + +print =====loop2 + +system sh/stop_dnodes.sh \ No newline at end of file From ab47de1dae88097fc546a4e57f5dbe81a4e11b80 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 16 Jun 2022 18:30:22 +0800 Subject: [PATCH 29/36] fix(query): adjust group limitation. --- source/libs/executor/src/executorimpl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index fb79c53384..66849e49b8 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -3508,7 +3508,7 @@ static int32_t handleLimitOffset(SOperatorInfo* pOperator, SSDataBlock* pBlock) if (pProjectInfo->limit.limit > 0 && pProjectInfo->curOutput + pRes->info.rows >= pProjectInfo->limit.limit) { pRes->info.rows = (int32_t)(pProjectInfo->limit.limit - pProjectInfo->curOutput); - if (pProjectInfo->slimit.limit == -1 || pProjectInfo->slimit.limit <= pProjectInfo->curGroupOutput) { + if (pProjectInfo->slimit.limit != -1 && pProjectInfo->slimit.limit <= pProjectInfo->curGroupOutput) { pOperator->status = OP_EXEC_DONE; } From c1de4df95bde0ec91dd59203d6403042fad52f20 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 16 Jun 2022 18:33:45 +0800 Subject: [PATCH 30/36] fix: fix page capacity calculation --- source/libs/tdb/src/db/tdbPage.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/source/libs/tdb/src/db/tdbPage.c b/source/libs/tdb/src/db/tdbPage.c index 92272fb438..78470b6256 100644 --- a/source/libs/tdb/src/db/tdbPage.c +++ b/source/libs/tdb/src/db/tdbPage.c @@ -246,14 +246,17 @@ void tdbPageCopy(SPage *pFromPage, SPage *pToPage) { int tdbPageCapacity(int pageSize, int amHdrSize) { int szPageHdr; + int minCellIndexSize; // at least one cell in cell index if (pageSize < 65536) { szPageHdr = pageMethods.szPageHdr; + minCellIndexSize = pageMethods.szOffset; } else { szPageHdr = pageLargeMethods.szPageHdr; + minCellIndexSize = pageLargeMethods.szOffset; } - return pageSize - szPageHdr - amHdrSize; + return pageSize - szPageHdr - amHdrSize - sizeof(SPageFtr) - minCellIndexSize; } static int tdbPageAllocate(SPage *pPage, int szCell, SCell **ppCell) { @@ -599,4 +602,4 @@ SPageMethods pageLargeMethods = { setLPageCellOffset, // setCellOffset getLPageFreeCellInfo, // getFreeCellInfo setLPageFreeCellInfo // setFreeCellInfo -}; \ No newline at end of file +}; From 67540f2504464927ff1f684f97481138bfc19baf Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Thu, 16 Jun 2022 19:49:42 +0800 Subject: [PATCH 31/36] fix: table merge scan sort buf size --- source/libs/executor/src/scanoperator.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index ddebba1c9e..a8aad8fe68 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2310,14 +2310,14 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->pSortInfo = generateSortByTsInfo(pInfo->cond.order); pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); int32_t rowSize = pInfo->pResBlock->info.rowSize; - pInfo->bufPageSize = rowSize < 1024 ? 1024 : rowSize * 2; + int32_t blockMetaSize = (int32_t)blockDataGetSerialMetaSize(pInfo->pResBlock->info.numOfCols); + pInfo->bufPageSize = (rowSize * 2 + blockMetaSize) < 1024 ? 1024 : (rowSize * 2 + blockMetaSize); pInfo->sortBufSize = pInfo->bufPageSize * 16; pInfo->hasGroupId = false; pInfo->prefetchedTuple = NULL; pOperator->name = "TableMergeScanOperator"; - // TODO : change it - pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN; + pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN; pOperator->blocking = false; pOperator->status = OP_NOT_OPENED; pOperator->info = pInfo; From 98cbbc01975f78132c0ac90c0d0c1be6c73ef507 Mon Sep 17 00:00:00 2001 From: afwerar <1296468573@qq.com> Date: Thu, 16 Jun 2022 19:53:39 +0800 Subject: [PATCH 32/36] test: add auto exit case --- tests/system-test/test.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/system-test/test.py b/tests/system-test/test.py index 5a68f548da..f3e39fc9ce 100644 --- a/tests/system-test/test.py +++ b/tests/system-test/test.py @@ -13,6 +13,7 @@ # pip install src/connector/python/ # -*- coding: utf-8 -*- +from ast import If import sys import getopt import subprocess @@ -33,8 +34,15 @@ import taos def checkRunTimeError(): import win32gui + timeCount = 0 while 1: time.sleep(1) + timeCount = timeCount + 1 + if (timeCount>900): + os.system("TASKKILL /F /IM taosd.exe") + os.system("TASKKILL /F /IM taos.exe") + os.system("TASKKILL /F /IM tmq_sim.exe") + quit(0) hwnd = win32gui.FindWindow(None, "Microsoft Visual C++ Runtime Library") if hwnd: os.system("TASKKILL /F /IM taosd.exe") From c858b6952853677f3e2a25d5532b63a5114f7df6 Mon Sep 17 00:00:00 2001 From: afwerar <1296468573@qq.com> Date: Thu, 16 Jun 2022 19:56:51 +0800 Subject: [PATCH 33/36] test: add auto exit case --- tests/system-test/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/system-test/test.py b/tests/system-test/test.py index f3e39fc9ce..85f63a2868 100644 --- a/tests/system-test/test.py +++ b/tests/system-test/test.py @@ -13,7 +13,6 @@ # pip install src/connector/python/ # -*- coding: utf-8 -*- -from ast import If import sys import getopt import subprocess From 49d92079ca1005546eca0b87daba8fbdaee3e509 Mon Sep 17 00:00:00 2001 From: afwerar <1296468573@qq.com> Date: Thu, 16 Jun 2022 19:58:24 +0800 Subject: [PATCH 34/36] test: add auto exit case --- tests/system-test/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/system-test/test.py b/tests/system-test/test.py index 85f63a2868..55bfd8c945 100644 --- a/tests/system-test/test.py +++ b/tests/system-test/test.py @@ -41,6 +41,7 @@ def checkRunTimeError(): os.system("TASKKILL /F /IM taosd.exe") os.system("TASKKILL /F /IM taos.exe") os.system("TASKKILL /F /IM tmq_sim.exe") + os.system("TASKKILL /F /IM mintty.exe") quit(0) hwnd = win32gui.FindWindow(None, "Microsoft Visual C++ Runtime Library") if hwnd: From bdf24b6ab06c991db5198835d18fff3720f793f1 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Thu, 16 Jun 2022 20:14:48 +0800 Subject: [PATCH 35/36] fix: uncomment the codes of dropping stable --- source/dnode/vnode/src/vnd/vnodeSvr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index d170931e7e..da1a126a76 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -466,10 +466,10 @@ static int32_t vnodeProcessDropStbReq(SVnode *pVnode, int64_t version, void *pRe } // process request - // if (metaDropSTable(pVnode->pMeta, version, &req) < 0) { - // rcode = terrno; - // goto _exit; - // } + if (metaDropSTable(pVnode->pMeta, version, &req) < 0) { + rcode = terrno; + goto _exit; + } // return rsp _exit: From 8353da6694408895df7f277aa69920968dc560ce Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Thu, 16 Jun 2022 20:17:36 +0800 Subject: [PATCH 36/36] other: revert the code --- source/dnode/vnode/src/tsdb/tsdbReadImpl.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbReadImpl.c b/source/dnode/vnode/src/tsdb/tsdbReadImpl.c index e4c5ef8fc3..1c2514d46f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadImpl.c +++ b/source/dnode/vnode/src/tsdb/tsdbReadImpl.c @@ -233,17 +233,6 @@ int tsdbLoadBlockInfo(SReadH *pReadh, void *pTarget) { tsdbError("vgId:%d, SBlockInfo part in file %s is corrupted since wrong checksum, offset:%u len :%u", TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pBlkIdx->offset, pBlkIdx->len); return -1; - } else { - SBlockInfo *pBlkInfo = pReadh->pBlkInfo; - SBlock *pBlk = NULL; - int nBlks = (pBlkIdx->len - sizeof(SBlockInfo)) / sizeof(SBlock); - for (int n = 0; n < nBlks; ++n) { - pBlk = &pBlkInfo->blocks[n]; - if (pBlk->numOfSubBlocks >= 1) { - tsdbDebug("prop:vgId:%d, file %s , offset:%u len :%u has %d rows", TSDB_READ_REPO_ID(pReadh), - TSDB_FILE_FULL_NAME(pHeadf), pBlkIdx->offset, pBlkIdx->len, pBlk->numOfRows); - } - } } // ASSERT(pBlkIdx->tid == pReadh->pBlkInfo->tid && pBlkIdx->uid == pReadh->pBlkInfo->uid);