From 3451ba0c827100ba30ce09ba212db30cc393703a Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 9 Jun 2022 18:54:22 +0800 Subject: [PATCH 01/21] fix:error in schemaless --- source/client/src/clientSml.c | 64 +++++------------------------------ 1 file changed, 8 insertions(+), 56 deletions(-) diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index d1d7325909..75c308c029 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -67,6 +67,8 @@ for (int i = 1; i < keyLen; ++i) { \ #define BINARY_ADD_LEN 2 // "binary" 2 means " " #define NCHAR_ADD_LEN 3 // L"nchar" 3 means L" " + +#define MAX_RETRY_TIMES 5 //================================================================================================= typedef TSDB_SML_PROTOCOL_TYPE SMLProtocolType; @@ -303,7 +305,7 @@ static int32_t smlApplySchemaAction(SSmlHandle* info, SSchemaAction* action) { uError("SML:0x%" PRIx64 " apply schema action. reset query cache. error: %s", info->id, taos_errstr(res2)); } taos_free_result(res2); - taosMsleep(10); + taosMsleep(500); } break; } @@ -327,7 +329,7 @@ static int32_t smlApplySchemaAction(SSmlHandle* info, SSchemaAction* action) { uError("SML:0x%" PRIx64 " apply schema action. reset query cache. error: %s", info->id, taos_errstr(res2)); } taos_free_result(res2); - taosMsleep(10); + taosMsleep(500); } break; } @@ -350,7 +352,7 @@ static int32_t smlApplySchemaAction(SSmlHandle* info, SSchemaAction* action) { uError("SML:0x%" PRIx64 " apply schema action. reset query cache. error: %s", info->id, taos_errstr(res2)); } taos_free_result(res2); - taosMsleep(10); + taosMsleep(500); } break; } @@ -373,7 +375,7 @@ static int32_t smlApplySchemaAction(SSmlHandle* info, SSchemaAction* action) { uError("SML:0x%" PRIx64 " apply schema action. reset query cache. 
error: %s", info->id, taos_errstr(res2)); } taos_free_result(res2); - taosMsleep(10); + taosMsleep(500); } break; } @@ -424,7 +426,7 @@ static int32_t smlApplySchemaAction(SSmlHandle* info, SSchemaAction* action) { uError("SML:0x%" PRIx64 " apply schema action. reset query cache. error: %s", info->id, taos_errstr(res2)); } taos_free_result(res2); - taosMsleep(10); + taosMsleep(500); } break; } @@ -541,56 +543,6 @@ end: return code; } -//========================================================================= - -/* Field Escape charaters - 1: measurement Comma,Space - 2: tag_key, tag_value, field_key Comma,Equal Sign,Space - 3: field_value Double quote,Backslash -*/ -//static void escapeSpecialCharacter(uint8_t field, const char **pos) { -// const char *cur = *pos; -// if (*cur != '\\') { -// return; -// } -// switch (field) { -// case 1: -// switch (*(cur + 1)) { -// case ',': -// case ' ': -// cur++; -// break; -// default: -// break; -// } -// break; -// case 2: -// switch (*(cur + 1)) { -// case ',': -// case ' ': -// case '=': -// cur++; -// break; -// default: -// break; -// } -// break; -// case 3: -// switch (*(cur + 1)) { -// case '"': -// case '\\': -// cur++; -// break; -// default: -// break; -// } -// break; -// default: -// break; -// } -// *pos = cur; -//} - static bool smlParseNumber(SSmlKv *kvVal, SSmlMsgBuf *msg){ const char *pVal = kvVal->value; int32_t len = kvVal->length; @@ -2311,7 +2263,7 @@ static int smlProcess(SSmlHandle *info, char* lines[], int numLines) { do{ code = smlModifyDBSchemas(info); if (code == 0) break; - } while (retryNum++ < taosHashGetSize(info->superTables)); + } while (retryNum++ < taosHashGetSize(info->superTables) * MAX_RETRY_TIMES); if (code != 0) { uError("SML:0x%"PRIx64" smlModifyDBSchemas error : %s", info->id, tstrerror(code)); From 25636d6201e929b9657f51f7d2f7cbac4e79239b Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Thu, 9 Jun 2022 19:21:52 +0800 Subject: [PATCH 02/21] feature: add merge interval operator 
--- source/libs/executor/inc/executorimpl.h | 3 + source/libs/executor/src/executorimpl.c | 45 ++++ source/libs/executor/src/timewindowoperator.c | 226 ++++++++++++++++++ 3 files changed, 274 insertions(+) diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 4f02c559b1..5dd349f4ab 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -895,6 +895,9 @@ int64_t getSmaWaterMark(int64_t interval, double filesFactor); bool isSmaStream(int8_t triggerType); int32_t compareTimeWindow(const void* p1, const void* p2, const void* param); +int32_t finalizeResultRowIntoSDataBlock(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPosition, + SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, int32_t numOfExprs, const int32_t* rowCellOffset, + SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo); #ifdef __cplusplus } diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 7b73fd8ae9..5bd9044167 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1955,6 +1955,51 @@ static void doUpdateNumOfRows(SResultRow* pRow, int32_t numOfExprs, const int32_ } } +int32_t finalizeResultRowIntoSDataBlock(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPosition, + SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, int32_t numOfExprs, const int32_t* rowCellOffset, + SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo) { + SFilePage* page = getBufPage(pBuf, resultRowPosition->pageId); + SResultRow* pRow = (SResultRow*)((char*)page + resultRowPosition->offset); + + doUpdateNumOfRows(pRow, numOfExprs, rowCellOffset); + if (pRow->numOfRows == 0) { + releaseBufPage(pBuf, page); + return 0; + } + + if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) { + releaseBufPage(pBuf, page); + return -1; + } + + for (int32_t j = 0; j < numOfExprs; ++j) { + int32_t slotId = pExprInfo[j].base.resSchema.slotId; + + pCtx[j].resultInfo = 
getResultCell(pRow, j, rowCellOffset); + if (pCtx[j].fpSet.finalize) { + int32_t code = pCtx[j].fpSet.finalize(&pCtx[j], pBlock); + if (TAOS_FAILED(code)) { + qError("%s build result data block error, code %s", GET_TASKID(pTaskInfo), tstrerror(code)); + longjmp(pTaskInfo->env, code); + } + } else if (strcmp(pCtx[j].pExpr->pExpr->_function.functionName, "_select_value") == 0) { + // do nothing, todo refactor + } else { + // expand the result into multiple rows. E.g., _wstartts, top(k, 20) + // the _wstartts needs to copy to 20 following rows, since the results of top-k expands to 20 different rows. + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, slotId); + char* in = GET_ROWCELL_INTERBUF(pCtx[j].resultInfo); + for (int32_t k = 0; k < pRow->numOfRows; ++k) { + colDataAppend(pColInfoData, pBlock->info.rows + k, in, pCtx[j].resultInfo->isNullRes); + } + } + } + + releaseBufPage(pBuf, page); + + return 0; +} + int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprInfo* pExprInfo, SDiskbasedBuf* pBuf, SGroupResInfo* pGroupResInfo, const int32_t* rowCellOffset, SqlFunctionCtx* pCtx, int32_t numOfExprs) { diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index b309478556..696b9139cf 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -3175,3 +3175,229 @@ _error: pTaskInfo->code = code; return NULL; } + +typedef struct SMergeIntervalAggOperatorInfo { + SIntervalAggOperatorInfo intervalAggOperatorInfo; + + SHashObj* groupIntervalHash; +} SMergeIntervalAggOperatorInfo; + +void destroyMergeIntervalOperatorInfo(void* param, int32_t numOfOutput) { + SMergeIntervalAggOperatorInfo* pInfo = (SMergeIntervalAggOperatorInfo*)param; + taosHashCleanup(pInfo->groupIntervalHash); + destroyIntervalOperatorInfo(&pInfo->intervalAggOperatorInfo, numOfOutput); +} + +static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, 
SResultRowInfo* pResultRowInfo, SSDataBlock* pBlock, + int32_t scanFlag, SSDataBlock* pResultBlock) { + SMergeIntervalAggOperatorInfo *miaInfo = pOperatorInfo->info; + SIntervalAggOperatorInfo * pInfo = &miaInfo->intervalAggOperatorInfo; + + SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; + + int32_t startPos = 0; + int32_t numOfOutput = pOperatorInfo->numOfExprs; + int64_t* tsCols = extractTsCol(pBlock, pInfo); + uint64_t tableGroupId = pBlock->info.groupId; + bool ascScan = (pInfo->order == TSDB_ORDER_ASC); + TSKEY blockStartTs = getStartTsKey(&pBlock->info.window, tsCols); + SResultRow* pResult = NULL; + + STimeWindow win = getActiveTimeWindow(pInfo->aggSup.pResultBuf, pResultRowInfo, blockStartTs, &pInfo->interval, + pInfo->interval.precision, &pInfo->win); + //TODO: pResultBlock full + //TODO: pBlock not process not finished + //TODO: different block group id or no group id + //TODO: lastWin may be none, p1 shall not be null + //TODO: the last datablock + //TODO: blockDataUpdateTsWindow(pBlock, 0); + + int32_t ret = + setTimeWindowOutputBuf(pResultRowInfo, &win, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, pInfo->binfo.pCtx, + numOfOutput, pInfo->binfo.rowCellInfoOffset, &pInfo->aggSup, pTaskInfo); + if (ret != TSDB_CODE_SUCCESS || pResult == NULL) { + longjmp(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + TSKEY ekey = ascScan ? win.ekey : win.skey; + int32_t forwardRows = + getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, pInfo->order); + ASSERT(forwardRows > 0); + + // prev time window not interpolation yet. 
+ if (pInfo->timeWindowInterpo) { + SResultRowPosition pos = addToOpenWindowList(pResultRowInfo, pResult); + doInterpUnclosedTimeWindow(pOperatorInfo, numOfOutput, pResultRowInfo, pBlock, scanFlag, tsCols, &pos); + + // restore current time window + ret = + setTimeWindowOutputBuf(pResultRowInfo, &win, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, pInfo->binfo.pCtx, + numOfOutput, pInfo->binfo.rowCellInfoOffset, &pInfo->aggSup, pTaskInfo); + if (ret != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + // window start key interpolation + doWindowBorderInterpolation(pInfo, pBlock, numOfOutput, pInfo->binfo.pCtx, pResult, &win, startPos, forwardRows); + } + + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &win, true); + doApplyFunctions(pTaskInfo, pInfo->binfo.pCtx, &win, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, tsCols, + pBlock->info.rows, numOfOutput, pInfo->order); + + doCloseWindow(pResultRowInfo, pInfo, pResult); + STimeWindow *lastWin = taosHashGet(miaInfo->groupIntervalHash, &tableGroupId, sizeof(tableGroupId)); + if (ascScan && win.skey > lastWin->ekey || (!ascScan) && win.skey < lastWin->ekey) { + SET_RES_WINDOW_KEY(pInfo->aggSup.keyBuf, &lastWin->skey, TSDB_KEYSIZE, tableGroupId); + SResultRowPosition* p1 = + (SResultRowPosition*)taosHashGet(pInfo->aggSup.pResultRowHashTable, pInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); + finalizeResultRowIntoSDataBlock(pInfo->aggSup.pResultBuf, p1, + pInfo->binfo.pCtx, pOperatorInfo->pExpr, pOperatorInfo->numOfExprs, pInfo->binfo.rowCellInfoOffset, + pResultBlock, pTaskInfo); + taosHashRemove(pInfo->aggSup.pResultRowHashTable, pInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); + + taosHashPut(miaInfo->groupIntervalHash, &tableGroupId, sizeof(tableGroupId), &win, sizeof(STimeWindow)); + } + + STimeWindow nextWin = win; + while (1) { + int32_t prevEndPos = forwardRows - 1 + startPos; + startPos = getNextQualifiedWindow(&pInfo->interval, 
&nextWin, &pBlock->info, tsCols, prevEndPos, pInfo->order); + if (startPos < 0) { + break; + } + + // null data, failed to allocate more memory buffer + int32_t code = setTimeWindowOutputBuf(pResultRowInfo, &nextWin, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, + pInfo->binfo.pCtx, numOfOutput, pInfo->binfo.rowCellInfoOffset, + &pInfo->aggSup, pTaskInfo); + if (code != TSDB_CODE_SUCCESS || pResult == NULL) { + longjmp(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + ekey = ascScan ? nextWin.ekey : nextWin.skey; + forwardRows = + getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, pInfo->order); + + // window start(end) key interpolation + doWindowBorderInterpolation(pInfo, pBlock, numOfOutput, pInfo->binfo.pCtx, pResult, &nextWin, startPos, + forwardRows); + + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &nextWin, true); + doApplyFunctions(pTaskInfo, pInfo->binfo.pCtx, &nextWin, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, + tsCols, pBlock->info.rows, numOfOutput, pInfo->order); + doCloseWindow(pResultRowInfo, pInfo, pResult); + } + + if (pInfo->timeWindowInterpo) { + saveDataBlockLastRow(pInfo->pPrevValues, pBlock, pInfo->pInterpCols); + } +} + +static SSDataBlock* doMergeIntervalAgg(SOperatorInfo* pOperator) { + + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + + SMergeIntervalAggOperatorInfo* miaInfo = pOperator->info; + SIntervalAggOperatorInfo *pInfo = &miaInfo->intervalAggOperatorInfo; + if (pOperator->status == OP_EXEC_DONE) { + return NULL; + } + + SSDataBlock* pRes = pInfo->binfo.pRes; + blockDataCleanup(pRes); + + int32_t scanFlag = MAIN_SCAN; + + SOperatorInfo* downstream = pOperator->pDownstream[0]; + + while (1) { + SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + if (pBlock == NULL) { + break; + } + + getTableScanInfo(pOperator, &pInfo->order, &scanFlag); + + // the pDataBlock are always the same one, no need to call this again + setInputDataBlock(pOperator, 
pInfo->binfo.pCtx, pBlock, pInfo->order, scanFlag, true); + STableQueryInfo* pTableQueryInfo = pInfo->pCurrent; + + setIntervalQueryRange(pTableQueryInfo, pBlock->info.window.skey, &pTaskInfo->window); + doMergeIntervalAggImpl(pOperator, &pInfo->binfo.resultRowInfo, pBlock, scanFlag, pRes); + } + + if (pRes->info.rows == 0) { + doSetOperatorCompleted(pOperator); + } + + size_t rows = pRes->info.rows; + pOperator->resultInfo.totalRows += rows; + return (rows == 0) ? NULL : pRes; +} + +SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, + SSDataBlock* pResBlock, SInterval* pInterval, int32_t primaryTsSlotId, + STimeWindowAggSupp* pTwAggSupp, SExecTaskInfo* pTaskInfo) { + SMergeIntervalAggOperatorInfo* miaInfo = taosMemoryCalloc(1, sizeof(SMergeIntervalAggOperatorInfo)); + SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + if (miaInfo == NULL || pOperator == NULL) { + goto _error; + } + SIntervalAggOperatorInfo *pInfo = &miaInfo->intervalAggOperatorInfo; + + pInfo->win = pTaskInfo->window; + pInfo->order = TSDB_ORDER_ASC; + pInfo->interval = *pInterval; + pInfo->execModel = pTaskInfo->execModel; + pInfo->twAggSup = *pTwAggSupp; + + pInfo->primaryTsIndex = primaryTsSlotId; + miaInfo->groupIntervalHash = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), true, HASH_NO_LOCK); + + size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; + initResultSizeInfo(pOperator, 4096); + + int32_t code = + initAggInfo(&pInfo->binfo, &pInfo->aggSup, pExprInfo, numOfCols, pResBlock, keyBufSize, pTaskInfo->id.str); + + initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pInfo->win); + + pInfo->timeWindowInterpo = timeWindowinterpNeeded(pInfo->binfo.pCtx, numOfCols, pInfo); + if (pInfo->timeWindowInterpo) { + pInfo->binfo.resultRowInfo.openWindow = tdListNew(sizeof(SResultRowPosition)); + } + + // pInfo->pTableQueryInfo = initTableQueryInfo(pTableGroupInfo); + if 
(code != TSDB_CODE_SUCCESS /* || pInfo->pTableQueryInfo == NULL*/) { + goto _error; + } + + initResultRowInfo(&pInfo->binfo.resultRowInfo, (int32_t)1); + + pOperator->name = "TimeMergeIntervalAggOperator"; + pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_MERGE_INTERVAL; + pOperator->blocking = false; + pOperator->status = OP_NOT_OPENED; + pOperator->pExpr = pExprInfo; + pOperator->pTaskInfo = pTaskInfo; + pOperator->numOfExprs = numOfCols; + pOperator->info = pInfo; + + pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doMergeIntervalAgg, doStreamIntervalAgg, NULL, + destroyIntervalOperatorInfo, NULL, NULL, NULL); + + code = appendDownstream(pOperator, &downstream, 1); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + return pOperator; + +_error: + destroyMergeIntervalOperatorInfo(pInfo, numOfCols); + taosMemoryFreeClear(pInfo); + taosMemoryFreeClear(pOperator); + pTaskInfo->code = code; + return NULL; +} From 1d14725880d9b06d90de880f0fb4bfe13554822d Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 9 Jun 2022 21:38:57 +0800 Subject: [PATCH 03/21] feat:add async logic for schemaless --- source/client/src/clientSml.c | 141 ++++++++++++++++++++++++++-------- 1 file changed, 107 insertions(+), 34 deletions(-) diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index 75c308c029..3039f93a30 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -155,8 +155,17 @@ typedef struct { int64_t endTime; } SSmlCostInfo; +typedef struct{ + SRequestObj* request; + SCatalog* catalog; + tsem_t sem; + TdThreadSpinlock lock; +} Params; + typedef struct { int64_t id; + Params params; + bool isLast; SMLProtocolType protocol; int8_t precision; @@ -1378,6 +1387,7 @@ static void smlDestroyInfo(SSmlHandle* info){ if(!info->dataFormat){ taosArrayDestroy(info->colsContainer); } + destroyRequest(info->pRequest); taosMemoryFreeClear(info); } @@ -1405,11 +1415,6 @@ static SSmlHandle* smlBuildSmlInfo(TAOS* taos, 
SRequestObj* request, SMLProtocol ((SVnodeModifOpStmt*)(info->pQuery->pRoot))->payloadType = PAYLOAD_TYPE_KV; info->taos = (STscObj *)taos; - code = catalogGetHandle(info->taos->pAppInfo->clusterId, &info->pCatalog); - if(code != TSDB_CODE_SUCCESS){ - uError("SML:0x%"PRIx64" get catalog error %d", info->id, code); - goto cleanup; - } info->precision = precision; info->protocol = protocol; @@ -2158,7 +2163,6 @@ end: return ret; } - static int32_t smlInsertData(SSmlHandle* info) { int32_t code = TSDB_CODE_SUCCESS; @@ -2200,10 +2204,12 @@ static int32_t smlInsertData(SSmlHandle* info) { } info->cost.insertRpcTime = taosGetTimestampUs(); - launchQueryImpl(info->pRequest, info->pQuery, true, NULL); + //launchQueryImpl(info->pRequest, info->pQuery, false, NULL); +// info->affectedRows = taos_affected_rows(info->pRequest); +// return info->pRequest->code; - info->affectedRows = taos_affected_rows(info->pRequest); - return info->pRequest->code; + launchAsyncQuery(info->pRequest, info->pQuery); + return TSDB_CODE_SUCCESS; } static void smlPrintStatisticInfo(SSmlHandle *info){ @@ -2284,30 +2290,53 @@ cleanup: return code; } -static int32_t isSchemalessDb(SSmlHandle* info){ +static int32_t isSchemalessDb(STscObj *taos, SCatalog *catalog){ SName name; - tNameSetDbName(&name, info->taos->acctId, info->taos->db, strlen(info->taos->db)); + tNameSetDbName(&name, taos->acctId, taos->db, strlen(taos->db)); char dbFname[TSDB_DB_FNAME_LEN] = {0}; tNameGetFullDbName(&name, dbFname); SDbCfgInfo pInfo = {0}; - SEpSet ep = getEpSet_s(&info->taos->pAppInfo->mgmtEp); + SEpSet ep = getEpSet_s(&taos->pAppInfo->mgmtEp); - int32_t code = catalogGetDBCfg(info->pCatalog, info->taos->pAppInfo->pTransporter, &ep, dbFname, &pInfo); + int32_t code = catalogGetDBCfg(catalog, taos->pAppInfo->pTransporter, &ep, dbFname, &pInfo); if (code != TSDB_CODE_SUCCESS) { - info->pRequest->code = code; - smlBuildInvalidDataMsg(&info->msgBuf, "catalogGetDBCfg error, code:", tstrerror(code)); return code; } 
taosArrayDestroy(pInfo.pRetensions); if (!pInfo.schemaless){ - info->pRequest->code = TSDB_CODE_SML_INVALID_DB_CONF; - smlBuildInvalidDataMsg(&info->msgBuf, "can not insert into schemaless db:", dbFname); return TSDB_CODE_SML_INVALID_DB_CONF; } return TSDB_CODE_SUCCESS; } +static void smlInsertCallback(void* param, void* res, int32_t code) { + if (code != TSDB_CODE_SUCCESS) { + uError("failed to execute, reason:%s\n", taos_errstr(res)); + } + SRequestObj *pRequest = (SRequestObj *)res; + int32_t rows = taos_affected_rows(pRequest); + SSmlHandle* info = (SSmlHandle *)param; + + // lock + taosThreadSpinLock(&info->params.lock); + info->params.request->body.resInfo.numOfRows += rows; + if(code != TSDB_CODE_SUCCESS){ + info->params.request->code = code; + } + taosThreadSpinUnlock(&info->params.lock); + // unlock + + printf("SML:0x%"PRIx64" insert finished, code: %d, total: %d, insert: %d\n", info->id, code, info->affectedRows, rows); + Params pParam = info->params; + bool isLast = info->isLast; + smlDestroyInfo(info); + + if(isLast){ + tsem_post(&pParam.sem); + } +} + /** * taos_schemaless_insert() parse and insert data points into database according to * different protocol. 
@@ -2336,48 +2365,92 @@ TAOS_RES* taos_schemaless_insert(TAOS* taos, char* lines[], int numLines, int pr return NULL; } - SSmlHandle* info = smlBuildSmlInfo(taos, request, (SMLProtocolType)protocol, precision); - if(!info){ - return (TAOS_RES*)request; - } + ((STscObj *)taos)->schemalessType = 1; + SSmlMsgBuf msg = {.buf = request->msgBuf, .len = ERROR_MSG_BUF_DEFAULT_SIZE}; - info->taos->schemalessType = 1; - if(request->pDb == NULL){ - request->code = TSDB_CODE_PAR_DB_NOT_SPECIFIED; - smlBuildInvalidDataMsg(&info->msgBuf, "Database not specified", NULL); + Params params = {.request = request}; + tsem_init(¶ms.sem, 0, 0); + taosThreadSpinInit(&(params.lock), 0); + + int32_t code = catalogGetHandle(((STscObj *)taos)->pAppInfo->clusterId, ¶ms.catalog); + if(code != TSDB_CODE_SUCCESS){ + uError("SML get catalog error %d", code); + request->code = code; goto end; } - if(isSchemalessDb(info) != TSDB_CODE_SUCCESS){ + if(request->pDb == NULL){ + request->code = TSDB_CODE_PAR_DB_NOT_SPECIFIED; + smlBuildInvalidDataMsg(&msg, "Database not specified", NULL); + goto end; + } + + if(isSchemalessDb(taos, params.catalog) != TSDB_CODE_SUCCESS){ request->code = TSDB_CODE_SML_INVALID_DB_CONF; - smlBuildInvalidDataMsg(&info->msgBuf, "Cannot write data to a non schemaless database", NULL); + smlBuildInvalidDataMsg(&msg, "Cannot write data to a non schemaless database", NULL); goto end; } if (!lines) { request->code = TSDB_CODE_SML_INVALID_DATA; - smlBuildInvalidDataMsg(&info->msgBuf, "lines is null", NULL); + smlBuildInvalidDataMsg(&msg, "lines is null", NULL); goto end; } if(protocol < TSDB_SML_LINE_PROTOCOL || protocol > TSDB_SML_JSON_PROTOCOL){ request->code = TSDB_CODE_SML_INVALID_PROTOCOL_TYPE; - smlBuildInvalidDataMsg(&info->msgBuf, "protocol invalidate", NULL); + smlBuildInvalidDataMsg(&msg, "protocol invalidate", NULL); goto end; } if(protocol == TSDB_SML_LINE_PROTOCOL && (precision < TSDB_SML_TIMESTAMP_NOT_CONFIGURED || precision > TSDB_SML_TIMESTAMP_NANO_SECONDS)){ 
request->code = TSDB_CODE_SML_INVALID_PRECISION_TYPE; - smlBuildInvalidDataMsg(&info->msgBuf, "precision invalidate for line protocol", NULL); + smlBuildInvalidDataMsg(&msg, "precision invalidate for line protocol", NULL); goto end; } - info->pRequest->code = smlProcess(info, lines, numLines); + int32_t perBatch = 20000; + for (int i = 0; i < ceil(((double)numLines)/perBatch); ++i) { + SRequestObj* req = (SRequestObj*)createRequest((STscObj *)taos, TSDB_SQL_INSERT); + if(!req){ + request->code = TSDB_CODE_OUT_OF_MEMORY; + uError("SML:taos_schemaless_insert error request is null"); + goto end; + } + SSmlHandle* info = smlBuildSmlInfo(taos, req, (SMLProtocolType)protocol, precision); + if(!info){ + request->code = TSDB_CODE_OUT_OF_MEMORY; + uError("SML:taos_schemaless_insert error SSmlHandle is null"); + goto end; + } + + if(numLines >= perBatch){ + numLines -= perBatch; + info->isLast = false; + }else{ + perBatch = numLines; + numLines = 0; + info->isLast = true; + } + + info->params = params; + info->pCatalog = params.catalog; + info->affectedRows = perBatch; + info->pRequest->body.queryFp = smlInsertCallback; + info->pRequest->body.param = info; + code = smlProcess(info, lines, perBatch); + lines += perBatch; + if (code != TSDB_CODE_SUCCESS){ + info->pRequest->body.queryFp(info, req, code); + } + } + tsem_wait(¶ms.sem); end: - info->taos->schemalessType = 0; - uDebug("result:%s", info->msgBuf.buf); - smlDestroyInfo(info); + taosThreadSpinDestroy(¶ms.lock); + tsem_destroy(¶ms.sem); + ((STscObj *)taos)->schemalessType = 0; + uDebug("result:%s", request->msgBuf); return (TAOS_RES*)request; } From c97dcbcf76e5d0f2b1520f664026526e0cc91eed Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 10 Jun 2022 09:55:14 +0800 Subject: [PATCH 04/21] feature: add merge interval operator --- source/libs/executor/inc/executorimpl.h | 2 +- source/libs/executor/src/executorimpl.c | 12 +- source/libs/executor/src/timewindowoperator.c | 155 +++++++++++------- 3 files changed, 108 
insertions(+), 61 deletions(-) diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 5dd349f4ab..6a7e71412e 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -895,7 +895,7 @@ int64_t getSmaWaterMark(int64_t interval, double filesFactor); bool isSmaStream(int8_t triggerType); int32_t compareTimeWindow(const void* p1, const void* p2, const void* param); -int32_t finalizeResultRowIntoSDataBlock(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPosition, +int32_t finalizeResultRowIntoResultDataBlock(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPosition, SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, int32_t numOfExprs, const int32_t* rowCellOffset, SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 5bd9044167..e82c94073c 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1955,7 +1955,7 @@ static void doUpdateNumOfRows(SResultRow* pRow, int32_t numOfExprs, const int32_ } } -int32_t finalizeResultRowIntoSDataBlock(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPosition, +int32_t finalizeResultRowIntoResultDataBlock(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPosition, SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, int32_t numOfExprs, const int32_t* rowCellOffset, SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo) { SFilePage* page = getBufPage(pBuf, resultRowPosition->pageId); @@ -1967,9 +1967,13 @@ int32_t finalizeResultRowIntoSDataBlock(SDiskbasedBuf* pBuf, SResultRowPosition* return 0; } - if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) { - releaseBufPage(pBuf, page); - return -1; + while (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) { + int32_t code = blockDataEnsureCapacity(pBlock, pBlock->info.capacity * 1.25); + if (TAOS_FAILED(code)) { + releaseBufPage(pBuf, page); + qError("%s 
ensure result data capacity failed, code %s", GET_TASKID(pTaskInfo), tstrerror(code)); + longjmp(pTaskInfo->env, code); + } } for (int32_t j = 0; j < numOfExprs; ++j) { diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 696b9139cf..3ed416ec7c 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -3180,117 +3180,140 @@ typedef struct SMergeIntervalAggOperatorInfo { SIntervalAggOperatorInfo intervalAggOperatorInfo; SHashObj* groupIntervalHash; + bool hasGroupId; + uint64_t groupId; + SSDataBlock *prefetchedBlock; } SMergeIntervalAggOperatorInfo; void destroyMergeIntervalOperatorInfo(void* param, int32_t numOfOutput) { - SMergeIntervalAggOperatorInfo* pInfo = (SMergeIntervalAggOperatorInfo*)param; - taosHashCleanup(pInfo->groupIntervalHash); - destroyIntervalOperatorInfo(&pInfo->intervalAggOperatorInfo, numOfOutput); + SMergeIntervalAggOperatorInfo* miaInfo = (SMergeIntervalAggOperatorInfo*)param; + taosHashCleanup(miaInfo->groupIntervalHash); + destroyIntervalOperatorInfo(&miaInfo->intervalAggOperatorInfo, numOfOutput); +} + +static int32_t outputPrevIntervalResult(SOperatorInfo * pOperatorInfo, uint64_t tableGroupId, SSDataBlock *pResultBlock, STimeWindow* newWin) { + SMergeIntervalAggOperatorInfo *miaInfo = pOperatorInfo->info; + SIntervalAggOperatorInfo * iaInfo = &miaInfo->intervalAggOperatorInfo; + SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; + bool ascScan = (iaInfo->order == TSDB_ORDER_ASC); + + STimeWindow *prevWin= taosHashGet(miaInfo->groupIntervalHash, &tableGroupId, sizeof(tableGroupId)); + if (prevWin == NULL) { + taosHashPut(miaInfo->groupIntervalHash, &tableGroupId, sizeof(tableGroupId), newWin, sizeof(STimeWindow)); + return 0; + } + + if (ascScan && newWin->skey > prevWin->ekey || (!ascScan) && newWin->skey < prevWin->ekey) { + SET_RES_WINDOW_KEY(iaInfo->aggSup.keyBuf, &prevWin->skey, TSDB_KEYSIZE, tableGroupId); + 
SResultRowPosition* p1 = + (SResultRowPosition*)taosHashGet(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); + ASSERT(p1 != NULL); + + finalizeResultRowIntoResultDataBlock(iaInfo->aggSup.pResultBuf, p1, iaInfo->binfo.pCtx, pOperatorInfo->pExpr, + pOperatorInfo->numOfExprs, iaInfo->binfo.rowCellInfoOffset, pResultBlock, + pTaskInfo); + taosHashRemove(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); + + taosHashPut(miaInfo->groupIntervalHash, &tableGroupId, sizeof(tableGroupId), newWin, sizeof(STimeWindow)); + } + return 0; } static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResultRowInfo, SSDataBlock* pBlock, int32_t scanFlag, SSDataBlock* pResultBlock) { SMergeIntervalAggOperatorInfo *miaInfo = pOperatorInfo->info; - SIntervalAggOperatorInfo * pInfo = &miaInfo->intervalAggOperatorInfo; + SIntervalAggOperatorInfo * iaInfo = &miaInfo->intervalAggOperatorInfo; SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; int32_t startPos = 0; int32_t numOfOutput = pOperatorInfo->numOfExprs; - int64_t* tsCols = extractTsCol(pBlock, pInfo); + int64_t* tsCols = extractTsCol(pBlock, iaInfo); uint64_t tableGroupId = pBlock->info.groupId; - bool ascScan = (pInfo->order == TSDB_ORDER_ASC); + bool ascScan = (iaInfo->order == TSDB_ORDER_ASC); TSKEY blockStartTs = getStartTsKey(&pBlock->info.window, tsCols); SResultRow* pResult = NULL; - STimeWindow win = getActiveTimeWindow(pInfo->aggSup.pResultBuf, pResultRowInfo, blockStartTs, &pInfo->interval, - pInfo->interval.precision, &pInfo->win); - //TODO: pResultBlock full + STimeWindow win = getActiveTimeWindow(iaInfo->aggSup.pResultBuf, pResultRowInfo, blockStartTs, &iaInfo->interval, + iaInfo->interval.precision, &iaInfo->win); //TODO: pBlock not process not finished //TODO: different block group id or no group id - //TODO: lastWin may be none, p1 shall not be null //TODO: the last datablock //TODO: 
blockDataUpdateTsWindow(pBlock, 0); int32_t ret = - setTimeWindowOutputBuf(pResultRowInfo, &win, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, pInfo->binfo.pCtx, - numOfOutput, pInfo->binfo.rowCellInfoOffset, &pInfo->aggSup, pTaskInfo); + setTimeWindowOutputBuf(pResultRowInfo, &win, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, iaInfo->binfo.pCtx, + numOfOutput, iaInfo->binfo.rowCellInfoOffset, &iaInfo->aggSup, pTaskInfo); if (ret != TSDB_CODE_SUCCESS || pResult == NULL) { longjmp(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); } TSKEY ekey = ascScan ? win.ekey : win.skey; int32_t forwardRows = - getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, pInfo->order); + getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, iaInfo->order); ASSERT(forwardRows > 0); // prev time window not interpolation yet. - if (pInfo->timeWindowInterpo) { + if (iaInfo->timeWindowInterpo) { SResultRowPosition pos = addToOpenWindowList(pResultRowInfo, pResult); doInterpUnclosedTimeWindow(pOperatorInfo, numOfOutput, pResultRowInfo, pBlock, scanFlag, tsCols, &pos); // restore current time window ret = - setTimeWindowOutputBuf(pResultRowInfo, &win, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, pInfo->binfo.pCtx, - numOfOutput, pInfo->binfo.rowCellInfoOffset, &pInfo->aggSup, pTaskInfo); + setTimeWindowOutputBuf(pResultRowInfo, &win, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, + iaInfo->binfo.pCtx, + numOfOutput, iaInfo->binfo.rowCellInfoOffset, &iaInfo->aggSup, pTaskInfo); if (ret != TSDB_CODE_SUCCESS) { longjmp(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); } // window start key interpolation - doWindowBorderInterpolation(pInfo, pBlock, numOfOutput, pInfo->binfo.pCtx, pResult, &win, startPos, forwardRows); + doWindowBorderInterpolation(iaInfo, pBlock, numOfOutput, iaInfo->binfo.pCtx, pResult, &win, startPos, forwardRows); } - updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &win, true); - 
doApplyFunctions(pTaskInfo, pInfo->binfo.pCtx, &win, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, tsCols, - pBlock->info.rows, numOfOutput, pInfo->order); + updateTimeWindowInfo(&iaInfo->twAggSup.timeWindowData, &win, true); + doApplyFunctions(pTaskInfo, iaInfo->binfo.pCtx, &win, &iaInfo->twAggSup.timeWindowData, startPos, forwardRows, tsCols, + pBlock->info.rows, numOfOutput, iaInfo->order); + doCloseWindow(pResultRowInfo, iaInfo, pResult); - doCloseWindow(pResultRowInfo, pInfo, pResult); - STimeWindow *lastWin = taosHashGet(miaInfo->groupIntervalHash, &tableGroupId, sizeof(tableGroupId)); - if (ascScan && win.skey > lastWin->ekey || (!ascScan) && win.skey < lastWin->ekey) { - SET_RES_WINDOW_KEY(pInfo->aggSup.keyBuf, &lastWin->skey, TSDB_KEYSIZE, tableGroupId); - SResultRowPosition* p1 = - (SResultRowPosition*)taosHashGet(pInfo->aggSup.pResultRowHashTable, pInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); - finalizeResultRowIntoSDataBlock(pInfo->aggSup.pResultBuf, p1, - pInfo->binfo.pCtx, pOperatorInfo->pExpr, pOperatorInfo->numOfExprs, pInfo->binfo.rowCellInfoOffset, - pResultBlock, pTaskInfo); - taosHashRemove(pInfo->aggSup.pResultRowHashTable, pInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); - - taosHashPut(miaInfo->groupIntervalHash, &tableGroupId, sizeof(tableGroupId), &win, sizeof(STimeWindow)); - } + // output previous interval results after this interval (&win) is closed + outputPrevIntervalResult(pOperatorInfo, tableGroupId, pResultBlock, &win); STimeWindow nextWin = win; while (1) { int32_t prevEndPos = forwardRows - 1 + startPos; - startPos = getNextQualifiedWindow(&pInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos, pInfo->order); + startPos = getNextQualifiedWindow(&iaInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos, iaInfo->order); if (startPos < 0) { break; } // null data, failed to allocate more memory buffer int32_t code = setTimeWindowOutputBuf(pResultRowInfo, &nextWin, (scanFlag == 
MAIN_SCAN), &pResult, tableGroupId, - pInfo->binfo.pCtx, numOfOutput, pInfo->binfo.rowCellInfoOffset, - &pInfo->aggSup, pTaskInfo); + iaInfo->binfo.pCtx, numOfOutput, iaInfo->binfo.rowCellInfoOffset, + &iaInfo->aggSup, pTaskInfo); if (code != TSDB_CODE_SUCCESS || pResult == NULL) { longjmp(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); } ekey = ascScan ? nextWin.ekey : nextWin.skey; forwardRows = - getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, pInfo->order); + getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, iaInfo->order); // window start(end) key interpolation - doWindowBorderInterpolation(pInfo, pBlock, numOfOutput, pInfo->binfo.pCtx, pResult, &nextWin, startPos, + doWindowBorderInterpolation(iaInfo, pBlock, numOfOutput, iaInfo->binfo.pCtx, pResult, &nextWin, startPos, forwardRows); - updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &nextWin, true); - doApplyFunctions(pTaskInfo, pInfo->binfo.pCtx, &nextWin, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, - tsCols, pBlock->info.rows, numOfOutput, pInfo->order); - doCloseWindow(pResultRowInfo, pInfo, pResult); + updateTimeWindowInfo(&iaInfo->twAggSup.timeWindowData, &nextWin, true); + doApplyFunctions(pTaskInfo, iaInfo->binfo.pCtx, &nextWin, &iaInfo->twAggSup.timeWindowData, startPos, forwardRows, + tsCols, pBlock->info.rows, numOfOutput, iaInfo->order); + doCloseWindow(pResultRowInfo, iaInfo, pResult); + + // output previous interval results after this interval (&nextWin) is closed + outputPrevIntervalResult(pOperatorInfo, tableGroupId, pResultBlock, &nextWin); } - if (pInfo->timeWindowInterpo) { - saveDataBlockLastRow(pInfo->pPrevValues, pBlock, pInfo->pInterpCols); + if (iaInfo->timeWindowInterpo) { + saveDataBlockLastRow(iaInfo->pPrevValues, pBlock, iaInfo->pInterpCols); } } @@ -3299,36 +3322,56 @@ static SSDataBlock* doMergeIntervalAgg(SOperatorInfo* pOperator) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; 
SMergeIntervalAggOperatorInfo* miaInfo = pOperator->info; - SIntervalAggOperatorInfo *pInfo = &miaInfo->intervalAggOperatorInfo; + SIntervalAggOperatorInfo *iaInfo = &miaInfo->intervalAggOperatorInfo; if (pOperator->status == OP_EXEC_DONE) { return NULL; } - SSDataBlock* pRes = pInfo->binfo.pRes; + SSDataBlock* pRes = iaInfo->binfo.pRes; blockDataCleanup(pRes); - int32_t scanFlag = MAIN_SCAN; - SOperatorInfo* downstream = pOperator->pDownstream[0]; - + int32_t scanFlag = MAIN_SCAN; while (1) { - SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + SSDataBlock *pBlock = NULL; + if (miaInfo->prefetchedBlock == NULL) { + pBlock = downstream->fpSet.getNextFn(downstream); + } else { + pBlock = miaInfo->prefetchedBlock; + miaInfo->groupId = pBlock->info.groupId; + } + if (pBlock == NULL) { break; } - getTableScanInfo(pOperator, &pInfo->order, &scanFlag); - // the pDataBlock are always the same one, no need to call this again - setInputDataBlock(pOperator, pInfo->binfo.pCtx, pBlock, pInfo->order, scanFlag, true); - STableQueryInfo* pTableQueryInfo = pInfo->pCurrent; + if (!miaInfo->hasGroupId) { + miaInfo->hasGroupId = true; + miaInfo->groupId = pBlock->info.groupId; + } else if (miaInfo->groupId != pBlock->info.groupId) { + miaInfo->prefetchedBlock = pBlock; + break; + } + + getTableScanInfo(pOperator, &iaInfo->order, &scanFlag); + setInputDataBlock(pOperator, iaInfo->binfo.pCtx, pBlock, iaInfo->order, scanFlag, true); + STableQueryInfo* pTableQueryInfo = iaInfo->pCurrent; setIntervalQueryRange(pTableQueryInfo, pBlock->info.window.skey, &pTaskInfo->window); - doMergeIntervalAggImpl(pOperator, &pInfo->binfo.resultRowInfo, pBlock, scanFlag, pRes); + doMergeIntervalAggImpl(pOperator, &iaInfo->binfo.resultRowInfo, pBlock, scanFlag, pRes); + + if (pRes->info.rows >= pOperator->resultInfo.threshold) { + break; + } } + pRes->info.groupId = miaInfo->groupId; if (pRes->info.rows == 0) { doSetOperatorCompleted(pOperator); + } else { + //TODO: ts column index + 
blockDataUpdateTsWindow(pRes, 0); } size_t rows = pRes->info.rows; @@ -3384,7 +3427,7 @@ SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SExprI pOperator->numOfExprs = numOfCols; pOperator->info = pInfo; - pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doMergeIntervalAgg, doStreamIntervalAgg, NULL, + pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doMergeIntervalAgg, NULL, NULL, destroyIntervalOperatorInfo, NULL, NULL, NULL); code = appendDownstream(pOperator, &downstream, 1); From e46783954d79ea5ee61f26022d8d9b13d6d4fea7 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Fri, 10 Jun 2022 10:18:31 +0800 Subject: [PATCH 05/21] fix(query): timezone check minor fix --- source/libs/function/src/builtins.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index 8f553f541b..602c3e25e6 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -1054,7 +1054,7 @@ static bool validateHourRange(int8_t hour) { } static bool validateMinuteRange(int8_t hour, int8_t minute, char sign) { - if (minute == 0 || (minute == 30 && (hour == 3 || hour == 5) && sign == '-')) { + if (minute == 0 || (minute == 30 && (hour == 3 || hour == 5) && sign == '+')) { return true; } From 62780bbfb4cbd4432e49b42e646435562db6f242 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 10 Jun 2022 10:24:52 +0800 Subject: [PATCH 06/21] feature: add merge interval operator --- source/libs/executor/src/timewindowoperator.c | 45 +++++++++---------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 3ed416ec7c..842e46a8cd 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -3236,10 +3236,6 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, 
SResultRowInfo* STimeWindow win = getActiveTimeWindow(iaInfo->aggSup.pResultBuf, pResultRowInfo, blockStartTs, &iaInfo->interval, iaInfo->interval.precision, &iaInfo->win); - //TODO: pBlock not process not finished - //TODO: different block group id or no group id - //TODO: the last datablock - //TODO: blockDataUpdateTsWindow(pBlock, 0); int32_t ret = setTimeWindowOutputBuf(pResultRowInfo, &win, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, iaInfo->binfo.pCtx, @@ -3370,7 +3366,6 @@ static SSDataBlock* doMergeIntervalAgg(SOperatorInfo* pOperator) { if (pRes->info.rows == 0) { doSetOperatorCompleted(pOperator); } else { - //TODO: ts column index blockDataUpdateTsWindow(pRes, 0); } @@ -3387,36 +3382,38 @@ SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SExprI if (miaInfo == NULL || pOperator == NULL) { goto _error; } - SIntervalAggOperatorInfo *pInfo = &miaInfo->intervalAggOperatorInfo; - pInfo->win = pTaskInfo->window; - pInfo->order = TSDB_ORDER_ASC; - pInfo->interval = *pInterval; - pInfo->execModel = pTaskInfo->execModel; - pInfo->twAggSup = *pTwAggSupp; + SIntervalAggOperatorInfo * iaInfo = &miaInfo->intervalAggOperatorInfo; - pInfo->primaryTsIndex = primaryTsSlotId; + iaInfo->win = pTaskInfo->window; + iaInfo->order = TSDB_ORDER_ASC; + iaInfo->interval = *pInterval; + + iaInfo->execModel = pTaskInfo->execModel; + iaInfo->twAggSup = *pTwAggSupp; + + iaInfo->primaryTsIndex = primaryTsSlotId; miaInfo->groupIntervalHash = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), true, HASH_NO_LOCK); size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; initResultSizeInfo(pOperator, 4096); int32_t code = - initAggInfo(&pInfo->binfo, &pInfo->aggSup, pExprInfo, numOfCols, pResBlock, keyBufSize, pTaskInfo->id.str); + initAggInfo(&iaInfo->binfo, &iaInfo->aggSup, pExprInfo, numOfCols, pResBlock, keyBufSize, pTaskInfo->id.str); - initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pInfo->win); + 
initExecTimeWindowInfo(&iaInfo->twAggSup.timeWindowData, &iaInfo->win); - pInfo->timeWindowInterpo = timeWindowinterpNeeded(pInfo->binfo.pCtx, numOfCols, pInfo); - if (pInfo->timeWindowInterpo) { - pInfo->binfo.resultRowInfo.openWindow = tdListNew(sizeof(SResultRowPosition)); + iaInfo->timeWindowInterpo = timeWindowinterpNeeded(iaInfo->binfo.pCtx, numOfCols, iaInfo); + if (iaInfo->timeWindowInterpo) { + iaInfo->binfo.resultRowInfo.openWindow = tdListNew(sizeof(SResultRowPosition)); } - // pInfo->pTableQueryInfo = initTableQueryInfo(pTableGroupInfo); - if (code != TSDB_CODE_SUCCESS /* || pInfo->pTableQueryInfo == NULL*/) { + // iaInfo->pTableQueryInfo = initTableQueryInfo(pTableGroupInfo); + if (code != TSDB_CODE_SUCCESS /* || iaInfo->pTableQueryInfo == NULL*/) { goto _error; } - initResultRowInfo(&pInfo->binfo.resultRowInfo, (int32_t)1); + initResultRowInfo(&iaInfo->binfo.resultRowInfo, (int32_t)1); pOperator->name = "TimeMergeIntervalAggOperator"; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_MERGE_INTERVAL; @@ -3425,10 +3422,10 @@ SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SExprI pOperator->pExpr = pExprInfo; pOperator->pTaskInfo = pTaskInfo; pOperator->numOfExprs = numOfCols; - pOperator->info = pInfo; + pOperator->info = miaInfo; pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doMergeIntervalAgg, NULL, NULL, - destroyIntervalOperatorInfo, NULL, NULL, NULL); + destroyMergeIntervalOperatorInfo, NULL, NULL, NULL); code = appendDownstream(pOperator, &downstream, 1); if (code != TSDB_CODE_SUCCESS) { @@ -3438,8 +3435,8 @@ SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SExprI return pOperator; _error: - destroyMergeIntervalOperatorInfo(pInfo, numOfCols); - taosMemoryFreeClear(pInfo); + destroyMergeIntervalOperatorInfo(miaInfo, numOfCols); + taosMemoryFreeClear(miaInfo); taosMemoryFreeClear(pOperator); pTaskInfo->code = code; return NULL; From b3ce29dc4aae00bd3a99b3f3328fe46f63b81718 Mon Sep 17 
00:00:00 2001 From: shenglian zhou Date: Fri, 10 Jun 2022 11:09:31 +0800 Subject: [PATCH 07/21] add time window operator --- source/libs/executor/inc/executorimpl.h | 3 + source/libs/executor/src/executorimpl.c | 24 ++- source/libs/executor/src/timewindowoperator.c | 138 +++++++++--------- 3 files changed, 90 insertions(+), 75 deletions(-) diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 6a7e71412e..969a3aac62 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -797,6 +797,9 @@ SOperatorInfo* createSortedMergeOperatorInfo(SOperatorInfo** downstream, int32_t SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, SSDataBlock* pResBlock, SInterval* pInterval, int32_t primaryTsSlotId, STimeWindowAggSupp *pTwAggSupp, SExecTaskInfo* pTaskInfo); +SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, + SSDataBlock* pResBlock, SInterval* pInterval, int32_t primaryTsSlotId, + SExecTaskInfo* pTaskInfo); SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild); SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index e82c94073c..480814acef 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1956,8 +1956,9 @@ static void doUpdateNumOfRows(SResultRow* pRow, int32_t numOfExprs, const int32_ } int32_t finalizeResultRowIntoResultDataBlock(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPosition, - SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, int32_t numOfExprs, const int32_t* rowCellOffset, - SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo) { + SqlFunctionCtx* pCtx, 
SExprInfo* pExprInfo, int32_t numOfExprs, + const int32_t* rowCellOffset, SSDataBlock* pBlock, + SExecTaskInfo* pTaskInfo) { SFilePage* page = getBufPage(pBuf, resultRowPosition->pageId); SResultRow* pRow = (SResultRow*)((char*)page + resultRowPosition->offset); @@ -4553,7 +4554,7 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo SScanPhysiNode* pScanPhyNode = (SScanPhysiNode*)pPhyNode; // simple child table. STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pPhyNode; STimeWindowAggSupp twSup = { - .waterMark = pTableScanNode->watermark, .calTrigger = pTableScanNode->triggerType, .maxTs = INT64_MIN}; + .waterMark = pTableScanNode->watermark, .calTrigger = pTableScanNode->triggerType, .maxTs = INT64_MIN}; tsdbReaderT pDataReader = NULL; if (pHandle->vnode) { pDataReader = doCreateDataReader(pTableScanNode, pHandle, pTableListInfo, (uint64_t)queryId, taskId, pTagCond); @@ -4665,6 +4666,21 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo int32_t tsSlotId = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId; pOptr = createIntervalOperatorInfo(ops[0], pExprInfo, num, pResBlock, &interval, tsSlotId, &as, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_INTERVAL == type) { + SMergeIntervalPhysiNode * pIntervalPhyNode = (SMergeIntervalPhysiNode*)pPhyNode; + + SExprInfo* pExprInfo = createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &num); + SSDataBlock* pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc); + + SInterval interval = {.interval = pIntervalPhyNode->interval, + .sliding = pIntervalPhyNode->sliding, + .intervalUnit = pIntervalPhyNode->intervalUnit, + .slidingUnit = pIntervalPhyNode->slidingUnit, + .offset = pIntervalPhyNode->offset, + .precision = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->node.resType.precision}; + + int32_t tsSlotId = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId; + pOptr = createMergeIntervalOperatorInfo(ops[0], pExprInfo, num, 
pResBlock, &interval, tsSlotId, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL == type) { int32_t children = 8; pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); @@ -4697,7 +4713,7 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo int32_t numOfOutputCols = 0; SArray* pColList = extractColMatchInfo(pMergePhyNode->pTargets, pDescNode, &numOfOutputCols, pTaskInfo, COL_MATCH_FROM_SLOT_ID); - SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0); + SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0); SSDataBlock* pInputDataBlock = createResDataBlock(pChildNode->pOutputDataBlockDesc); pOptr = createMultiwaySortMergeOperatorInfo(ops, size, pInputDataBlock, pResBlock, sortInfo, pColList, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_SESSION == type) { diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 842e46a8cd..32c3b84527 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -1870,8 +1870,8 @@ _error: return NULL; } -static void doHashInterval(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBlock, - int32_t tableGroupId, SArray* pUpdated) { +static void doHashInterval(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBlock, int32_t tableGroupId, + SArray* pUpdated) { SStreamFinalIntervalOperatorInfo* pInfo = (SStreamFinalIntervalOperatorInfo*)pOperatorInfo->info; SResultRowInfo* pResultRowInfo = &(pInfo->binfo.resultRowInfo); SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; @@ -1886,7 +1886,7 @@ static void doHashInterval(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBloc SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); tsCols = (int64_t*)pColDataInfo->pData; } else { - return ; + return; } int32_t startPos = ascScan ? 
0 : (pSDataBlock->info.rows - 1); @@ -1903,13 +1903,14 @@ static void doHashInterval(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBloc pos->groupId = tableGroupId; pos->pos = (SResultRowPosition){.pageId = pResult->pageId, .offset = pResult->offset}; *(int64_t*)pos->key = pResult->win.skey; - forwardRows = getNumOfRowsInTimeWindow(&pSDataBlock->info, tsCols, startPos, - nextWin.ekey, binarySearchForKey, NULL, TSDB_ORDER_ASC); + forwardRows = getNumOfRowsInTimeWindow(&pSDataBlock->info, tsCols, startPos, nextWin.ekey, binarySearchForKey, NULL, + TSDB_ORDER_ASC); if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pUpdated) { - saveResult(pResult, tableGroupId, pUpdated); + saveResult(pResult, tableGroupId, pUpdated); } // window start(end) key interpolation - // doWindowBorderInterpolation(pInfo, pSDataBlock, numOfOutput, pInfo->binfo.pCtx, pResult, &nextWin, startPos, forwardRows); + // doWindowBorderInterpolation(pInfo, pSDataBlock, numOfOutput, pInfo->binfo.pCtx, pResult, &nextWin, startPos, + // forwardRows); updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &nextWin, true); doApplyFunctions(pTaskInfo, pInfo->binfo.pCtx, &nextWin, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, tsCols, pSDataBlock->info.rows, numOfOutput, TSDB_ORDER_ASC); @@ -2040,10 +2041,10 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { int32_t childIndex = getChildIndex(pBlock); SOperatorInfo* pChildOp = taosArrayGetP(pInfo->pChildren, childIndex); SIntervalAggOperatorInfo* pChildInfo = pChildOp->info; - doClearWindows(&pChildInfo->aggSup, &pChildInfo->binfo, &pChildInfo->interval, - pChildInfo->primaryTsIndex, pChildOp->numOfExprs, pBlock, NULL); - rebuildIntervalWindow(pInfo, pUpWins, pInfo->binfo.pRes->info.groupId, - pOperator->numOfExprs, pOperator->pTaskInfo); + doClearWindows(&pChildInfo->aggSup, &pChildInfo->binfo, &pChildInfo->interval, pChildInfo->primaryTsIndex, + pChildOp->numOfExprs, pBlock, NULL); + 
rebuildIntervalWindow(pInfo, pUpWins, pInfo->binfo.pRes->info.groupId, pOperator->numOfExprs, + pOperator->pTaskInfo); taosArrayDestroy(pUpWins); continue; } @@ -2053,7 +2054,7 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { break; } if (isFinalInterval(pInfo)) { - int32_t chIndex = getChildIndex(pBlock); + int32_t chIndex = getChildIndex(pBlock); int32_t size = taosArrayGetSize(pInfo->pChildren); // if chIndex + 1 - size > 0, add new child for (int32_t i = 0; i < chIndex + 1 - size; i++) { @@ -2063,7 +2064,7 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { } taosArrayPush(pInfo->pChildren, &pChildOp); } - SOperatorInfo* pChildOp = taosArrayGetP(pInfo->pChildren, chIndex); + SOperatorInfo* pChildOp = taosArrayGetP(pInfo->pChildren, chIndex); SStreamFinalIntervalOperatorInfo* pChInfo = pChildOp->info; setInputDataBlock(pChildOp, pChInfo->binfo.pCtx, pBlock, pChInfo->order, MAIN_SCAN, true); doHashInterval(pChildOp, pBlock, pBlock->info.groupId, NULL); @@ -2071,12 +2072,10 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { doHashInterval(pOperator, pBlock, pBlock->info.groupId, pUpdated); pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); } - + if (isFinalInterval(pInfo)) { - closeIntervalWindow(pInfo->aggSup.pResultRowHashTable, &pInfo->twAggSup, - &pInfo->interval, pClosed); - finalizeUpdatedResult(pOperator->numOfExprs, pInfo->aggSup.pResultBuf, pClosed, - pInfo->binfo.rowCellInfoOffset); + closeIntervalWindow(pInfo->aggSup.pResultRowHashTable, &pInfo->twAggSup, &pInfo->interval, pClosed); + finalizeUpdatedResult(pOperator->numOfExprs, pInfo->aggSup.pResultBuf, pClosed, pInfo->binfo.rowCellInfoOffset); if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { taosArrayAddAll(pUpdated, pClosed); } @@ -2100,34 +2099,35 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { return pInfo->binfo.pRes; } -SOperatorInfo* 
createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, - SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild) { - SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; +SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, + SExecTaskInfo* pTaskInfo, int32_t numOfChild) { + SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; SStreamFinalIntervalOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamFinalIntervalOperatorInfo)); - SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); if (pInfo == NULL || pOperator == NULL) { goto _error; } pInfo->order = TSDB_ORDER_ASC; - pInfo->interval = (SInterval) {.interval = pIntervalPhyNode->interval, - .sliding = pIntervalPhyNode->sliding, - .intervalUnit = pIntervalPhyNode->intervalUnit, - .slidingUnit = pIntervalPhyNode->slidingUnit, - .offset = pIntervalPhyNode->offset, - .precision = - ((SColumnNode*)pIntervalPhyNode->window.pTspk)->node.resType.precision}; - pInfo->twAggSup = (STimeWindowAggSupp){.waterMark = pIntervalPhyNode->window.watermark, + pInfo->interval = (SInterval){.interval = pIntervalPhyNode->interval, + .sliding = pIntervalPhyNode->sliding, + .intervalUnit = pIntervalPhyNode->intervalUnit, + .slidingUnit = pIntervalPhyNode->slidingUnit, + .offset = pIntervalPhyNode->offset, + .precision = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->node.resType.precision}; + pInfo->twAggSup = (STimeWindowAggSupp){ + .waterMark = pIntervalPhyNode->window.watermark, .calTrigger = pIntervalPhyNode->window.triggerType, .maxTs = INT64_MIN, - .winMap = NULL, }; + .winMap = NULL, + }; pInfo->primaryTsIndex = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId; size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; initResultSizeInfo(pOperator, 4096); - int32_t numOfCols = 0; - SExprInfo* pExprInfo = 
createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &numOfCols); + int32_t numOfCols = 0; + SExprInfo* pExprInfo = createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &numOfCols); SSDataBlock* pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc); - int32_t code = initAggInfo(&pInfo->binfo, &pInfo->aggSup, pExprInfo, numOfCols, - pResBlock, keyBufSize, pTaskInfo->id.str); + int32_t code = + initAggInfo(&pInfo->binfo, &pInfo->aggSup, pExprInfo, numOfCols, pResBlock, keyBufSize, pTaskInfo->id.str); initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); if (code != TSDB_CODE_SUCCESS) { goto _error; @@ -2149,7 +2149,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, if (!isFinalInterval(pInfo)) { pInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE; } - pInfo->pUpdateRes = createResDataBlock(pPhyNode->pOutputDataBlockDesc);\ + pInfo->pUpdateRes = createResDataBlock(pPhyNode->pOutputDataBlockDesc); pInfo->pUpdateRes->info.type = STREAM_REPROCESS; blockDataEnsureCapacity(pInfo->pUpdateRes, 128); pInfo->pPhyNode = nodesCloneNode(pPhyNode); @@ -2163,9 +2163,9 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, pOperator->numOfExprs = numOfCols; pOperator->info = pInfo; - pOperator->fpSet = createOperatorFpSet(NULL, doStreamFinalIntervalAgg, NULL, NULL, - destroyStreamFinalIntervalOperatorInfo, aggEncodeResultRow, aggDecodeResultRow, - NULL); + pOperator->fpSet = + createOperatorFpSet(NULL, doStreamFinalIntervalAgg, NULL, NULL, destroyStreamFinalIntervalOperatorInfo, + aggEncodeResultRow, aggDecodeResultRow, NULL); code = appendDownstream(pOperator, &downstream, 1); if (code != TSDB_CODE_SUCCESS) { @@ -2205,8 +2205,7 @@ void destroyStreamSessionAggOperatorInfo(void* param, int32_t numOfOutput) { } } -int32_t initBiasicInfo(SOptrBasicInfo* pBasicInfo, SExprInfo* pExprInfo, - int32_t numOfCols, SSDataBlock* pResultBlock) { +int32_t initBiasicInfo(SOptrBasicInfo* pBasicInfo, 
SExprInfo* pExprInfo, int32_t numOfCols, SSDataBlock* pResultBlock) { pBasicInfo->pCtx = createSqlFunctionCtx(pExprInfo, numOfCols, &pBasicInfo->rowCellInfoOffset); pBasicInfo->pRes = pResultBlock; for (int32_t i = 0; i < numOfCols; ++i) { @@ -3179,10 +3178,10 @@ _error: typedef struct SMergeIntervalAggOperatorInfo { SIntervalAggOperatorInfo intervalAggOperatorInfo; - SHashObj* groupIntervalHash; - bool hasGroupId; - uint64_t groupId; - SSDataBlock *prefetchedBlock; + SHashObj* groupIntervalHash; + bool hasGroupId; + uint64_t groupId; + SSDataBlock* prefetchedBlock; } SMergeIntervalAggOperatorInfo; void destroyMergeIntervalOperatorInfo(void* param, int32_t numOfOutput) { @@ -3191,13 +3190,14 @@ void destroyMergeIntervalOperatorInfo(void* param, int32_t numOfOutput) { destroyIntervalOperatorInfo(&miaInfo->intervalAggOperatorInfo, numOfOutput); } -static int32_t outputPrevIntervalResult(SOperatorInfo * pOperatorInfo, uint64_t tableGroupId, SSDataBlock *pResultBlock, STimeWindow* newWin) { - SMergeIntervalAggOperatorInfo *miaInfo = pOperatorInfo->info; - SIntervalAggOperatorInfo * iaInfo = &miaInfo->intervalAggOperatorInfo; - SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; - bool ascScan = (iaInfo->order == TSDB_ORDER_ASC); +static int32_t outputPrevIntervalResult(SOperatorInfo* pOperatorInfo, uint64_t tableGroupId, SSDataBlock* pResultBlock, + STimeWindow* newWin) { + SMergeIntervalAggOperatorInfo* miaInfo = pOperatorInfo->info; + SIntervalAggOperatorInfo* iaInfo = &miaInfo->intervalAggOperatorInfo; + SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; + bool ascScan = (iaInfo->order == TSDB_ORDER_ASC); - STimeWindow *prevWin= taosHashGet(miaInfo->groupIntervalHash, &tableGroupId, sizeof(tableGroupId)); + STimeWindow* prevWin = taosHashGet(miaInfo->groupIntervalHash, &tableGroupId, sizeof(tableGroupId)); if (prevWin == NULL) { taosHashPut(miaInfo->groupIntervalHash, &tableGroupId, sizeof(tableGroupId), newWin, sizeof(STimeWindow)); return 0; @@ -3205,8 +3205,8 
@@ static int32_t outputPrevIntervalResult(SOperatorInfo * pOperatorInfo, uint64_t if (ascScan && newWin->skey > prevWin->ekey || (!ascScan) && newWin->skey < prevWin->ekey) { SET_RES_WINDOW_KEY(iaInfo->aggSup.keyBuf, &prevWin->skey, TSDB_KEYSIZE, tableGroupId); - SResultRowPosition* p1 = - (SResultRowPosition*)taosHashGet(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); + SResultRowPosition* p1 = (SResultRowPosition*)taosHashGet(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, + GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); ASSERT(p1 != NULL); finalizeResultRowIntoResultDataBlock(iaInfo->aggSup.pResultBuf, p1, iaInfo->binfo.pCtx, pOperatorInfo->pExpr, @@ -3220,9 +3220,9 @@ static int32_t outputPrevIntervalResult(SOperatorInfo * pOperatorInfo, uint64_t } static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResultRowInfo, SSDataBlock* pBlock, - int32_t scanFlag, SSDataBlock* pResultBlock) { - SMergeIntervalAggOperatorInfo *miaInfo = pOperatorInfo->info; - SIntervalAggOperatorInfo * iaInfo = &miaInfo->intervalAggOperatorInfo; + int32_t scanFlag, SSDataBlock* pResultBlock) { + SMergeIntervalAggOperatorInfo* miaInfo = pOperatorInfo->info; + SIntervalAggOperatorInfo* iaInfo = &miaInfo->intervalAggOperatorInfo; SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; @@ -3255,10 +3255,9 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo* doInterpUnclosedTimeWindow(pOperatorInfo, numOfOutput, pResultRowInfo, pBlock, scanFlag, tsCols, &pos); // restore current time window - ret = - setTimeWindowOutputBuf(pResultRowInfo, &win, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, - iaInfo->binfo.pCtx, - numOfOutput, iaInfo->binfo.rowCellInfoOffset, &iaInfo->aggSup, pTaskInfo); + ret = setTimeWindowOutputBuf(pResultRowInfo, &win, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, + iaInfo->binfo.pCtx, numOfOutput, iaInfo->binfo.rowCellInfoOffset, &iaInfo->aggSup, + 
pTaskInfo); if (ret != TSDB_CODE_SUCCESS) { longjmp(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); } @@ -3314,11 +3313,10 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo* } static SSDataBlock* doMergeIntervalAgg(SOperatorInfo* pOperator) { - - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SMergeIntervalAggOperatorInfo* miaInfo = pOperator->info; - SIntervalAggOperatorInfo *iaInfo = &miaInfo->intervalAggOperatorInfo; + SIntervalAggOperatorInfo* iaInfo = &miaInfo->intervalAggOperatorInfo; if (pOperator->status == OP_EXEC_DONE) { return NULL; } @@ -3327,9 +3325,9 @@ static SSDataBlock* doMergeIntervalAgg(SOperatorInfo* pOperator) { blockDataCleanup(pRes); SOperatorInfo* downstream = pOperator->pDownstream[0]; - int32_t scanFlag = MAIN_SCAN; + int32_t scanFlag = MAIN_SCAN; while (1) { - SSDataBlock *pBlock = NULL; + SSDataBlock* pBlock = NULL; if (miaInfo->prefetchedBlock == NULL) { pBlock = downstream->fpSet.getNextFn(downstream); } else { @@ -3341,7 +3339,6 @@ static SSDataBlock* doMergeIntervalAgg(SOperatorInfo* pOperator) { break; } - if (!miaInfo->hasGroupId) { miaInfo->hasGroupId = true; miaInfo->groupId = pBlock->info.groupId; @@ -3375,22 +3372,21 @@ static SSDataBlock* doMergeIntervalAgg(SOperatorInfo* pOperator) { } SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, - SSDataBlock* pResBlock, SInterval* pInterval, int32_t primaryTsSlotId, - STimeWindowAggSupp* pTwAggSupp, SExecTaskInfo* pTaskInfo) { + SSDataBlock* pResBlock, SInterval* pInterval, int32_t primaryTsSlotId, + SExecTaskInfo* pTaskInfo) { SMergeIntervalAggOperatorInfo* miaInfo = taosMemoryCalloc(1, sizeof(SMergeIntervalAggOperatorInfo)); - SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); if (miaInfo == NULL || pOperator == NULL) { goto _error; } - 
SIntervalAggOperatorInfo * iaInfo = &miaInfo->intervalAggOperatorInfo; + SIntervalAggOperatorInfo* iaInfo = &miaInfo->intervalAggOperatorInfo; iaInfo->win = pTaskInfo->window; iaInfo->order = TSDB_ORDER_ASC; iaInfo->interval = *pInterval; iaInfo->execModel = pTaskInfo->execModel; - iaInfo->twAggSup = *pTwAggSupp; iaInfo->primaryTsIndex = primaryTsSlotId; miaInfo->groupIntervalHash = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), true, HASH_NO_LOCK); From 7a4534a06a17a99e55ecab1bed6350b67990a95d Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 10 Jun 2022 11:20:00 +0800 Subject: [PATCH 08/21] feat: before merge origin 3.0 --- source/libs/executor/inc/executorimpl.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 969a3aac62..cc1287df27 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -797,9 +797,13 @@ SOperatorInfo* createSortedMergeOperatorInfo(SOperatorInfo** downstream, int32_t SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, SSDataBlock* pResBlock, SInterval* pInterval, int32_t primaryTsSlotId, STimeWindowAggSupp *pTwAggSupp, SExecTaskInfo* pTaskInfo); + + SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, SSDataBlock* pResBlock, SInterval* pInterval, int32_t primaryTsSlotId, SExecTaskInfo* pTaskInfo); + + SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild); SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, From 2c386922586a2eefa2cc994eb612e4101aa21385 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Fri, 10 Jun 2022 11:28:41 +0800 Subject: [PATCH 09/21] refactor(sync): add log --- 
source/libs/sync/src/syncAppendEntriesReply.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index 5a543e1605..0aa69b9252 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -178,8 +178,14 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries pMsg->privateTerm < pSender->privateTerm) { snapshotSenderStart(pSender); + char host[128]; + uint16_t port; + syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); + char* s = snapshotSender2Str(pSender); - sInfo("sync event snapshot send start sender first time, sender:%s", s); + sInfo( + "sync event snapshot send to %s:%d start sender first time, lastApplyIndex:%ld lastApplyTerm:%lu sender:%s", + host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, s); taosMemoryFree(s); } From b351fd82611e3ef4e3c97119d418d1a633f4fd96 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Fri, 10 Jun 2022 13:15:43 +0800 Subject: [PATCH 10/21] refactor(sync): add log --- source/libs/sync/src/syncMain.c | 6 +++++- source/libs/sync/src/syncRaftLog.c | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 9516df64da..fcbe7063a4 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -35,7 +35,7 @@ #include "syncVoteMgr.h" #include "tref.h" -bool gRaftDetailLog = false; +bool gRaftDetailLog = true; static int32_t tsNodeRefId = -1; @@ -1155,6 +1155,8 @@ void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term) { } void syncNodeBecomeFollower(SSyncNode* pSyncNode) { + sInfo("sync event become follower"); + // maybe clear leader cache if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { pSyncNode->leaderCache = EMPTY_RAFT_ID; @@ -1187,6 +1189,8 @@ void 
syncNodeBecomeFollower(SSyncNode* pSyncNode) { // /\ UNCHANGED <> // void syncNodeBecomeLeader(SSyncNode* pSyncNode) { + sInfo("sync event become leader"); + // state change pSyncNode->state = TAOS_SYNC_STATE_LEADER; diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 49509ae979..e3ce4579fd 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -14,6 +14,7 @@ */ #include "syncRaftLog.h" +#include "syncRaftCfg.h" #include "wal.h" // refactor, log[0 .. n] ==> log[m .. n] @@ -161,7 +162,9 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr walFsync(pWal, true); - sTrace("sync event write index:%" PRId64, pEntry->index); + sTrace("sync event write index:%ld, %s, isStandBy:%d, msgType:%s, originalRpcType:%s", pEntry->index, + syncUtilState2String(pData->pSyncNode->state), pData->pSyncNode->pRaftCfg->isStandBy, + TMSG_INFO(pEntry->msgType), TMSG_INFO(pEntry->originalRpcType)); return code; } From a4fba1c70b243571b91c3522d17677e1f3e7fd72 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 10 Jun 2022 13:54:44 +0800 Subject: [PATCH 11/21] feat:add async logic for schemaless --- source/client/src/clientSml.c | 34 ++++++++++++++++------------------ source/client/test/smlTest.cpp | 2 +- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index 3039f93a30..25d15ab11e 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -69,6 +69,7 @@ for (int i = 1; i < keyLen; ++i) { \ #define NCHAR_ADD_LEN 3 // L"nchar" 3 means L" " #define MAX_RETRY_TIMES 5 +#define LINE_BATCH 20 //================================================================================================= typedef TSDB_SML_PROTOCOL_TYPE SMLProtocolType; @@ -164,7 +165,7 @@ typedef struct{ typedef struct { int64_t id; - Params params; + Params *params; bool isLast; SMLProtocolType protocol; @@ -2311,29 +2312,24 
@@ static int32_t isSchemalessDb(STscObj *taos, SCatalog *catalog){ } static void smlInsertCallback(void* param, void* res, int32_t code) { - if (code != TSDB_CODE_SUCCESS) { - uError("failed to execute, reason:%s\n", taos_errstr(res)); - } SRequestObj *pRequest = (SRequestObj *)res; - int32_t rows = taos_affected_rows(pRequest); SSmlHandle* info = (SSmlHandle *)param; // lock - taosThreadSpinLock(&info->params.lock); - info->params.request->body.resInfo.numOfRows += rows; if(code != TSDB_CODE_SUCCESS){ - info->params.request->code = code; + taosThreadSpinLock(&info->params->lock); + info->params->request->code = code; + taosThreadSpinUnlock(&info->params->lock); } - taosThreadSpinUnlock(&info->params.lock); // unlock - printf("SML:0x%"PRIx64" insert finished, code: %d, total: %d, insert: %d\n", info->id, code, info->affectedRows, rows); - Params pParam = info->params; + printf("SML:0x%"PRIx64" insert finished, code: %d, total: %d\n", info->id, code, info->affectedRows); + Params *pParam = info->params; bool isLast = info->isLast; smlDestroyInfo(info); if(isLast){ - tsem_post(&pParam.sem); + tsem_post(&pParam->sem); } } @@ -2366,8 +2362,9 @@ TAOS_RES* taos_schemaless_insert(TAOS* taos, char* lines[], int numLines, int pr } ((STscObj *)taos)->schemalessType = 1; - SSmlMsgBuf msg = {.buf = request->msgBuf, .len = ERROR_MSG_BUF_DEFAULT_SIZE}; + SSmlMsgBuf msg = {.len = ERROR_MSG_BUF_DEFAULT_SIZE, .buf = request->msgBuf}; + int cnt = ceil(((double)numLines)/LINE_BATCH); Params params = {.request = request}; tsem_init(¶ms.sem, 0, 0); taosThreadSpinInit(&(params.lock), 0); @@ -2385,7 +2382,7 @@ TAOS_RES* taos_schemaless_insert(TAOS* taos, char* lines[], int numLines, int pr goto end; } - if(isSchemalessDb(taos, params.catalog) != TSDB_CODE_SUCCESS){ + if(isSchemalessDb(((STscObj *)taos), params.catalog) != TSDB_CODE_SUCCESS){ request->code = TSDB_CODE_SML_INVALID_DB_CONF; smlBuildInvalidDataMsg(&msg, "Cannot write data to a non schemaless database", NULL); goto end; @@ 
-2409,8 +2406,7 @@ TAOS_RES* taos_schemaless_insert(TAOS* taos, char* lines[], int numLines, int pr goto end; } - int32_t perBatch = 20000; - for (int i = 0; i < ceil(((double)numLines)/perBatch); ++i) { + for (int i = 0; i < cnt; ++i) { SRequestObj* req = (SRequestObj*)createRequest((STscObj *)taos, TSDB_SQL_INSERT); if(!req){ request->code = TSDB_CODE_OUT_OF_MEMORY; @@ -2424,7 +2420,9 @@ TAOS_RES* taos_schemaless_insert(TAOS* taos, char* lines[], int numLines, int pr goto end; } - if(numLines >= perBatch){ + int32_t perBatch = LINE_BATCH; + + if(numLines > perBatch){ numLines -= perBatch; info->isLast = false; }else{ @@ -2433,7 +2431,7 @@ TAOS_RES* taos_schemaless_insert(TAOS* taos, char* lines[], int numLines, int pr info->isLast = true; } - info->params = params; + info->params = ¶ms; info->pCatalog = params.catalog; info->affectedRows = perBatch; info->pRequest->body.queryFp = smlInsertCallback; diff --git a/source/client/test/smlTest.cpp b/source/client/test/smlTest.cpp index 8137583978..25bf13a113 100644 --- a/source/client/test/smlTest.cpp +++ b/source/client/test/smlTest.cpp @@ -1325,7 +1325,7 @@ TEST(testCase, sml_oom_Test) { pRes = taos_query(taos, "use oom"); taos_free_result(pRes); - TAOS_RES* res = taos_schemaless_insert(taos, (char**)sql, 100, TSDB_SML_LINE_PROTOCOL, 0); + TAOS_RES* res = taos_schemaless_insert(taos, (char**)sql, sizeof(sql)/sizeof(sql[0]), TSDB_SML_LINE_PROTOCOL, 0); ASSERT_EQ(taos_errno(res), 0); taos_free_result(pRes); } From 736862541e86b1f4b0749b908bf9eac1b6b0b4d7 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Fri, 10 Jun 2022 15:19:11 +0800 Subject: [PATCH 12/21] fix(sync): restart with config change --- source/libs/sync/inc/syncInt.h | 4 +- source/libs/sync/src/syncAppendEntries.c | 19 +++++-- source/libs/sync/src/syncAppendEntriesReply.c | 5 +- source/libs/sync/src/syncCommit.c | 12 ++++- source/libs/sync/src/syncMain.c | 50 +++++++++++-------- source/libs/sync/src/syncRaftLog.c | 4 +- source/libs/sync/src/syncSnapshot.c | 
49 ++++++++++-------- 7 files changed, 88 insertions(+), 55 deletions(-) diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 10218f69e6..83f0bd7dd8 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -201,8 +201,8 @@ void syncNodeRelease(SSyncNode* pNode); // raft state change -------------- void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term); -void syncNodeBecomeFollower(SSyncNode* pSyncNode); -void syncNodeBecomeLeader(SSyncNode* pSyncNode); +void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr); +void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr); void syncNodeCandidate2Leader(SSyncNode* pSyncNode); void syncNodeFollower2Candidate(SSyncNode* pSyncNode); diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 3c558b60c8..ae4ccaf2d5 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -150,7 +150,7 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { "ths->state:%d, logOK:%d", pMsg->term, ths->pRaftStore->currentTerm, ths->state, logOK); - syncNodeBecomeFollower(ths); + syncNodeBecomeFollower(ths, "from candidate by append entries"); // ret or reply? 
return ret; @@ -380,9 +380,9 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { // change isStandBy to normal if (!isDrop) { if (ths->state == TAOS_SYNC_STATE_LEADER) { - syncNodeBecomeLeader(ths); + syncNodeBecomeLeader(ths, "config change"); } else { - syncNodeBecomeFollower(ths); + syncNodeBecomeFollower(ths, "config change"); } } @@ -469,7 +469,7 @@ static int32_t syncNodeMakeLogSame(SSyncNode* ths, SyncAppendEntries* pMsg) { // delete confict entries code = ths->pLogStore->syncLogTruncate(ths->pLogStore, delBegin); ASSERT(code == 0); - sInfo("sync event log truncate, from %ld to %ld", delBegin, delEnd); + sInfo("sync event vgId:%d log truncate, from %ld to %ld", ths->vgId, delBegin, delEnd); logStoreSimpleLog2("after syncNodeMakeLogSame", ths->pLogStore); return code; @@ -571,7 +571,7 @@ int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMs if (condition) { sTrace("recv SyncAppendEntries, candidate to follower"); - syncNodeBecomeFollower(ths); + syncNodeBecomeFollower(ths, "from candidate by append entries"); // do not reply? 
return ret; } @@ -742,6 +742,15 @@ int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMs if (pMsg->commitIndex > ths->commitIndex) { // has commit entry in local if (pMsg->commitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) { + // advance commit index to sanpshot first + SSnapshot snapshot; + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + if (snapshot.lastApplyIndex > ths->commitIndex) { + sInfo("sync event vgId:%d commit by snapshot from index:%ld to index:%ld, %s", ths->vgId, ths->commitIndex, + snapshot.lastApplyIndex, syncUtilState2String(ths->state)); + ths->commitIndex = snapshot.lastApplyIndex; + } + SyncIndex beginIndex = ths->commitIndex + 1; SyncIndex endIndex = pMsg->commitIndex; diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index 0aa69b9252..af83b3ac94 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -184,8 +184,9 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries char* s = snapshotSender2Str(pSender); sInfo( - "sync event snapshot send to %s:%d start sender first time, lastApplyIndex:%ld lastApplyTerm:%lu sender:%s", - host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, s); + "sync event vgId:%d snapshot send to %s:%d start sender first time, lastApplyIndex:%ld lastApplyTerm:%lu " + "sender:%s", + ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, s); taosMemoryFree(s); } diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index c092b31adf..96f60be51b 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -48,10 +48,18 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { syncIndexMgrLog2("==syncNodeMaybeAdvanceCommitIndex== pNextIndex", pSyncNode->pNextIndex); syncIndexMgrLog2("==syncNodeMaybeAdvanceCommitIndex== 
pMatchIndex", pSyncNode->pMatchIndex); + // advance commit index to sanpshot first + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + if (snapshot.lastApplyIndex > pSyncNode->commitIndex) { + sInfo("sync event vgId:%d commit by snapshot from index:%ld to index:%ld, %s", pSyncNode->vgId, + pSyncNode->commitIndex, snapshot.lastApplyIndex, syncUtilState2String(pSyncNode->state)); + pSyncNode->commitIndex = snapshot.lastApplyIndex; + } + // update commit index SyncIndex newCommitIndex = pSyncNode->commitIndex; - for (SyncIndex index = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore); index > pSyncNode->commitIndex; - --index) { + for (SyncIndex index = syncNodeGetLastIndex(pSyncNode); index > pSyncNode->commitIndex; --index) { bool agree = syncAgree(pSyncNode, index); sTrace("syncMaybeAdvanceCommitIndex syncAgree:%d, index:%ld, pSyncNode->commitIndex:%ld", agree, index, pSyncNode->commitIndex); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index fcbe7063a4..f124cff786 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -411,6 +411,8 @@ int32_t syncPropose(int64_t rid, const SRpcMsg* pMsg, bool isWeak) { SSyncNode* syncNodeOpen(const SSyncInfo* pOldSyncInfo) { SSyncInfo* pSyncInfo = (SSyncInfo*)pOldSyncInfo; + sInfo("sync event vgId:%d sync open", pSyncInfo->vgId); + SSyncNode* pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(SSyncNode)); assert(pSyncNode != NULL); memset(pSyncNode, 0, sizeof(SSyncNode)); @@ -628,7 +630,7 @@ void syncNodeStart(SSyncNode* pSyncNode) { // start raft if (pSyncNode->replicaNum == 1) { raftStoreNextTerm(pSyncNode->pRaftStore); - syncNodeBecomeLeader(pSyncNode); + syncNodeBecomeLeader(pSyncNode, "one replica start"); syncNodeLog2("==state change become leader immediately==", pSyncNode); @@ -654,7 +656,7 @@ void syncNodeStart(SSyncNode* pSyncNode) { return; } - syncNodeBecomeFollower(pSyncNode); + syncNodeBecomeFollower(pSyncNode, 
"first start"); // for test int32_t ret = 0; @@ -687,6 +689,8 @@ void syncNodeStartStandBy(SSyncNode* pSyncNode) { } void syncNodeClose(SSyncNode* pSyncNode) { + sInfo("sync event vgId:%d sync close", pSyncNode->vgId); + int32_t ret; assert(pSyncNode != NULL); @@ -1149,13 +1153,13 @@ void syncNodeRelease(SSyncNode* pNode) { taosReleaseRef(tsNodeRefId, pNode->rid) void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term) { if (term > pSyncNode->pRaftStore->currentTerm) { raftStoreSetTerm(pSyncNode->pRaftStore, term); - syncNodeBecomeFollower(pSyncNode); + syncNodeBecomeFollower(pSyncNode, "update term"); raftStoreClearVote(pSyncNode->pRaftStore); } } -void syncNodeBecomeFollower(SSyncNode* pSyncNode) { - sInfo("sync event become follower"); +void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) { + sInfo("sync event vgId:%d become follower, %s", pSyncNode->vgId, debugStr); // maybe clear leader cache if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { @@ -1188,8 +1192,8 @@ void syncNodeBecomeFollower(SSyncNode* pSyncNode) { // evoterLog |-> voterLog[i]]} // /\ UNCHANGED <> // -void syncNodeBecomeLeader(SSyncNode* pSyncNode) { - sInfo("sync event become leader"); +void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { + sInfo("sync event vgId:%d become leader, %s", pSyncNode->vgId, debugStr); // state change pSyncNode->state = TAOS_SYNC_STATE_LEADER; @@ -1241,7 +1245,7 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode) { void syncNodeCandidate2Leader(SSyncNode* pSyncNode) { assert(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE); assert(voteGrantedMajority(pSyncNode->pVotesGranted)); - syncNodeBecomeLeader(pSyncNode); + syncNodeBecomeLeader(pSyncNode, "candidate to leader"); syncNodeLog2("==state change syncNodeCandidate2Leader==", pSyncNode); @@ -1264,14 +1268,14 @@ void syncNodeFollower2Candidate(SSyncNode* pSyncNode) { void syncNodeLeader2Follower(SSyncNode* pSyncNode) { assert(pSyncNode->state == TAOS_SYNC_STATE_LEADER); - 
syncNodeBecomeFollower(pSyncNode); + syncNodeBecomeFollower(pSyncNode, "leader to follower"); syncNodeLog2("==state change syncNodeLeader2Follower==", pSyncNode); } void syncNodeCandidate2Follower(SSyncNode* pSyncNode) { assert(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE); - syncNodeBecomeFollower(pSyncNode); + syncNodeBecomeFollower(pSyncNode, "candidate to follower"); syncNodeLog2("==state change syncNodeCandidate2Follower==", pSyncNode); } @@ -1728,17 +1732,19 @@ const char* syncStr(ESyncState state) { int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) { int32_t code = 0; ESyncState state = flag; - sInfo("sync event commit from index:%" PRId64 " to index:%" PRId64 ", %s", beginIndex, endIndex, - syncUtilState2String(state)); + sInfo("sync event vgId:%d commit by wal from index:%" PRId64 " to index:%" PRId64 ", %s", ths->vgId, beginIndex, + endIndex, syncUtilState2String(state)); - // maybe execute by leader, skip snapshot - SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; - if (ths->pFsm->FpGetSnapshot != NULL) { - ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); - } - if (beginIndex <= snapshot.lastApplyIndex) { - beginIndex = snapshot.lastApplyIndex + 1; - } + /* + // maybe execute by leader, skip snapshot + SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; + if (ths->pFsm->FpGetSnapshot != NULL) { + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + } + if (beginIndex <= snapshot.lastApplyIndex) { + beginIndex = snapshot.lastApplyIndex + 1; + } + */ // execute fsm if (ths->pFsm != NULL) { @@ -1795,9 +1801,9 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, // change isStandBy to normal if (!isDrop) { if (ths->state == TAOS_SYNC_STATE_LEADER) { - syncNodeBecomeLeader(ths); + syncNodeBecomeLeader(ths, "config change"); } else { - syncNodeBecomeFollower(ths); + syncNodeBecomeFollower(ths, "config change"); } } diff --git 
a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index e3ce4579fd..c53e5916ae 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -162,8 +162,8 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr walFsync(pWal, true); - sTrace("sync event write index:%ld, %s, isStandBy:%d, msgType:%s, originalRpcType:%s", pEntry->index, - syncUtilState2String(pData->pSyncNode->state), pData->pSyncNode->pRaftCfg->isStandBy, + sTrace("sync event vgId:%d write index:%ld, %s, isStandBy:%d, msgType:%s, originalRpcType:%s", pData->pSyncNode->vgId, + pEntry->index, syncUtilState2String(pData->pSyncNode->state), pData->pSyncNode->pRaftCfg->isStandBy, TMSG_INFO(pEntry->msgType), TMSG_INFO(pEntry->originalRpcType)); return code; diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index a68312d07f..af139ccf6e 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -109,8 +109,10 @@ void snapshotSenderStart(SSyncSnapshotSender *pSender) { char host[128]; uint16_t port; syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); - sTrace("sync event snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send msg:%s", host, - port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, msgStr); + sTrace( + "sync event vgId:%d snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send msg:%s", + pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, + pSender->snapshot.lastApplyTerm, msgStr); taosMemoryFree(msgStr); syncSnapshotSendDestroy(pMsg); @@ -230,13 +232,17 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { uint16_t port; syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); if 
(pSender->seq == SYNC_SNAPSHOT_SEQ_END) { - sTrace("sync event snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send msg:%s", - host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, - msgStr); + sTrace( + "sync event vgId:%d snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send " + "msg:%s", + pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, + pSender->snapshot.lastApplyTerm, msgStr); } else { - sTrace("sync event snapshot send to %s:%d sending seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send msg:%s", - host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, - msgStr); + sTrace( + "sync event vgId:%d snapshot send to %s:%d sending seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send " + "msg:%s", + pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, + pSender->snapshot.lastApplyTerm, msgStr); } taosMemoryFree(msgStr); @@ -264,8 +270,8 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) { char host[128]; uint16_t port; syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); - sTrace("sync event snapshot send to %s:%d resend seq:%d ack:%d send msg:%s", host, port, pSender->seq, pSender->ack, - msgStr); + sTrace("sync event vgId:%d snapshot send to %s:%d resend seq:%d ack:%d send msg:%s", pSender->pSyncNode->vgId, host, + port, pSender->seq, pSender->ack, msgStr); taosMemoryFree(msgStr); syncSnapshotSendDestroy(pMsg); @@ -476,8 +482,8 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { char host[128]; uint16_t port; syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); - sTrace("sync event snapshot recv from %s:%d begin ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", host, port, - pReceiver->ack, 
pMsg->lastIndex, pMsg->lastTerm, msgStr); + sTrace("sync event vgId:%d snapshot recv from %s:%d begin ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", + pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); taosMemoryFree(msgStr); } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_END) { @@ -495,9 +501,11 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { uint16_t port; syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); sInfo( - "sync event snapshot recv from %s:%d finish, update log begin index:%ld, snapshot.lastApplyIndex:%ld, " + "sync event vgId:%d snapshot recv from %s:%d finish, update log begin index:%ld, " + "snapshot.lastApplyIndex:%ld, " "snapshot.lastApplyTerm:%lu, raft log:%s", - host, port, pMsg->lastIndex + 1, snapshot.lastApplyIndex, snapshot.lastApplyTerm, logSimpleStr); + pSyncNode->vgId, host, port, pMsg->lastIndex + 1, snapshot.lastApplyIndex, snapshot.lastApplyTerm, + logSimpleStr); taosMemoryFree(logSimpleStr); pReceiver->pWriter = NULL; @@ -506,8 +514,8 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { needRsp = true; char *msgStr = syncSnapshotSend2Str(pMsg); - sTrace("sync event snapshot recv from %s:%d end ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", host, port, - pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + sTrace("sync event vgId:%d snapshot recv from %s:%d end ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", + pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); taosMemoryFree(msgStr); } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_FORCE_CLOSE) { @@ -520,8 +528,9 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); char *msgStr = syncSnapshotSend2Str(pMsg); - sTrace("sync event snapshot recv from %s:%d force close ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", host, - port, 
pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + sTrace( + "sync event vgId:%d snapshot recv from %s:%d force close ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", + pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); taosMemoryFree(msgStr); @@ -539,8 +548,8 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { char host[128]; uint16_t port; syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); - sTrace("sync event snapshot recv from %s:%d receiving ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", host, - port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + sTrace("sync event vgId:%d snapshot recv from %s:%d receiving ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", + pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); taosMemoryFree(msgStr); } else { From f30d9a034e3da70238d1325e5a41ad40c647eb7b Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 10 Jun 2022 15:39:11 +0800 Subject: [PATCH 13/21] feat: redistribute vgroup to dnodes --- include/util/taoserror.h | 3 +- source/dnode/mgmt/mgmt_mnode/src/mmHandle.c | 3 + source/dnode/mnode/impl/src/mndMain.c | 4 +- source/dnode/mnode/impl/src/mndTrans.c | 4 +- source/dnode/mnode/impl/src/mndVgroup.c | 142 +++++++++------ source/dnode/vnode/src/vnd/vnodeSync.c | 1 + source/util/src/terror.c | 3 +- ...istribute_vgroup_replica3_move_1_vnode.sim | 165 ++++++++++++++++++ 8 files changed, 270 insertions(+), 55 deletions(-) create mode 100644 tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim diff --git a/include/util/taoserror.h b/include/util/taoserror.h index ae0191e6d2..ce6a3f2ce7 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -85,7 +85,6 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RPC_NETWORK_UNAVAIL TAOS_DEF_ERROR_CODE(0, 0x0102) #define TSDB_CODE_RPC_FQDN_ERROR TAOS_DEF_ERROR_CODE(0, 0x0103) #define 
TSDB_CODE_RPC_PORT_EADDRINUSE TAOS_DEF_ERROR_CODE(0, 0x0104) -#define TSDB_CODE_RPC_INDIRECT_NETWORK_UNAVAIL TAOS_DEF_ERROR_CODE(0, 0x0105) //client #define TSDB_CODE_TSC_INVALID_OPERATION TAOS_DEF_ERROR_CODE(0, 0x0200) @@ -220,6 +219,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_MND_VGROUP_ALREADY_IN_DNODE TAOS_DEF_ERROR_CODE(0, 0x0392) #define TSDB_CODE_MND_VGROUP_UN_CHANGED TAOS_DEF_ERROR_CODE(0, 0x0393) #define TSDB_CODE_MND_HAS_OFFLINE_DNODE TAOS_DEF_ERROR_CODE(0, 0x0394) +#define TSDB_CODE_MND_INVALID_REPLICA TAOS_DEF_ERROR_CODE(0, 0x0395) // mnode-stable #define TSDB_CODE_MND_STB_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03A0) @@ -260,6 +260,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_MND_TRANS_CONFLICT TAOS_DEF_ERROR_CODE(0, 0x03D3) #define TSDB_CODE_MND_TRANS_UNKNOW_ERROR TAOS_DEF_ERROR_CODE(0, 0x03D4) #define TSDB_CODE_MND_TRANS_CLOG_IS_NULL TAOS_DEF_ERROR_CODE(0, 0x03D5) +#define TSDB_CODE_MND_TRANS_NETWORK_UNAVAILL TAOS_DEF_ERROR_CODE(0, 0x03D6) // mnode-mq #define TSDB_CODE_MND_TOPIC_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03E0) diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index 264dc74e36..2589bbd690 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -170,6 +170,9 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_MND_COMPACT_DB, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_GET_DB_CFG, mmPutNodeMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_VGROUP_LIST, mmPutNodeMsgToReadQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MND_REDISTRIBUTE_VGROUP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MND_MERGE_VGROUP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MND_BALANCE_VGROUP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, 
TDMT_MND_CREATE_FUNC, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_RETRIEVE_FUNC, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_DROP_FUNC, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 813e4c30b5..27d13d66b6 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -217,8 +217,8 @@ static int32_t mndInitSteps(SMnode *pMnode) { if (mndAllocStep(pMnode, "mnode-cluster", mndInitCluster, mndCleanupCluster) != 0) return -1; if (mndAllocStep(pMnode, "mnode-mnode", mndInitMnode, mndCleanupMnode) != 0) return -1; if (mndAllocStep(pMnode, "mnode-qnode", mndInitQnode, mndCleanupQnode) != 0) return -1; - if (mndAllocStep(pMnode, "mnode-qnode", mndInitSnode, mndCleanupSnode) != 0) return -1; - if (mndAllocStep(pMnode, "mnode-qnode", mndInitBnode, mndCleanupBnode) != 0) return -1; + if (mndAllocStep(pMnode, "mnode-snode", mndInitSnode, mndCleanupSnode) != 0) return -1; + if (mndAllocStep(pMnode, "mnode-bnode", mndInitBnode, mndCleanupBnode) != 0) return -1; if (mndAllocStep(pMnode, "mnode-dnode", mndInitDnode, mndCleanupDnode) != 0) return -1; if (mndAllocStep(pMnode, "mnode-user", mndInitUser, mndCleanupUser) != 0) return -1; if (mndAllocStep(pMnode, "mnode-grant", mndInitGrant, mndCleanupGrant) != 0) return -1; diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 1e98a3bbf9..1124baf286 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -781,7 +781,7 @@ static void mndTransSendRpcRsp(SMnode *pMnode, STrans *pTrans) { sendRsp = true; } } else { - if (pTrans->stage == TRN_STAGE_REDO_ACTION && pTrans->failedTimes > 3) { + if (pTrans->stage == TRN_STAGE_REDO_ACTION && pTrans->failedTimes > 2) { if (code == 0) code = TSDB_CODE_MND_TRANS_UNKNOW_ERROR; sendRsp = true; } 
@@ -791,7 +791,7 @@ static void mndTransSendRpcRsp(SMnode *pMnode, STrans *pTrans) { mDebug("trans:%d, send rsp, code:0x%x stage:%s app:%p", pTrans->id, code, mndTransStr(pTrans->stage), pTrans->rpcInfo.ahandle); if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { - code = TSDB_CODE_RPC_INDIRECT_NETWORK_UNAVAIL; + code = TSDB_CODE_MND_TRANS_NETWORK_UNAVAILL; } SRpcMsg rspMsg = {.code = code, .info = pTrans->rpcInfo}; diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index 76e65ddd92..1c395fa767 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -59,6 +59,10 @@ int32_t mndInitVgroup(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_DND_DROP_VNODE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_COMPACT_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_MND_REDISTRIBUTE_VGROUP, mndProcessRedistributeVgroupMsg); + mndSetMsgHandle(pMnode, TDMT_MND_MERGE_VGROUP, mndProcessSplitVgroupMsg); + mndSetMsgHandle(pMnode, TDMT_MND_BALANCE_VGROUP, mndProcessBalanceVgroupMsg); + mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_VGROUP, mndRetrieveVgroups); mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_VGROUP, mndCancelGetNextVgroup); mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_VNODES, mndRetrieveVnodes); @@ -1009,10 +1013,10 @@ static int32_t mndAddDecVgroupReplicaFromTrans(SMnode *pMnode, STrans *pTrans, S if (pGid == NULL) return 0; + pVgroup->replica--; memcpy(&delGid, pGid, sizeof(SVnodeGid)); memcpy(pGid, &pVgroup->vnodeGid[pVgroup->replica], sizeof(SVnodeGid)); memset(&pVgroup->vnodeGid[pVgroup->replica], 0, sizeof(SVnodeGid)); - pVgroup->replica--; if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, pVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; if (mndAddDropVnodeAction(pMnode, pTrans, pDb, pVgroup, &delGid, true) != 0) return -1; @@ -1040,11 +1044,36 @@ static int32_t mndRedistributeVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, mInfo("vgId:%d, vnode:%d 
dnode:%d", newVg.vgId, i, newVg.vnodeGid[i].dnodeId); } - if (mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew1->id) != 0) goto _OVER; - if (mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld1->id) != 0) goto _OVER; - if (pNew2 != NULL) { + if (pNew1 != pOld1) { + int32_t numOfVnodes = mndGetVnodesNum(pMnode, pNew1->id); + if (numOfVnodes >= pNew1->numOfSupportVnodes) { + mError("vgId:%d, no enough vnodes in dnode:%d, numOfVnodes:%d support:%d", newVg.vgId, pNew1->id, numOfVnodes, + pNew1->numOfSupportVnodes); + terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES; + goto _OVER; + } + if (mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew1->id) != 0) goto _OVER; + if (mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld1->id) != 0) goto _OVER; + } + if (pNew2 != pOld2) { + int32_t numOfVnodes = mndGetVnodesNum(pMnode, pNew2->id); + if (numOfVnodes >= pNew2->numOfSupportVnodes) { + mError("vgId:%d, no enough vnodes in dnode:%d, numOfVnodes:%d support:%d", newVg.vgId, pNew2->id, numOfVnodes, + pNew2->numOfSupportVnodes); + terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES; + goto _OVER; + } if (mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew2->id) != 0) goto _OVER; if (mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld2->id) != 0) goto _OVER; + } + if (pNew3 != pOld3) { + int32_t numOfVnodes = mndGetVnodesNum(pMnode, pNew3->id); + if (numOfVnodes >= pNew3->numOfSupportVnodes) { + mError("vgId:%d, no enough vnodes in dnode:%d, numOfVnodes:%d support:%d", newVg.vgId, pNew3->id, numOfVnodes, + pNew3->numOfSupportVnodes); + terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES; + goto _OVER; + } if (mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew3->id) != 0) goto _OVER; if (mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld3->id) != 0) goto _OVER; } @@ -1070,88 +1099,105 @@ _OVER: } static int32_t mndProcessRedistributeVgroupMsg(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - SUserObj 
*pUser = NULL; - SDnodeObj *pNew1 = NULL; - SDnodeObj *pNew2 = NULL; - SDnodeObj *pNew3 = NULL; - SDnodeObj *pOld1 = NULL; - SDnodeObj *pOld2 = NULL; - SDnodeObj *pOld3 = NULL; - SVgObj *pVgroup = NULL; - SDbObj *pDb = NULL; - int32_t code = -1; - int64_t curMs = taosGetTimestampMs(); - SMDropMnodeReq redReq = {0}; + SMnode *pMnode = pReq->info.node; + SUserObj *pUser = NULL; + SDnodeObj *pNew1 = NULL; + SDnodeObj *pNew2 = NULL; + SDnodeObj *pNew3 = NULL; + SDnodeObj *pOld1 = NULL; + SDnodeObj *pOld2 = NULL; + SDnodeObj *pOld3 = NULL; + SVgObj *pVgroup = NULL; + SDbObj *pDb = NULL; + int32_t code = -1; + int64_t curMs = taosGetTimestampMs(); -#if 0 - if (tDeserializeSCreateDropMQSBNodeReq(pReq->pCont, pReq->contLen, &dropReq) != 0) { + SRedistributeVgroupReq redReq = {0}; + if (tDeserializeSRedistributeVgroupReq(pReq->pCont, pReq->contLen, &redReq) != 0) { terrno = TSDB_CODE_INVALID_MSG; goto _OVER; } -#endif - mDebug("vgId:%d, start to redistribute", 2); + mInfo("vgId:%d, start to redistribute to dnode %d:%d:%d", redReq.vgId, redReq.dnodeId1, redReq.dnodeId2, + redReq.dnodeId3); pUser = mndAcquireUser(pMnode, pReq->conn.user); if (pUser == NULL) { terrno = TSDB_CODE_MND_NO_USER_FROM_CONN; goto _OVER; } - if (mndCheckNodeAuth(pUser) != 0) { - goto _OVER; - } + if (mndCheckNodeAuth(pUser) != 0) goto _OVER; - pVgroup = mndAcquireVgroup(pMnode, 2); + pVgroup = mndAcquireVgroup(pMnode, redReq.vgId); if (pVgroup == NULL) goto _OVER; pDb = mndAcquireDb(pMnode, pVgroup->dbName); if (pDb == NULL) goto _OVER; if (pVgroup->replica == 1) { - pNew1 = mndAcquireDnode(pMnode, 1); + if (redReq.dnodeId2 != -1 || redReq.dnodeId3 != -1) { + terrno = TSDB_CODE_MND_INVALID_REPLICA; + goto _OVER; + } + pNew1 = mndAcquireDnode(pMnode, redReq.dnodeId1); pOld1 = mndAcquireDnode(pMnode, pVgroup->vnodeGid[0].dnodeId); - if (pNew1 == NULL || pOld1 == NULL) goto _OVER; - if (!mndIsDnodeOnline(pNew1, curMs) || !mndIsDnodeOnline(pOld1, curMs)) { - terrno = TSDB_CODE_NODE_OFFLINE; + if (pNew1 == 
NULL || pOld1 == NULL) { + terrno = TSDB_CODE_MND_DNODE_NOT_EXIST; goto _OVER; } if (pNew1 == pOld1) { terrno = TSDB_CODE_MND_VGROUP_UN_CHANGED; goto _OVER; } - if (mndRedistributeVgroup(pMnode, pReq, pDb, pVgroup, pNew1, pOld1, NULL, NULL, NULL, NULL) != 0) goto _OVER; - } - - if (pVgroup->replica == 3) { - pNew1 = mndAcquireDnode(pMnode, 1); - pNew2 = mndAcquireDnode(pMnode, 2); - pNew3 = mndAcquireDnode(pMnode, 3); + if (!mndIsDnodeOnline(pNew1, curMs) || !mndIsDnodeOnline(pOld1, curMs)) { + terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE; + goto _OVER; + } + code = mndRedistributeVgroup(pMnode, pReq, pDb, pVgroup, pNew1, pOld1, NULL, NULL, NULL, NULL); + } else if (pVgroup->replica == 3) { + if (redReq.dnodeId2 == -1 || redReq.dnodeId3 == -1) { + terrno = TSDB_CODE_MND_INVALID_REPLICA; + goto _OVER; + } + pNew1 = mndAcquireDnode(pMnode, redReq.dnodeId1); + pNew2 = mndAcquireDnode(pMnode, redReq.dnodeId2); + pNew3 = mndAcquireDnode(pMnode, redReq.dnodeId3); pOld1 = mndAcquireDnode(pMnode, pVgroup->vnodeGid[0].dnodeId); pOld2 = mndAcquireDnode(pMnode, pVgroup->vnodeGid[1].dnodeId); pOld3 = mndAcquireDnode(pMnode, pVgroup->vnodeGid[2].dnodeId); - if (pNew1 == NULL || pOld1 == NULL || pNew2 == NULL || pOld2 == NULL || pNew3 == NULL || pOld3 == NULL) goto _OVER; - if (!mndIsDnodeOnline(pNew1, curMs) || !mndIsDnodeOnline(pOld1, curMs) || !mndIsDnodeOnline(pNew2, curMs) || - !mndIsDnodeOnline(pOld2, curMs) || !mndIsDnodeOnline(pNew3, curMs) || !mndIsDnodeOnline(pOld3, curMs)) { - terrno = TSDB_CODE_NODE_OFFLINE; + if (pNew1 == NULL || pOld1 == NULL || pNew2 == NULL || pOld2 == NULL || pNew3 == NULL || pOld3 == NULL) { + terrno = TSDB_CODE_MND_DNODE_NOT_EXIST; goto _OVER; } - bool changed = true; - if (pNew1 != pOld1 || pNew1 != pOld2 || pNew1 != pOld3) changed = true; - if (pNew2 != pOld1 || pNew2 != pOld2 || pNew2 != pOld3) changed = true; - if (pNew3 != pOld1 || pNew3 != pOld2 || pNew3 != pOld3) changed = true; + if (pNew1 == pNew2 || pNew1 == pNew3 || pNew2 == pNew3) { + 
terrno = TSDB_CODE_MND_INVALID_REPLICA; + goto _OVER; + } + bool changed = false; + if (pNew1 != pOld1 && pNew1 != pOld2 && pNew1 != pOld3) changed = true; + if (pNew2 != pOld1 && pNew2 != pOld2 && pNew2 != pOld3) changed = true; + if (pNew3 != pOld1 && pNew3 != pOld2 && pNew3 != pOld3) changed = true; if (!changed) { terrno = TSDB_CODE_MND_VGROUP_UN_CHANGED; goto _OVER; } - if (mndRedistributeVgroup(pMnode, pReq, pDb, pVgroup, pNew1, pOld1, pNew2, pOld2, pNew3, pOld3) != 0) goto _OVER; + if (!mndIsDnodeOnline(pNew1, curMs) || !mndIsDnodeOnline(pOld1, curMs) || !mndIsDnodeOnline(pNew2, curMs) || + !mndIsDnodeOnline(pOld2, curMs) || !mndIsDnodeOnline(pNew3, curMs) || !mndIsDnodeOnline(pOld3, curMs)) { + terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE; + goto _OVER; + } + code = mndRedistributeVgroup(pMnode, pReq, pDb, pVgroup, pNew1, pOld1, pNew2, pOld2, pNew3, pOld3); + } else { + terrno = TSDB_CODE_MND_INVALID_REPLICA; + goto _OVER; } if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { - mDebug("vgId:%d, failed to redistribute since %s", 1, terrstr()); + mError("vgId:%d, failed to redistribute to dnode %d %d %d since %s", redReq.vgId, redReq.dnodeId1, redReq.dnodeId2, + redReq.dnodeId3, terrstr()); } mndReleaseDnode(pMnode, pNew1); @@ -1303,9 +1349,7 @@ static int32_t mndProcessSplitVgroupMsg(SRpcMsg *pReq) { goto _OVER; } - if (mndCheckNodeAuth(pUser) != 0) { - goto _OVER; - } + if (mndCheckNodeAuth(pUser) != 0) goto _OVER; code = mndSplitVgroup(pMnode, pReq, pDb, pVgroup); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 816c0cfac9..087dcac7b7 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -187,6 +187,7 @@ static void vnodeSyncReconfig(struct SSyncFSM *pFsm, SSyncCfg newCfg, SReConfigC // todo rpc response here // build rpc msg // put into apply queue + 
vnodePostBlockMsg(pVnode, TDMT_VND_ALTER_REPLICA); } static void vnodeSyncCommitMsg(SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 71c348f810..b5c54b780d 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -90,7 +90,6 @@ TAOS_DEFINE_ERROR(TSDB_CODE_RPC_AUTH_FAILURE, "Authentication failur TAOS_DEFINE_ERROR(TSDB_CODE_RPC_NETWORK_UNAVAIL, "Unable to establish connection") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_FQDN_ERROR, "Unable to resolve FQDN") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_PORT_EADDRINUSE, "Port already in use") -TAOS_DEFINE_ERROR(TSDB_CODE_RPC_INDIRECT_NETWORK_UNAVAIL, "Unable to establish connection") //client TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_OPERATION, "Invalid operation") @@ -225,6 +224,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_VGROUP_NOT_IN_DNODE, "Vgroup not in dnode") TAOS_DEFINE_ERROR(TSDB_CODE_MND_VGROUP_NOT_EXIST, "Vgroup does not exist") TAOS_DEFINE_ERROR(TSDB_CODE_MND_VGROUP_UN_CHANGED, "Vgroup distribution has not changed") TAOS_DEFINE_ERROR(TSDB_CODE_MND_HAS_OFFLINE_DNODE, "Offline dnode exists") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_REPLICA, "Invalid vgroup replica") // mnode-stable TAOS_DEFINE_ERROR(TSDB_CODE_MND_STB_ALREADY_EXIST, "STable already exists") @@ -265,6 +265,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_INVALID_STAGE, "Invalid stage to kill TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_CONFLICT, "Conflict transaction not completed") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_UNKNOW_ERROR, "Unknown transaction error") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_CLOG_IS_NULL, "Transaction commitlog is null") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_NETWORK_UNAVAILL, "Unable to establish connection While execute transaction") // mnode-mq TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOPIC_ALREADY_EXIST, "Topic already exists") diff --git a/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim 
b/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim new file mode 100644 index 0000000000..f44061ce6e --- /dev/null +++ b/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim @@ -0,0 +1,165 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/deploy.sh -n dnode2 -i 2 +system sh/deploy.sh -n dnode3 -i 3 +system sh/deploy.sh -n dnode4 -i 4 +system sh/deploy.sh -n dnode5 -i 5 +system sh/cfg.sh -n dnode1 -c supportVnodes -v 0 +system sh/exec.sh -n dnode1 -s start +system sh/exec.sh -n dnode2 -s start +system sh/exec.sh -n dnode3 -s start +system sh/exec.sh -n dnode4 -s start +#system sh/exec.sh -n dnode5 -s start +sql connect +sql create user u1 pass 'taosdata' + +print =============== step1 create dnode2 +sql create dnode $hostname port 7200 +sql create dnode $hostname port 7300 +sql create dnode $hostname port 7400 +sql create dnode $hostname port 7500 + +$x = 0 +step1: + $x = $x + 1 + sleep 1000 + if $x == 10 then + print ====> dnode not ready! 
+ return -1 + endi +sql show dnodes +print ===> $data00 $data01 $data02 $data03 $data04 $data05 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 +print ===> $data20 $data21 $data22 $data23 $data24 $data25 +print ===> $data30 $data31 $data32 $data33 $data34 $data35 +print ===> $data40 $data41 $data42 $data43 $data44 $data45 +if $rows != 5 then + return -1 +endi +if $data(1)[4] != ready then + goto step1 +endi +if $data(2)[4] != ready then + goto step1 +endi +if $data(3)[4] != ready then + goto step1 +endi +if $data(4)[4] != ready then + goto step1 +endi +#if $data(5)[4] != ready then +# goto step1 +#endi + +print =============== step2: create db +sql create database d1 vgroups 1 replica 3 + +# Invalid vgroup +sql_error redistribute vgroup 3 dnode 5 dnode 3 dnode 4 +# un changed +sql_error redistribute vgroup 2 dnode 2 dnode 3 dnode 4 +# no enought vnodes +sql_error redistribute vgroup 2 dnode 1 dnode 3 dnode 4 +# offline vnodes +sql_error redistribute vgroup 2 dnode 5 dnode 3 dnode 4 +# Invalid replica +sql_error redistribute vgroup 2 dnode 5 +sql_error redistribute vgroup 2 dnode 5 dnode 3 +sql_error redistribute vgroup 2 dnode 2 dnode 3 +sql_error redistribute vgroup 2 dnode 2 dnode 2 +sql_error redistribute vgroup 3 dnode 2 dnode 2 + +system sh/exec.sh -n dnode5 -s start +$x = 0 +step2: + $x = $x + 1 + sleep 1000 + if $x == 10 then + print ====> dnode not ready! 
+ return -1 + endi +sql show dnodes +print ===> $data00 $data01 $data02 $data03 $data04 $data05 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 +print ===> $data20 $data21 $data22 $data23 $data24 $data25 +print ===> $data30 $data31 $data32 $data33 $data34 $data35 +print ===> $data40 $data41 $data42 $data43 $data44 $data45 +if $rows != 5 then + return -1 +endi +if $data(1)[4] != ready then + goto step2 +endi +if $data(2)[4] != ready then + goto step2 +endi +if $data(3)[4] != ready then + goto step2 +endi +if $data(4)[4] != ready then + goto step2 +endi +if $data(5)[4] != ready then + goto step2 +endi + +return +print =============== step3: move follower +$leaderExist = 0 +$leaderVnode = 0 +$follower1 = 0 +$follower2 = 0 + +$x = 0 +step3: + $x = $x + 1 + sleep 1000 + if $x == 10 then + print ====> db not ready! + return -1 + endi +sql show d1.vgroups +print ===> $data00 $data01 $data02 $data03 $data04 $data05 +if $rows != 1 then + return -1 +endi +if $data(2)[3] == leader then + $leaderExist = 1 + $leaderVnode = 4 + $follower1 = 2 + $follower2 = 3 +endi +if $data(2)[4] != ready then + $leaderExist = 1 + $leaderVnode = 3 + $follower1 = 2 + $follower2 = 4 +endi +if $data(3)[4] != ready then + $leaderExist = 1 + $leaderVnode = 2 + $follower1 = 3 + $follower2 = 4 +endi +if $leaderExist != 1 then + goto step3 +endi + +print redistribute vgroup 2 dnode $follower1 dnode $follower2 dnode 5 +sql redistribute vgroup 2 dnode $follower1 dnode $follower2 dnode 5 + +print =============== step4: move leader + + +return + +print =============== step3: drop dnode 3 + + +return +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode2 -s stop -x SIGINT +system sh/exec.sh -n dnode3 -s stop -x SIGINT +system sh/exec.sh -n dnode4 -s stop -x SIGINT +system sh/exec.sh -n dnode5 -s stop -x SIGINT From f364ad7bed0ab8ab9594a3c1e9452e8d24c7f0c4 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 10 Jun 2022 16:13:41 +0800 Subject: [PATCH 14/21] fix:error in
windows --- source/client/src/clientSml.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index 25d15ab11e..bf60d25976 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -17,6 +17,9 @@ #include "tname.h" #include "cJSON.h" #include "tglobal.h" +#include "osSemaphore.h" +#include "osThread.h" + //================================================================================================= #define SPACE ' ' @@ -2323,7 +2326,7 @@ static void smlInsertCallback(void* param, void* res, int32_t code) { } // unlock - printf("SML:0x%"PRIx64" insert finished, code: %d, total: %d\n", info->id, code, info->affectedRows); + printf("SML:0x%" PRIx64 " insert finished, code: %d, total: %d\n", info->id, code, info->affectedRows); Params *pParam = info->params; bool isLast = info->isLast; smlDestroyInfo(info); @@ -2362,10 +2365,11 @@ TAOS_RES* taos_schemaless_insert(TAOS* taos, char* lines[], int numLines, int pr } ((STscObj *)taos)->schemalessType = 1; - SSmlMsgBuf msg = {.len = ERROR_MSG_BUF_DEFAULT_SIZE, .buf = request->msgBuf}; + SSmlMsgBuf msg = {ERROR_MSG_BUF_DEFAULT_SIZE, request->msgBuf}; int cnt = ceil(((double)numLines)/LINE_BATCH); - Params params = {.request = request}; + Params params; + params.request = request; tsem_init(¶ms.sem, 0, 0); taosThreadSpinInit(&(params.lock), 0); From b90f1f246c5251adfab1a8c0d4a3fff00fc25cc0 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 10 Jun 2022 16:31:33 +0800 Subject: [PATCH 15/21] refactor: adjust mnode trace log --- source/dnode/mnode/sdb/src/sdbFile.c | 2 +- source/dnode/mnode/sdb/src/sdbHash.c | 2 ++ source/dnode/mnode/sdb/src/sdbRaw.c | 4 ++++ source/dnode/mnode/sdb/src/sdbRow.c | 4 ++++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index f98ecf5343..b32abc3eaa 100644 --- 
a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -357,7 +357,7 @@ static int32_t sdbWriteFileImp(SSdb *pSdb) { SdbEncodeFp encodeFp = pSdb->encodeFps[i]; if (encodeFp == NULL) continue; - mTrace("write %s to sdb file, total %d rows", sdbTableName(i), sdbGetSize(pSdb, i)); + mDebug("write %s to sdb file, total %d rows", sdbTableName(i), sdbGetSize(pSdb, i)); SHashObj *hash = pSdb->hashObjs[i]; TdThreadRwlock *pLock = &pSdb->locks[i]; diff --git a/source/dnode/mnode/sdb/src/sdbHash.c b/source/dnode/mnode/sdb/src/sdbHash.c index d1b1e31635..71792a2354 100644 --- a/source/dnode/mnode/sdb/src/sdbHash.c +++ b/source/dnode/mnode/sdb/src/sdbHash.c @@ -83,6 +83,7 @@ const char *sdbStatusName(ESdbStatus status) { } void sdbPrintOper(SSdb *pSdb, SSdbRow *pRow, const char *oper) { +#if 0 EKeyType keyType = pSdb->keyTypes[pRow->type]; if (keyType == SDB_KEY_BINARY) { @@ -96,6 +97,7 @@ void sdbPrintOper(SSdb *pSdb, SSdbRow *pRow, const char *oper) { pRow->refCount, oper, pRow->pObj, sdbStatusName(pRow->status)); } else { } +#endif } static SHashObj *sdbGetHash(SSdb *pSdb, int32_t type) { diff --git a/source/dnode/mnode/sdb/src/sdbRaw.c b/source/dnode/mnode/sdb/src/sdbRaw.c index 7720a8e88a..95985cd3d9 100644 --- a/source/dnode/mnode/sdb/src/sdbRaw.c +++ b/source/dnode/mnode/sdb/src/sdbRaw.c @@ -37,13 +37,17 @@ SSdbRaw *sdbAllocRaw(ESdbType type, int8_t sver, int32_t dataLen) { pRaw->sver = sver; pRaw->dataLen = dataLen; +#if 0 mTrace("raw:%p, is created, len:%d table:%s", pRaw, dataLen, sdbTableName(type)); +#endif return pRaw; } void sdbFreeRaw(SSdbRaw *pRaw) { if (pRaw != NULL) { +#if 0 mTrace("raw:%p, is freed", pRaw); +#endif taosMemoryFree(pRaw); } } diff --git a/source/dnode/mnode/sdb/src/sdbRow.c b/source/dnode/mnode/sdb/src/sdbRow.c index e57a6b028b..b362ee3a45 100644 --- a/source/dnode/mnode/sdb/src/sdbRow.c +++ b/source/dnode/mnode/sdb/src/sdbRow.c @@ -23,7 +23,9 @@ SSdbRow *sdbAllocRow(int32_t objSize) { return NULL; } +#if 0 
mTrace("row:%p, is created, len:%d", pRow->pObj, objSize); +#endif return pRow; } @@ -45,6 +47,8 @@ void sdbFreeRow(SSdb *pSdb, SSdbRow *pRow, bool callFunc) { sdbPrintOper(pSdb, pRow, "free"); +#if 0 mTrace("row:%p, is freed", pRow->pObj); +#endif taosMemoryFreeClear(pRow); } From 56ce7b1baa0511553f18242423895d89eff2a353 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 10 Jun 2022 16:45:31 +0800 Subject: [PATCH 16/21] enh: adjust show transcations output --- source/common/src/systable.c | 2 +- source/dnode/mnode/impl/inc/mndDef.h | 6 +- source/dnode/mnode/impl/src/mndTrans.c | 90 ++++++++++++------- ...istribute_vgroup_replica3_move_1_vnode.sim | 32 +++++-- 4 files changed, 87 insertions(+), 43 deletions(-) diff --git a/source/common/src/systable.c b/source/common/src/systable.c index 2b59354d60..cb38a3cf70 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -221,7 +221,7 @@ static const SSysDbTableSchema transSchema[] = { {.name = "db", .bytes = SYSTABLE_SCH_DB_NAME_LEN, .type = TSDB_DATA_TYPE_VARCHAR}, {.name = "failed_times", .bytes = 4, .type = TSDB_DATA_TYPE_INT}, {.name = "last_exec_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP}, - {.name = "last_error", .bytes = (TSDB_TRANS_ERROR_LEN - 1) + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR}, + {.name = "last_action_info", .bytes = (TSDB_TRANS_ERROR_LEN - 1) + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR}, }; static const SSysDbTableSchema configSchema[] = { diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 382f8dd55f..ad0a384507 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -120,10 +120,10 @@ typedef struct { SArray* commitActions; int64_t createdTime; int64_t lastExecTime; - int32_t lastErrorAction; + int32_t lastAction; int32_t lastErrorNo; - tmsg_t lastErrorMsgType; - SEpSet lastErrorEpset; + tmsg_t lastMsgType; + SEpSet lastEpset; char 
dbname[TSDB_DB_FNAME_LEN]; int32_t startFunc; int32_t stopFunc; diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 1124baf286..a689c89037 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -894,10 +894,19 @@ static int32_t mndTransWriteSingleLog(SMnode *pMnode, STrans *pTrans, STransActi code = 0; mDebug("trans:%d, %s:%d write to sdb, type:%s status:%s", pTrans->id, mndTransStr(pAction->stage), pAction->id, sdbTableName(pAction->pRaw->type), sdbStatusName(pAction->pRaw->status)); + + pTrans->lastAction = pAction->id; + pTrans->lastMsgType = pAction->msgType; + pTrans->lastEpset = pAction->epSet; + pTrans->lastErrorNo = 0; } else { pAction->errCode = (terrno != 0) ? terrno : code; mError("trans:%d, %s:%d failed to write sdb since %s, type:%s status:%s", pTrans->id, mndTransStr(pAction->stage), pAction->id, terrstr(), sdbTableName(pAction->pRaw->type), sdbStatusName(pAction->pRaw->status)); + pTrans->lastAction = pAction->id; + pTrans->lastMsgType = pAction->msgType; + pTrans->lastEpset = pAction->epSet; + pTrans->lastErrorNo = pAction->errCode; } return code; @@ -933,27 +942,48 @@ static int32_t mndTransSendSingleMsg(SMnode *pMnode, STrans *pTrans, STransActio pAction->msgReceived = 0; pAction->errCode = 0; mDebug("trans:%d, %s:%d is sent, %s", pTrans->id, mndTransStr(pAction->stage), pAction->id, detail); + + pTrans->lastAction = pAction->id; + pTrans->lastMsgType = pAction->msgType; + pTrans->lastEpset = pAction->epSet; + if (pTrans->lastErrorNo == 0) { + pTrans->lastErrorNo = TSDB_CODE_ACTION_IN_PROGRESS; + } } else { pAction->msgSent = 0; pAction->msgReceived = 0; pAction->errCode = (terrno != 0) ? 
terrno : code; mError("trans:%d, %s:%d not send since %s, %s", pTrans->id, mndTransStr(pAction->stage), pAction->id, terrstr(), detail); + + pTrans->lastAction = pAction->id; + pTrans->lastMsgType = pAction->msgType; + pTrans->lastEpset = pAction->epSet; + pTrans->lastErrorNo = pAction->errCode; } return code; } +static int32_t mndTransExecNullMsg(SMnode *pMnode, STrans *pTrans, STransAction *pAction) { + pAction->rawWritten = 0; + pAction->errCode = 0; + mDebug("trans:%d, %s:%d null action executed", pTrans->id, mndTransStr(pAction->stage), pAction->id); + + pTrans->lastAction = pAction->id; + pTrans->lastMsgType = pAction->msgType; + pTrans->lastEpset = pAction->epSet; + pTrans->lastErrorNo = 0; + return 0; +} + static int32_t mndTransExecSingleAction(SMnode *pMnode, STrans *pTrans, STransAction *pAction) { if (pAction->actionType == TRANS_ACTION_RAW) { return mndTransWriteSingleLog(pMnode, pTrans, pAction); } else if (pAction->actionType == TRANS_ACTION_MSG) { return mndTransSendSingleMsg(pMnode, pTrans, pAction); } else { - pAction->rawWritten = 0; - pAction->errCode = 0; - mDebug("trans:%d, %s:%d null action executed", pTrans->id, mndTransStr(pAction->stage), pAction->id); - return 0; + return mndTransExecNullMsg(pMnode, pTrans, pAction); } } @@ -994,19 +1024,19 @@ static int32_t mndTransExecuteActions(SMnode *pMnode, STrans *pTrans, SArray *pA if (numOfExecuted == numOfActions) { if (errCode == 0) { - pTrans->lastErrorAction = 0; + pTrans->lastAction = 0; pTrans->lastErrorNo = 0; - pTrans->lastErrorMsgType = 0; - memset(&pTrans->lastErrorEpset, 0, sizeof(pTrans->lastErrorEpset)); + pTrans->lastMsgType = 0; + memset(&pTrans->lastEpset, 0, sizeof(pTrans->lastEpset)); mDebug("trans:%d, all %d actions execute successfully", pTrans->id, numOfActions); return 0; } else { mError("trans:%d, all %d actions executed, code:0x%x", pTrans->id, numOfActions, errCode & 0XFFFF); if (pErrAction != NULL) { - pTrans->lastErrorMsgType = pErrAction->msgType; - 
pTrans->lastErrorAction = pErrAction->id; + pTrans->lastMsgType = pErrAction->msgType; + pTrans->lastAction = pErrAction->id; pTrans->lastErrorNo = pErrAction->errCode; - pTrans->lastErrorEpset = pErrAction->epSet; + pTrans->lastEpset = pErrAction->epSet; } mndTransResetActions(pMnode, pTrans, pArray); terrno = errCode; @@ -1073,15 +1103,15 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) } if (code == 0) { - pTrans->lastErrorAction = 0; + pTrans->lastAction = 0; pTrans->lastErrorNo = 0; - pTrans->lastErrorMsgType = 0; - memset(&pTrans->lastErrorEpset, 0, sizeof(pTrans->lastErrorEpset)); + pTrans->lastMsgType = 0; + memset(&pTrans->lastEpset, 0, sizeof(pTrans->lastEpset)); } else { - pTrans->lastErrorMsgType = pAction->msgType; - pTrans->lastErrorAction = action; - pTrans->lastErrorNo = pAction->errCode; - pTrans->lastErrorEpset = pAction->epSet; + pTrans->lastMsgType = pAction->msgType; + pTrans->lastAction = action; + pTrans->lastErrorNo = code; + pTrans->lastEpset = pAction->epSet; } if (code == 0) { @@ -1432,23 +1462,21 @@ static int32_t mndRetrieveTrans(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBl pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataAppend(pColInfo, numOfRows, (const char *)&pTrans->lastExecTime, false); - char lastError[TSDB_TRANS_ERROR_LEN + VARSTR_HEADER_SIZE] = {0}; - char detail[TSDB_TRANS_ERROR_LEN] = {0}; - if (pTrans->lastErrorNo != 0) { - int32_t len = snprintf(detail, sizeof(detail), "action:%d errno:0x%x(%s) ", pTrans->lastErrorAction, - pTrans->lastErrorNo & 0xFFFF, tstrerror(pTrans->lastErrorNo)); - SEpSet epset = pTrans->lastErrorEpset; - if (epset.numOfEps > 0) { - len += snprintf(detail + len, sizeof(detail) - len, "msgType:%s numOfEps:%d inUse:%d ", - TMSG_INFO(pTrans->lastErrorMsgType), epset.numOfEps, epset.inUse); - for (int32_t i = 0; i < pTrans->lastErrorEpset.numOfEps; ++i) { - len += snprintf(detail + len, sizeof(detail) - len, "ep:%d-%s:%u ", i, epset.eps[i].fqdn, 
epset.eps[i].port); - } + char lastInfo[TSDB_TRANS_ERROR_LEN + VARSTR_HEADER_SIZE] = {0}; + char detail[TSDB_TRANS_ERROR_LEN] = {0}; + int32_t len = snprintf(detail, sizeof(detail), "action:%d code:0x%x(%s) ", pTrans->lastAction, + pTrans->lastErrorNo & 0xFFFF, tstrerror(pTrans->lastErrorNo)); + SEpSet epset = pTrans->lastEpset; + if (epset.numOfEps > 0) { + len += snprintf(detail + len, sizeof(detail) - len, "msgType:%s numOfEps:%d inUse:%d ", + TMSG_INFO(pTrans->lastMsgType), epset.numOfEps, epset.inUse); + for (int32_t i = 0; i < pTrans->lastEpset.numOfEps; ++i) { + len += snprintf(detail + len, sizeof(detail) - len, "ep:%d-%s:%u ", i, epset.eps[i].fqdn, epset.eps[i].port); } } - STR_WITH_MAXSIZE_TO_VARSTR(lastError, detail, pShow->pMeta->pSchemas[cols].bytes); + STR_WITH_MAXSIZE_TO_VARSTR(lastInfo, detail, pShow->pMeta->pSchemas[cols].bytes); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataAppend(pColInfo, numOfRows, (const char *)lastError, false); + colDataAppend(pColInfo, numOfRows, (const char *)lastInfo, false); numOfRows++; sdbRelease(pSdb, pTrans); diff --git a/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim b/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim index f44061ce6e..cb9f4173ac 100644 --- a/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim +++ b/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim @@ -104,8 +104,7 @@ if $data(5)[4] != ready then goto step2 endi -return -print =============== step3: move follower +print =============== step31: move follower $leaderExist = 0 $leaderVnode = 0 $follower1 = 0 @@ -120,23 +119,23 @@ step3: return -1 endi sql show d1.vgroups -print ===> $data00 $data01 $data02 $data03 $data04 $data05 +print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 if $rows != 1 then return -1 endi -if $data(2)[3] == leader then +if $data(2)[4] == leader then $leaderExist = 1 $leaderVnode = 4 
$follower1 = 2 $follower2 = 3 endi -if $data(2)[4] != ready then +if $data(2)[6] == leader then $leaderExist = 1 $leaderVnode = 3 $follower1 = 2 $follower2 = 4 endi -if $data(3)[4] != ready then +if $data(2)[8] == leader then $leaderExist = 1 $leaderVnode = 2 $follower1 = 3 @@ -146,8 +145,25 @@ if $leaderExist != 1 then goto step3 endi -print redistribute vgroup 2 dnode $follower1 dnode $follower2 dnode 5 -sql redistribute vgroup 2 dnode $follower1 dnode $follower2 dnode 5 +print leader $leaderVnode +print follower1 $follower1 +print follower2 $follower2 + +print =============== step32: move follower2 +print redistribute vgroup 2 dnode $leaderVnode dnode $follower2 dnode 5 +sql redistribute vgroup 2 dnode $leaderVnode dnode $follower2 dnode 5 +return +print =============== step33: move follower1 +print redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode 5 +sql redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode 5 + +print =============== step34: move follower2 +print redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower2 +sql redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower2 + +print =============== step35: move follower1 +print redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower1 +sql redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower1 print =============== step4: move leader From e9d466ec82a7a61d4fa614447d0b3dcee77346d5 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Fri, 10 Jun 2022 16:51:17 +0800 Subject: [PATCH 17/21] refactor(sync) delete some trace log --- source/libs/sync/inc/syncRaftStore.h | 4 +- source/libs/sync/inc/syncSnapshot.h | 21 +- source/libs/sync/src/syncAppendEntries.c | 21 +- source/libs/sync/src/syncAppendEntriesReply.c | 36 ++- source/libs/sync/src/syncCommit.c | 36 ++- source/libs/sync/src/syncEnv.c | 2 +- source/libs/sync/src/syncIndexMgr.c | 2 +- source/libs/sync/src/syncMain.c | 90 ++++---- source/libs/sync/src/syncReplication.c | 3 +- 
source/libs/sync/src/syncSnapshot.c | 211 ++++++++++++------ .../sync/test/syncSnapshotReceiverTest.cpp | 6 +- source/libs/sync/test/syncTestTool.cpp | 1 - 12 files changed, 268 insertions(+), 165 deletions(-) diff --git a/source/libs/sync/inc/syncRaftStore.h b/source/libs/sync/inc/syncRaftStore.h index 9f03ac3e55..e0cbcf0744 100644 --- a/source/libs/sync/inc/syncRaftStore.h +++ b/source/libs/sync/inc/syncRaftStore.h @@ -49,8 +49,8 @@ void raftStoreClearVote(SRaftStore *pRaftStore); void raftStoreNextTerm(SRaftStore *pRaftStore); void raftStoreSetTerm(SRaftStore *pRaftStore, SyncTerm term); int32_t raftStoreFromJson(SRaftStore *pRaftStore, cJSON *pJson); -cJSON *raftStore2Json(SRaftStore *pRaftStore); -char *raftStore2Str(SRaftStore *pRaftStore); +cJSON * raftStore2Json(SRaftStore *pRaftStore); +char * raftStore2Str(SRaftStore *pRaftStore); // for debug ------------------- void raftStorePrint(SRaftStore *pObj); diff --git a/source/libs/sync/inc/syncSnapshot.h b/source/libs/sync/inc/syncSnapshot.h index b16e47b51e..9fbcdf138b 100644 --- a/source/libs/sync/inc/syncSnapshot.h +++ b/source/libs/sync/inc/syncSnapshot.h @@ -39,8 +39,8 @@ typedef struct SSyncSnapshotSender { bool start; int32_t seq; int32_t ack; - void *pReader; - void *pCurrentBlock; + void * pReader; + void * pCurrentBlock; int32_t blockLen; SSnapshot snapshot; int64_t sendingMS; @@ -58,28 +58,29 @@ void snapshotSenderStart(SSyncSnapshotSender *pSender); void snapshotSenderStop(SSyncSnapshotSender *pSender); int32_t snapshotSend(SSyncSnapshotSender *pSender); int32_t snapshotReSend(SSyncSnapshotSender *pSender); -cJSON *snapshotSender2Json(SSyncSnapshotSender *pSender); -char *snapshotSender2Str(SSyncSnapshotSender *pSender); +cJSON * snapshotSender2Json(SSyncSnapshotSender *pSender); +char * snapshotSender2Str(SSyncSnapshotSender *pSender); typedef struct SSyncSnapshotReceiver { bool start; int32_t ack; - void *pWriter; + void * pWriter; SyncTerm term; SyncTerm privateTerm; SSyncNode *pSyncNode; - 
int32_t replicaIndex; + SRaftId fromId; + } SSyncSnapshotReceiver; -SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, int32_t replicaIndex); +SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId fromId); void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver); -void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm); +void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SRaftId fromId); bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver); void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver, bool apply); -cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver); -char *snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver); +cJSON * snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver); +char * snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver); int32_t syncNodeOnSnapshotSendCb(SSyncNode *ths, SyncSnapshotSend *pMsg); int32_t syncNodeOnSnapshotRspCb(SSyncNode *ths, SyncSnapshotRsp *pMsg); diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index ae4ccaf2d5..370798f7b9 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -386,11 +386,13 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { } } - char* sOld = syncCfg2Str(&oldSyncCfg); - char* sNew = syncCfg2Str(&newSyncCfg); - sInfo("==config change== 0x11 old:%s new:%s isDrop:%d \n", sOld, sNew, isDrop); - taosMemoryFree(sOld); - taosMemoryFree(sNew); + if (gRaftDetailLog) { + char* sOld = syncCfg2Str(&oldSyncCfg); + char* sNew = syncCfg2Str(&newSyncCfg); + sInfo("==config change== 0x11 old:%s new:%s isDrop:%d \n", sOld, sNew, isDrop); + taosMemoryFree(sOld); + taosMemoryFree(sNew); + } } // always call FpReConfigCb @@ -745,10 +747,13 @@ int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMs // advance commit index to sanpshot first 
SSnapshot snapshot; ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); - if (snapshot.lastApplyIndex > ths->commitIndex) { - sInfo("sync event vgId:%d commit by snapshot from index:%ld to index:%ld, %s", ths->vgId, ths->commitIndex, - snapshot.lastApplyIndex, syncUtilState2String(ths->state)); + if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex > ths->commitIndex) { + SyncIndex commitBegin = ths->commitIndex; + SyncIndex commitEnd = snapshot.lastApplyIndex; ths->commitIndex = snapshot.lastApplyIndex; + + sInfo("sync event vgId:%d commit by snapshot from index:%ld to index:%ld, %s", ths->vgId, commitBegin, + commitEnd, syncUtilState2String(ths->state)); } SyncIndex beginIndex = ths->commitIndex + 1; diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index af83b3ac94..7fc35afbb1 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -121,7 +121,7 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries syncIndexMgrLog2("recv SyncAppendEntriesReply, before pNextIndex:", ths->pNextIndex); syncIndexMgrLog2("recv SyncAppendEntriesReply, before pMatchIndex:", ths->pMatchIndex); - { + if (gRaftDetailLog) { SSnapshot snapshot; ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); sTrace("recv SyncAppendEntriesReply, before snapshot.lastApplyIndex:%ld, snapshot.lastApplyTerm:%lu", @@ -147,7 +147,10 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries if (pMsg->success) { // nextIndex' = [nextIndex EXCEPT ![i][j] = m.mmatchIndex + 1] syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1); - sTrace("update next match, index:%ld, success:%d", pMsg->matchIndex + 1, pMsg->success); + + if (gRaftDetailLog) { + sTrace("update next match, index:%ld, success:%d", pMsg->matchIndex + 1, pMsg->success); + } // matchIndex' = [matchIndex EXCEPT ![i][j] = m.mmatchIndex] 
syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex); @@ -159,7 +162,9 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries } else { SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); - sTrace("update next not match, begin, index:%ld, success:%d", nextIndex, pMsg->success); + if (gRaftDetailLog) { + sTrace("update next index not match, begin, index:%ld, success:%d", nextIndex, pMsg->success); + } // notice! int64, uint64 if (nextIndex > SYNC_INDEX_BEGIN) { @@ -182,12 +187,19 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries uint16_t port; syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); - char* s = snapshotSender2Str(pSender); - sInfo( - "sync event vgId:%d snapshot send to %s:%d start sender first time, lastApplyIndex:%ld lastApplyTerm:%lu " - "sender:%s", - ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, s); - taosMemoryFree(s); + if (gRaftDetailLog) { + char* s = snapshotSender2Str(pSender); + sInfo( + "sync event vgId:%d snapshot send to %s:%d start sender first time, lastApplyIndex:%ld lastApplyTerm:%lu " + "sender:%s", + ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, s); + taosMemoryFree(s); + } else { + sInfo( + "sync event vgId:%d snapshot send to %s:%d start sender first time, lastApplyIndex:%ld " + "lastApplyTerm:%lu", + ths->vgId, host, port, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm); + } } SyncIndex sentryIndex = pSender->snapshot.lastApplyIndex + 1; @@ -202,12 +214,14 @@ int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntries } syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex); - sTrace("update next not match, end, index:%ld, success:%d", nextIndex, pMsg->success); + if (gRaftDetailLog) { + sTrace("update next index not match, end, 
index:%ld, success:%d", nextIndex, pMsg->success); + } } syncIndexMgrLog2("recv SyncAppendEntriesReply, after pNextIndex:", ths->pNextIndex); syncIndexMgrLog2("recv SyncAppendEntriesReply, after pMatchIndex:", ths->pMatchIndex); - { + if (gRaftDetailLog) { SSnapshot snapshot; ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); sTrace("recv SyncAppendEntriesReply, after snapshot.lastApplyIndex:%ld, snapshot.lastApplyTerm:%lu", diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 96f60be51b..8236301f8e 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -51,18 +51,25 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { // advance commit index to sanpshot first SSnapshot snapshot; pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); - if (snapshot.lastApplyIndex > pSyncNode->commitIndex) { + if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) { + SyncIndex commitBegin = pSyncNode->commitIndex; + SyncIndex commitEnd = snapshot.lastApplyIndex; + pSyncNode->commitIndex = snapshot.lastApplyIndex; + sInfo("sync event vgId:%d commit by snapshot from index:%ld to index:%ld, %s", pSyncNode->vgId, pSyncNode->commitIndex, snapshot.lastApplyIndex, syncUtilState2String(pSyncNode->state)); - pSyncNode->commitIndex = snapshot.lastApplyIndex; } // update commit index SyncIndex newCommitIndex = pSyncNode->commitIndex; for (SyncIndex index = syncNodeGetLastIndex(pSyncNode); index > pSyncNode->commitIndex; --index) { bool agree = syncAgree(pSyncNode, index); - sTrace("syncMaybeAdvanceCommitIndex syncAgree:%d, index:%ld, pSyncNode->commitIndex:%ld", agree, index, - pSyncNode->commitIndex); + + if (gRaftDetailLog) { + sTrace("syncMaybeAdvanceCommitIndex syncAgree:%d, index:%ld, pSyncNode->commitIndex:%ld", agree, index, + pSyncNode->commitIndex); + } + if (agree) { // term SSyncRaftEntry* pEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, index); @@ -72,16 
+79,21 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { if (pEntry->term == pSyncNode->pRaftStore->currentTerm) { // update commit index newCommitIndex = index; - sTrace("syncMaybeAdvanceCommitIndex maybe to update, newCommitIndex:%ld commit, pSyncNode->commitIndex:%ld", - newCommitIndex, pSyncNode->commitIndex); + + if (gRaftDetailLog) { + sTrace("syncMaybeAdvanceCommitIndex maybe to update, newCommitIndex:%ld commit, pSyncNode->commitIndex:%ld", + newCommitIndex, pSyncNode->commitIndex); + } syncEntryDestory(pEntry); break; } else { - sTrace( - "syncMaybeAdvanceCommitIndex can not commit due to term not equal, pEntry->term:%lu, " - "pSyncNode->pRaftStore->currentTerm:%lu", - pEntry->term, pSyncNode->pRaftStore->currentTerm); + if (gRaftDetailLog) { + sTrace( + "syncMaybeAdvanceCommitIndex can not commit due to term not equal, pEntry->term:%lu, " + "pSyncNode->pRaftStore->currentTerm:%lu", + pEntry->term, pSyncNode->pRaftStore->currentTerm); + } } syncEntryDestory(pEntry); @@ -92,7 +104,9 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { SyncIndex beginIndex = pSyncNode->commitIndex + 1; SyncIndex endIndex = newCommitIndex; - sTrace("syncMaybeAdvanceCommitIndex sync commit %ld", newCommitIndex); + if (gRaftDetailLog) { + sTrace("syncMaybeAdvanceCommitIndex sync commit %ld", newCommitIndex); + } // update commit index pSyncNode->commitIndex = newCommitIndex; diff --git a/source/libs/sync/src/syncEnv.c b/source/libs/sync/src/syncEnv.c index 945d59646b..e67439f8fe 100644 --- a/source/libs/sync/src/syncEnv.c +++ b/source/libs/sync/src/syncEnv.c @@ -40,7 +40,7 @@ int32_t syncEnvStart() { // gSyncEnv = doSyncEnvStart(gSyncEnv); gSyncEnv = doSyncEnvStart(); assert(gSyncEnv != NULL); - sTrace("syncEnvStart ok!"); + sTrace("sync env start ok"); return ret; } diff --git a/source/libs/sync/src/syncIndexMgr.c b/source/libs/sync/src/syncIndexMgr.c index ecc1c8f1e2..18cb55b417 100644 --- a/source/libs/sync/src/syncIndexMgr.c +++ 
b/source/libs/sync/src/syncIndexMgr.c @@ -119,7 +119,7 @@ cJSON *syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr) { char *syncIndexMgr2Str(SSyncIndexMgr *pSyncIndexMgr) { cJSON *pJson = syncIndexMgr2Json(pSyncIndexMgr); - char *serialized = cJSON_Print(pJson); + char * serialized = cJSON_Print(pJson); cJSON_Delete(pJson); return serialized; } diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index f124cff786..d60d943a67 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -35,7 +35,7 @@ #include "syncVoteMgr.h" #include "tref.h" -bool gRaftDetailLog = true; +bool gRaftDetailLog = false; static int32_t tsNodeRefId = -1; @@ -87,7 +87,9 @@ int64_t syncOpen(const SSyncInfo* pSyncInfo) { SSyncNode* pSyncNode = syncNodeOpen(pSyncInfo); assert(pSyncNode != NULL); - syncNodeLog2("syncNodeOpen open success", pSyncNode); + if (gRaftDetailLog) { + syncNodeLog2("syncNodeOpen open success", pSyncNode); + } pSyncNode->rid = taosAddRef(tsNodeRefId, pSyncNode); if (pSyncNode->rid < 0) { @@ -174,7 +176,10 @@ int32_t syncSetStandby(int64_t rid) { int32_t syncReconfig(int64_t rid, const SSyncCfg* pSyncCfg) { int32_t ret = 0; char* configChange = syncCfg2Str((SSyncCfg*)pSyncCfg); - sInfo("==syncReconfig== newconfig:%s", configChange); + + if (gRaftDetailLog) { + sInfo("==syncReconfig== newconfig:%s", configChange); + } SRpcMsg rpcMsg = {0}; rpcMsg.msgType = TDMT_SYNC_CONFIG_CHANGE; @@ -374,13 +379,14 @@ void setHeartbeatTimerMS(int64_t rid, int32_t hbTimerMS) { } int32_t syncPropose(int64_t rid, const SRpcMsg* pMsg, bool isWeak) { - sTrace("syncPropose msgType:%d ", pMsg->msgType); + int32_t ret = TAOS_SYNC_PROPOSE_SUCCESS; - int32_t ret = TAOS_SYNC_PROPOSE_SUCCESS; SSyncNode* pSyncNode = taosAcquireRef(tsNodeRefId, rid); - if (pSyncNode == NULL) return TAOS_SYNC_PROPOSE_OTHER_ERROR; - + if (pSyncNode == NULL) { + return TAOS_SYNC_PROPOSE_OTHER_ERROR; + } assert(rid == pSyncNode->rid); + sTrace("sync event vgId:%d propose 
msgType:%s", pSyncNode->vgId, TMSG_INFO(pMsg->msgType)); if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { SRespStub stub; @@ -441,9 +447,11 @@ SSyncNode* syncNodeOpen(const SSyncInfo* pOldSyncInfo) { assert(pSyncNode->pRaftCfg != NULL); pSyncInfo->syncCfg = pSyncNode->pRaftCfg->cfg; - char* seralized = raftCfg2Str(pSyncNode->pRaftCfg); - sInfo("syncNodeOpen update config :%s", seralized); - taosMemoryFree(seralized); + if (gRaftDetailLog) { + char* seralized = raftCfg2Str(pSyncNode->pRaftCfg); + sInfo("syncNodeOpen update config :%s", seralized); + taosMemoryFree(seralized); + } raftCfgClose(pSyncNode->pRaftCfg); } @@ -614,7 +622,7 @@ SSyncNode* syncNodeOpen(const SSyncInfo* pOldSyncInfo) { } // snapshot receivers - pSyncNode->pNewNodeReceiver = snapshotReceiverCreate(pSyncNode, 100); + pSyncNode->pNewNodeReceiver = snapshotReceiverCreate(pSyncNode, EMPTY_RAFT_ID); // start in syncNodeStart // start raft @@ -632,49 +640,28 @@ void syncNodeStart(SSyncNode* pSyncNode) { raftStoreNextTerm(pSyncNode->pRaftStore); syncNodeBecomeLeader(pSyncNode, "one replica start"); - syncNodeLog2("==state change become leader immediately==", pSyncNode); - // Raft 3.6.2 Committing entries from previous terms // use this now syncNodeAppendNoop(pSyncNode); syncMaybeAdvanceCommitIndex(pSyncNode); // maybe only one replica - /* - sInfo("==syncNodeStart== RestoreFinish begin 1 replica tsem_wait %p", pSyncNode); - tsem_wait(&pSyncNode->restoreSem); - sInfo("==syncNodeStart== RestoreFinish end 1 replica tsem_wait %p", pSyncNode); - */ - - /* - while (pSyncNode->restoreFinish != true) { - taosMsleep(10); + if (gRaftDetailLog) { + syncNodeLog2("==state change become leader immediately==", pSyncNode); } - */ - sInfo("==syncNodeStart== restoreFinish ok 1 replica %p vgId:%d", pSyncNode, pSyncNode->vgId); return; } syncNodeBecomeFollower(pSyncNode, "first start"); - // for test - int32_t ret = 0; + // int32_t ret = 0; // ret = syncNodeStartPingTimer(pSyncNode); - assert(ret == 0); + // assert(ret 
== 0); - /* - sInfo("==syncNodeStart== RestoreFinish begin multi replica tsem_wait %p", pSyncNode); - tsem_wait(&pSyncNode->restoreSem); - sInfo("==syncNodeStart== RestoreFinish end multi replica tsem_wait %p", pSyncNode); - */ - - /* - while (pSyncNode->restoreFinish != true) { - taosMsleep(10); + if (gRaftDetailLog) { + syncNodeLog2("==state change become leader immediately==", pSyncNode); } - */ - sInfo("==syncNodeStart== restoreFinish ok multi replica %p vgId:%d", pSyncNode, pSyncNode->vgId); } void syncNodeStartStandBy(SSyncNode* pSyncNode) { @@ -1135,7 +1122,10 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, bool* isDro } raftCfgPersist(pSyncNode->pRaftCfg); - syncNodeLog2("==syncNodeUpdateConfig==", pSyncNode); + + if (gRaftDetailLog) { + syncNodeLog2("==syncNodeUpdateConfig==", pSyncNode); + } } SSyncNode* syncNodeAcquire(int64_t rid) { @@ -1475,9 +1465,11 @@ void syncNodeLog(SSyncNode* pObj) { } void syncNodeLog2(char* s, SSyncNode* pObj) { - char* serialized = syncNode2Str(pObj); - sTraceLong("syncNodeLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncNode2Str(pObj); + sTraceLong("syncNodeLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ------ local funciton --------- @@ -1807,11 +1799,13 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, } } - char* sOld = syncCfg2Str(&oldSyncCfg); - char* sNew = syncCfg2Str(&newSyncCfg); - sInfo("==config change== 0x11 old:%s new:%s isDrop:%d \n", sOld, sNew, isDrop); - taosMemoryFree(sOld); - taosMemoryFree(sNew); + if (gRaftDetailLog) { + char* sOld = syncCfg2Str(&oldSyncCfg); + char* sNew = syncCfg2Str(&newSyncCfg); + sInfo("==config change== 0x11 old:%s new:%s isDrop:%d \n", sOld, sNew, isDrop); + taosMemoryFree(sOld); + taosMemoryFree(sNew); + } } // always call FpReConfigCb @@ -1834,7 +1828,7 @@ int32_t 
syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, ths->pFsm->FpRestoreFinishCb(ths->pFsm); } ths->restoreFinish = true; - sInfo("restore finish %p vgId:%d", ths, ths->vgId); + sInfo("sync event vgId:%d restore finish", ths->vgId); } } diff --git a/source/libs/sync/src/syncReplication.c b/source/libs/sync/src/syncReplication.c index ff39b0b13d..08564f8293 100644 --- a/source/libs/sync/src/syncReplication.c +++ b/source/libs/sync/src/syncReplication.c @@ -122,7 +122,7 @@ int32_t syncNodeAppendEntriesPeersSnapshot(SSyncNode* pSyncNode) { syncIndexMgrLog2("begin append entries peers pNextIndex:", pSyncNode->pNextIndex); syncIndexMgrLog2("begin append entries peers pMatchIndex:", pSyncNode->pMatchIndex); logStoreSimpleLog2("begin append entries peers LogStore:", pSyncNode->pLogStore); - { + if (gRaftDetailLog) { SSnapshot snapshot; pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); sTrace("begin append entries peers, snapshot.lastApplyIndex:%ld, snapshot.lastApplyTerm:%lu", @@ -201,7 +201,6 @@ int32_t syncNodeReplicate(SSyncNode* pSyncNode) { } int32_t syncNodeAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntries* pMsg) { - sTrace("syncNodeAppendEntries pSyncNode:%p ", pSyncNode); int32_t ret = 0; SRpcMsg rpcMsg; diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index af139ccf6e..a23fe2c38a 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -20,7 +20,7 @@ #include "syncUtil.h" #include "wal.h" -static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm); +static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SRaftId fromId); //---------------------------------- SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaIndex) { @@ -105,15 +105,23 @@ void snapshotSenderStart(SSyncSnapshotSender *pSender) { syncSnapshotSend2RpcMsg(pMsg, 
&rpcMsg); syncNodeSendMsgById(&(pMsg->destId), pSender->pSyncNode, &rpcMsg); - char *msgStr = syncSnapshotSend2Str(pMsg); char host[128]; uint16_t port; syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); - sTrace( - "sync event vgId:%d snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send msg:%s", - pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, msgStr); - taosMemoryFree(msgStr); + + if (gRaftDetailLog) { + char *msgStr = syncSnapshotSend2Str(pMsg); + sTrace( + "sync event vgId:%d snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send " + "msg:%s", + pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, + pSender->snapshot.lastApplyTerm, msgStr); + taosMemoryFree(msgStr); + } else { + sTrace("sync event vgId:%d snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu", + pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, + pSender->snapshot.lastApplyTerm); + } syncSnapshotSendDestroy(pMsg); } @@ -185,9 +193,11 @@ void snapshotSenderStop(SSyncSnapshotSender *pSender) { pSender->start = false; - char *s = snapshotSender2Str(pSender); - sInfo("snapshotSenderStop %s", s); - taosMemoryFree(s); + if (gRaftDetailLog) { + char *s = snapshotSender2Str(pSender); + sInfo("snapshotSenderStop %s", s); + taosMemoryFree(s); + } } // when sender receiver ack, call this function to send msg from seq @@ -227,24 +237,29 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); syncNodeSendMsgById(&(pMsg->destId), pSender->pSyncNode, &rpcMsg); - char *msgStr = syncSnapshotSend2Str(pMsg); char host[128]; uint16_t port; syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); + if 
(pSender->seq == SYNC_SNAPSHOT_SEQ_END) { - sTrace( - "sync event vgId:%d snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send " - "msg:%s", - pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, msgStr); + if (gRaftDetailLog) { + char *msgStr = syncSnapshotSend2Str(pMsg); + sTrace( + "sync event vgId:%d snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send " + "msg:%s", + pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, + pSender->snapshot.lastApplyTerm, msgStr); + taosMemoryFree(msgStr); + } else { + sTrace("sync event vgId:%d snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu", + pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, + pSender->snapshot.lastApplyTerm); + } } else { - sTrace( - "sync event vgId:%d snapshot send to %s:%d sending seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send " - "msg:%s", - pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, - pSender->snapshot.lastApplyTerm, msgStr); + sTrace("sync event vgId:%d snapshot send to %s:%d sending seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu", + pSender->pSyncNode->vgId, host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, + pSender->snapshot.lastApplyTerm); } - taosMemoryFree(msgStr); syncSnapshotSendDestroy(pMsg); return 0; @@ -266,13 +281,19 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) { syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); syncNodeSendMsgById(&(pMsg->destId), pSender->pSyncNode, &rpcMsg); - char *msgStr = syncSnapshotSend2Str(pMsg); char host[128]; uint16_t port; syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); - sTrace("sync event vgId:%d snapshot send to %s:%d resend seq:%d 
ack:%d send msg:%s", pSender->pSyncNode->vgId, host, - port, pSender->seq, pSender->ack, msgStr); - taosMemoryFree(msgStr); + + if (gRaftDetailLog) { + char *msgStr = syncSnapshotSend2Str(pMsg); + sTrace("sync event vgId:%d snapshot send to %s:%d resend seq:%d ack:%d send msg:%s", pSender->pSyncNode->vgId, + host, port, pSender->seq, pSender->ack, msgStr); + taosMemoryFree(msgStr); + } else { + sTrace("sync event vgId:%d snapshot send to %s:%d resend seq:%d ack:%d", pSender->pSyncNode->vgId, host, port, + pSender->seq, pSender->ack); + } syncSnapshotSendDestroy(pMsg); } @@ -337,7 +358,7 @@ char *snapshotSender2Str(SSyncSnapshotSender *pSender) { } // ------------------------------------- -SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, int32_t replicaIndex) { +SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId fromId) { bool condition = (pSyncNode->pFsm->FpSnapshotStartWrite != NULL) && (pSyncNode->pFsm->FpSnapshotStopWrite != NULL) && (pSyncNode->pFsm->FpSnapshotDoWrite != NULL); @@ -351,7 +372,7 @@ SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, int32_t repl pReceiver->ack = SYNC_SNAPSHOT_SEQ_BEGIN; pReceiver->pWriter = NULL; pReceiver->pSyncNode = pSyncNode; - pReceiver->replicaIndex = replicaIndex; + pReceiver->fromId = fromId; pReceiver->term = pSyncNode->pRaftStore->currentTerm; pReceiver->privateTerm = 0; @@ -371,10 +392,11 @@ void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver) { bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver) { return pReceiver->start; } // begin receive snapshot msg (current term, seq begin) -static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm) { +static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SRaftId fromId) { pReceiver->term = pReceiver->pSyncNode->pRaftStore->currentTerm; pReceiver->privateTerm = privateTerm; pReceiver->ack = SYNC_SNAPSHOT_SEQ_BEGIN; + 
pReceiver->fromId = fromId; ASSERT(pReceiver->pWriter == NULL); int32_t ret = pReceiver->pSyncNode->pFsm->FpSnapshotStartWrite(pReceiver->pSyncNode->pFsm, &(pReceiver->pWriter)); @@ -383,14 +405,15 @@ static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm p // if receiver receive msg from seq = SYNC_SNAPSHOT_SEQ_BEGIN, start receiver // if already start, force close, start again -void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm) { +void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SRaftId fromId) { if (!snapshotReceiverIsStart(pReceiver)) { // start - snapshotReceiverDoStart(pReceiver, privateTerm); + snapshotReceiverDoStart(pReceiver, privateTerm, fromId); pReceiver->start = true; } else { // already start + sInfo("snapshot recv, receiver already start"); // force close, abandon incomplete data int32_t ret = @@ -399,15 +422,15 @@ void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTer pReceiver->pWriter = NULL; // start again - snapshotReceiverDoStart(pReceiver, privateTerm); + snapshotReceiverDoStart(pReceiver, privateTerm, fromId); pReceiver->start = true; - - ASSERT(0); } - char *s = snapshotReceiver2Str(pReceiver); - sInfo("snapshotReceiverStart %s", s); - taosMemoryFree(s); + if (gRaftDetailLog) { + char *s = snapshotReceiver2Str(pReceiver); + sInfo("snapshotReceiverStart %s", s); + taosMemoryFree(s); + } } void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver, bool apply) { @@ -424,9 +447,11 @@ void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver, bool apply) { ++(pReceiver->privateTerm); } - char *s = snapshotReceiver2Str(pReceiver); - sInfo("snapshotReceiverStop %s", s); - taosMemoryFree(s); + if (gRaftDetailLog) { + char *s = snapshotReceiver2Str(pReceiver); + sInfo("snapshotReceiverStop %s", s); + taosMemoryFree(s); + } } cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver) { @@ -442,7 +467,22 @@ cJSON 
*snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver) { snprintf(u64buf, sizeof(u64buf), "%p", pReceiver->pSyncNode); cJSON_AddStringToObject(pRoot, "pSyncNode", u64buf); - cJSON_AddNumberToObject(pRoot, "replicaIndex", pReceiver->replicaIndex); + + cJSON *pFromId = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pReceiver->fromId.addr); + cJSON_AddStringToObject(pFromId, "addr", u64buf); + { + uint64_t u64 = pReceiver->fromId.addr; + cJSON *pTmp = pFromId; + char host[128] = {0}; + uint16_t port; + syncUtilU642Addr(u64, host, sizeof(host), &port); + cJSON_AddStringToObject(pTmp, "addr_host", host); + cJSON_AddNumberToObject(pTmp, "addr_port", port); + } + cJSON_AddNumberToObject(pFromId, "vgId", pReceiver->fromId.vgId); + cJSON_AddItemToObject(pRoot, "fromId", pFromId); + snprintf(u64buf, sizeof(u64buf), "%lu", pReceiver->term); cJSON_AddStringToObject(pRoot, "term", u64buf); @@ -474,17 +514,23 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { if (pMsg->term == pSyncNode->pRaftStore->currentTerm) { if (pMsg->seq == SYNC_SNAPSHOT_SEQ_BEGIN) { // begin - snapshotReceiverStart(pReceiver, pMsg->privateTerm); + snapshotReceiverStart(pReceiver, pMsg->privateTerm, pMsg->srcId); pReceiver->ack = pMsg->seq; needRsp = true; - char *msgStr = syncSnapshotSend2Str(pMsg); char host[128]; uint16_t port; syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); - sTrace("sync event vgId:%d snapshot recv from %s:%d begin ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", - pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); - taosMemoryFree(msgStr); + + if (gRaftDetailLog) { + char *msgStr = syncSnapshotSend2Str(pMsg); + sTrace("sync event vgId:%d snapshot recv from %s:%d begin ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", + pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + taosMemoryFree(msgStr); + } else { + sTrace("sync event vgId:%d snapshot 
recv from %s:%d begin ack:%d, lastIndex:%ld, lastTerm:%lu", + pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm); + } } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_END) { // end, finish FSM @@ -492,31 +538,46 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { ASSERT(writeCode == 0); pSyncNode->pFsm->FpSnapshotStopWrite(pSyncNode->pFsm, pReceiver->pWriter, true); - pSyncNode->pLogStore->syncLogSetBeginIndex(pSyncNode->pLogStore, pMsg->lastIndex + 1); - char *logSimpleStr = logStoreSimple2Str(pSyncNode->pLogStore); + SSnapshot snapshot; pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + char host[128]; uint16_t port; syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); - sInfo( - "sync event vgId:%d snapshot recv from %s:%d finish, update log begin index:%ld, " - "snapshot.lastApplyIndex:%ld, " - "snapshot.lastApplyTerm:%lu, raft log:%s", - pSyncNode->vgId, host, port, pMsg->lastIndex + 1, snapshot.lastApplyIndex, snapshot.lastApplyTerm, - logSimpleStr); - taosMemoryFree(logSimpleStr); + + if (gRaftDetailLog) { + char *logSimpleStr = logStoreSimple2Str(pSyncNode->pLogStore); + sInfo( + "sync event vgId:%d snapshot recv from %s:%d finish, update log begin index:%ld, " + "snapshot.lastApplyIndex:%ld, " + "snapshot.lastApplyTerm:%lu, raft log:%s", + pSyncNode->vgId, host, port, pMsg->lastIndex + 1, snapshot.lastApplyIndex, snapshot.lastApplyTerm, + logSimpleStr); + taosMemoryFree(logSimpleStr); + } else { + sInfo( + "sync event vgId:%d snapshot recv from %s:%d finish, update log begin index:%ld, " + "snapshot.lastApplyIndex:%ld, " + "snapshot.lastApplyTerm:%lu", + pSyncNode->vgId, host, port, pMsg->lastIndex + 1, snapshot.lastApplyIndex, snapshot.lastApplyTerm); + } pReceiver->pWriter = NULL; snapshotReceiverStop(pReceiver, true); pReceiver->ack = pMsg->seq; needRsp = true; - char *msgStr = syncSnapshotSend2Str(pMsg); - sTrace("sync event vgId:%d snapshot recv from %s:%d end ack:%d, 
lastIndex:%ld, lastTerm:%lu, recv msg:%s", - pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); - taosMemoryFree(msgStr); + if (gRaftDetailLog) { + char *msgStr = syncSnapshotSend2Str(pMsg); + sTrace("sync event vgId:%d snapshot recv from %s:%d end ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", + pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + taosMemoryFree(msgStr); + } else { + sTrace("sync event vgId:%d snapshot recv from %s:%d end ack:%d, lastIndex:%ld, lastTerm:%lu", + pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm); + } } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_FORCE_CLOSE) { pSyncNode->pFsm->FpSnapshotStopWrite(pSyncNode->pFsm, pReceiver->pWriter, false); @@ -527,12 +588,17 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { uint16_t port; syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); - char *msgStr = syncSnapshotSend2Str(pMsg); - sTrace( - "sync event vgId:%d snapshot recv from %s:%d force close ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", - pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); - - taosMemoryFree(msgStr); + if (gRaftDetailLog) { + char *msgStr = syncSnapshotSend2Str(pMsg); + sTrace( + "sync event vgId:%d snapshot recv from %s:%d force close ack:%d, lastIndex:%ld, lastTerm:%lu, recv " + "msg:%s", + pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + taosMemoryFree(msgStr); + } else { + sTrace("sync event vgId:%d snapshot recv from %s:%d force close ack:%d, lastIndex:%ld, lastTerm:%lu", + pReceiver->pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm); + } } else if (pMsg->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->seq < SYNC_SNAPSHOT_SEQ_END) { // transfering @@ -544,13 +610,20 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, 
SyncSnapshotSend *pMsg) { } needRsp = true; - char *msgStr = syncSnapshotSend2Str(pMsg); char host[128]; uint16_t port; syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); - sTrace("sync event vgId:%d snapshot recv from %s:%d receiving ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", - pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); - taosMemoryFree(msgStr); + + if (gRaftDetailLog) { + char *msgStr = syncSnapshotSend2Str(pMsg); + sTrace( + "sync event vgId:%d snapshot recv from %s:%d receiving ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", + pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + taosMemoryFree(msgStr); + } else { + sTrace("sync event vgId:%d snapshot recv from %s:%d receiving ack:%d, lastIndex:%ld, lastTerm:%lu", + pSyncNode->vgId, host, port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm); + } } else { ASSERT(0); diff --git a/source/libs/sync/test/syncSnapshotReceiverTest.cpp b/source/libs/sync/test/syncSnapshotReceiverTest.cpp index 69670f09a6..208a96daa4 100644 --- a/source/libs/sync/test/syncSnapshotReceiverTest.cpp +++ b/source/libs/sync/test/syncSnapshotReceiverTest.cpp @@ -41,7 +41,11 @@ SSyncSnapshotReceiver* createReceiver() { pSyncNode->pFsm->FpSnapshotStopWrite = SnapshotStopWrite; pSyncNode->pFsm->FpSnapshotDoWrite = SnapshotDoWrite; - SSyncSnapshotReceiver* pReceiver = snapshotReceiverCreate(pSyncNode, 2); + SRaftId id; + id.addr = syncUtilAddr2U64("1.2.3.4", 99); + id.vgId = 100; + + SSyncSnapshotReceiver* pReceiver = snapshotReceiverCreate(pSyncNode, id); pReceiver->start = true; pReceiver->ack = 20; pReceiver->pWriter = (void*)0x11; diff --git a/source/libs/sync/test/syncTestTool.cpp b/source/libs/sync/test/syncTestTool.cpp index 782baf3c97..60255ee2cb 100644 --- a/source/libs/sync/test/syncTestTool.cpp +++ b/source/libs/sync/test/syncTestTool.cpp @@ -235,7 +235,6 @@ int64_t createSyncNode(int32_t replicaNum, int32_t myIndex, int32_t vgId, 
SWal* } } - int64_t rid = syncOpen(&syncInfo); assert(rid > 0); From 972fee7fbc6b2d735942f38688c6e304a9f631e7 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Fri, 10 Jun 2022 17:39:30 +0800 Subject: [PATCH 18/21] refactor(sync): add rpcMsg to reconfig callback --- include/libs/sync/sync.h | 9 ++++-- source/dnode/mnode/impl/src/mndSync.c | 10 ++++++- source/dnode/vnode/src/vnd/vnodeSync.c | 11 +++++++- source/libs/sync/src/syncAppendEntries.c | 4 ++- source/libs/sync/src/syncMain.c | 28 +++++++++++++++---- .../test/syncConfigChangeSnapshotTest.cpp | 2 +- .../libs/sync/test/syncConfigChangeTest.cpp | 2 +- source/libs/sync/test/syncTestTool.cpp | 4 +-- 8 files changed, 54 insertions(+), 16 deletions(-) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 10ece0b219..9d1385bff2 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -83,8 +83,10 @@ typedef struct SReConfigCbMeta { SyncTerm term; SyncTerm currentTerm; SSyncCfg oldCfg; + SSyncCfg newCfg; bool isDrop; uint64_t flag; + uint64_t seqNum; } SReConfigCbMeta; typedef struct SSnapshot { @@ -106,7 +108,7 @@ typedef struct SSyncFSM { void (*FpRollBackCb)(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta); void (*FpRestoreFinishCb)(struct SSyncFSM* pFsm); - void (*FpReConfigCb)(struct SSyncFSM* pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta); + void (*FpReConfigCb)(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SReConfigCbMeta cbMeta); int32_t (*FpGetSnapshot)(struct SSyncFSM* pFsm, SSnapshot* pSnapshot); @@ -184,7 +186,6 @@ int64_t syncOpen(const SSyncInfo* pSyncInfo); void syncStart(int64_t rid); void syncStop(int64_t rid); int32_t syncSetStandby(int64_t rid); -int32_t syncReconfig(int64_t rid, const SSyncCfg* pSyncCfg); ESyncState syncGetMyRole(int64_t rid); const char* syncGetMyRoleStr(int64_t rid); SyncTerm syncGetMyTerm(int64_t rid); @@ -194,8 +195,10 @@ int32_t syncPropose(int64_t rid, const SRpcMsg* pMsg, bool isWeak); bool syncEnvIsStart(); const char* 
syncStr(ESyncState state); bool syncIsRestoreFinish(int64_t rid); +int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta); -int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta); +int32_t syncReconfig(int64_t rid, const SSyncCfg* pNewCfg); +int32_t syncReconfigRaw(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg); // to be moved to static void syncStartNormal(int64_t rid); diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index a0daa72d9a..a0f722b3d2 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -96,10 +96,18 @@ void mndRestoreFinish(struct SSyncFSM *pFsm) { } } -void mndReConfig(struct SSyncFSM *pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) { +void mndReConfig(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SReConfigCbMeta cbMeta) { SMnode *pMnode = pFsm->data; SSyncMgmt *pMgmt = &pMnode->syncMgmt; +#if 0 +// send response + SRpcMsg rpcMsg = {.msgType = pMsg->msgType, .contLen = pMsg->contLen, .conn.applyIndex = cbMeta.index}; + rpcMsg.pCont = rpcMallocCont(rpcMsg.contLen); + memcpy(rpcMsg.pCont, pMsg->pCont, pMsg->contLen); + syncGetAndDelRespRpc(pMnode->syncMgmt.sync, cbMeta.seqNum, &rpcMsg.info); +#endif + pMgmt->errCode = cbMeta.code; mInfo("trans:-1, sync reconfig is proposed, saved:%d code:0x%x, index:%" PRId64 " term:%" PRId64, pMgmt->transId, cbMeta.code, cbMeta.index, cbMeta.term); diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 816c0cfac9..4264d714a1 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -180,10 +180,18 @@ static int32_t vnodeSyncGetSnapshot(SSyncFSM *pFsm, SSnapshot *pSnapshot) { return 0; } -static void vnodeSyncReconfig(struct SSyncFSM *pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) { +static void vnodeSyncReconfig(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SReConfigCbMeta cbMeta) { SVnode *pVnode = pFsm->data; 
vInfo("vgId:%d, sync reconfig is confirmed", TD_VID(pVnode)); +#if 0 +// send response + SRpcMsg rpcMsg = {.msgType = pMsg->msgType, .contLen = pMsg->contLen, .conn.applyIndex = cbMeta.index}; + rpcMsg.pCont = rpcMallocCont(rpcMsg.contLen); + memcpy(rpcMsg.pCont, pMsg->pCont, pMsg->contLen); + syncGetAndDelRespRpc(pVnode->sync, cbMeta.seqNum, &rpcMsg.info); +#endif + // todo rpc response here // build rpc msg // put into apply queue @@ -212,6 +220,7 @@ static void vnodeSyncCommitMsg(SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta c memcpy(rpcMsg.pCont, pMsg->pCont, pMsg->contLen); syncGetAndDelRespRpc(pVnode->sync, cbMeta.seqNum, &rpcMsg.info); tmsgPutToQueue(&pVnode->msgCb, APPLY_QUEUE, &rpcMsg); + } else { char logBuf[256] = {0}; snprintf(logBuf, sizeof(logBuf), diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 370798f7b9..01c95d8241 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -401,10 +401,12 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { cbMeta.currentTerm = ths->pRaftStore->currentTerm; cbMeta.index = pEntry->index; cbMeta.term = pEntry->term; + cbMeta.newCfg = newSyncCfg; cbMeta.oldCfg = oldSyncCfg; + cbMeta.seqNum = pEntry->seqNum; cbMeta.flag = 0x11; cbMeta.isDrop = isDrop; - ths->pFsm->FpReConfigCb(ths->pFsm, newSyncCfg, cbMeta); + ths->pFsm->FpReConfigCb(ths->pFsm, &rpcMsg, cbMeta); } } diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index d60d943a67..26dbf6c47a 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -175,23 +175,37 @@ int32_t syncSetStandby(int64_t rid) { int32_t syncReconfig(int64_t rid, const SSyncCfg* pSyncCfg) { int32_t ret = 0; - char* configChange = syncCfg2Str((SSyncCfg*)pSyncCfg); + char* newconfig = syncCfg2Str((SSyncCfg*)pSyncCfg); if (gRaftDetailLog) { - sInfo("==syncReconfig== newconfig:%s", configChange); + 
sInfo("==syncReconfig== newconfig:%s", newconfig); } SRpcMsg rpcMsg = {0}; rpcMsg.msgType = TDMT_SYNC_CONFIG_CHANGE; rpcMsg.info.noResp = 1; - rpcMsg.contLen = strlen(configChange) + 1; + rpcMsg.contLen = strlen(newconfig) + 1; rpcMsg.pCont = rpcMallocCont(rpcMsg.contLen); - snprintf(rpcMsg.pCont, rpcMsg.contLen, "%s", configChange); - taosMemoryFree(configChange); + snprintf(rpcMsg.pCont, rpcMsg.contLen, "%s", newconfig); + taosMemoryFree(newconfig); ret = syncPropose(rid, &rpcMsg, false); return ret; } +int32_t syncReconfigRaw(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg) { + int32_t ret = 0; + char* newconfig = syncCfg2Str((SSyncCfg*)pNewCfg); + + pRpcMsg->msgType = TDMT_SYNC_CONFIG_CHANGE; + pRpcMsg->info.noResp = 1; + pRpcMsg->contLen = strlen(newconfig) + 1; + pRpcMsg->pCont = rpcMallocCont(pRpcMsg->contLen); + snprintf(pRpcMsg->pCont, pRpcMsg->contLen, "%s", newconfig); + taosMemoryFree(newconfig); + + return ret; +} + int32_t syncForwardToPeer(int64_t rid, const SRpcMsg* pMsg, bool isWeak) { int32_t ret = syncPropose(rid, pMsg, isWeak); return ret; @@ -1814,10 +1828,12 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, cbMeta.currentTerm = ths->pRaftStore->currentTerm; cbMeta.index = pEntry->index; cbMeta.term = pEntry->term; + cbMeta.newCfg = newSyncCfg; cbMeta.oldCfg = oldSyncCfg; + cbMeta.seqNum = pEntry->seqNum; cbMeta.flag = 0x11; cbMeta.isDrop = isDrop; - ths->pFsm->FpReConfigCb(ths->pFsm, newSyncCfg, cbMeta); + ths->pFsm->FpReConfigCb(ths->pFsm, &rpcMsg, cbMeta); } } diff --git a/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp b/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp index 781c168da9..10b54d0aa4 100644 --- a/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp +++ b/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp @@ -146,7 +146,7 @@ int32_t SnapshotDoWrite(struct SSyncFSM* pFsm, void* pWriter, void* pBuf, int32_ void RestoreFinishCb(struct SSyncFSM* pFsm) { 
sTrace("==callback== ==RestoreFinishCb=="); } -void ReConfigCb(struct SSyncFSM* pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) { +void ReConfigCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SReConfigCbMeta cbMeta) { sTrace("==callback== ==ReConfigCb== flag:0x%lX, isDrop:%d, index:%ld, code:%d, currentTerm:%lu, term:%lu", cbMeta.flag, cbMeta.isDrop, cbMeta.index, cbMeta.code, cbMeta.currentTerm, cbMeta.term); } diff --git a/source/libs/sync/test/syncConfigChangeTest.cpp b/source/libs/sync/test/syncConfigChangeTest.cpp index c9d9ca48aa..1e64a8a6f7 100644 --- a/source/libs/sync/test/syncConfigChangeTest.cpp +++ b/source/libs/sync/test/syncConfigChangeTest.cpp @@ -77,7 +77,7 @@ int32_t GetSnapshotCb(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { void RestoreFinishCb(struct SSyncFSM* pFsm) { sTrace("==callback== ==RestoreFinishCb=="); } -void ReConfigCb(struct SSyncFSM* pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) { +void ReConfigCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SReConfigCbMeta cbMeta) { sTrace("==callback== ==ReConfigCb== flag:0x%lX, isDrop:%d, index:%ld, code:%d, currentTerm:%lu, term:%lu", cbMeta.flag, cbMeta.isDrop, cbMeta.index, cbMeta.code, cbMeta.currentTerm, cbMeta.term); } diff --git a/source/libs/sync/test/syncTestTool.cpp b/source/libs/sync/test/syncTestTool.cpp index 60255ee2cb..0c8b26e9d9 100644 --- a/source/libs/sync/test/syncTestTool.cpp +++ b/source/libs/sync/test/syncTestTool.cpp @@ -146,8 +146,8 @@ int32_t SnapshotDoWrite(struct SSyncFSM* pFsm, void* pWriter, void* pBuf, int32_ void RestoreFinishCb(struct SSyncFSM* pFsm) { sTrace("==callback== ==RestoreFinishCb== pFsm:%p", pFsm); } -void ReConfigCb(struct SSyncFSM* pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) { - char* s = syncCfg2Str(&newCfg); +void ReConfigCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SReConfigCbMeta cbMeta) { + char* s = syncCfg2Str(&(cbMeta.newCfg)); sTrace("==callback== ==ReConfigCb== flag:0x%lX, isDrop:%d, index:%ld, code:%d, currentTerm:%lu, term:%lu, 
newCfg:%s", cbMeta.flag, cbMeta.isDrop, cbMeta.index, cbMeta.code, cbMeta.currentTerm, cbMeta.term, s); taosMemoryFree(s); From d3cf0041640b59b22affbf6b8fd1c71833444b52 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Fri, 10 Jun 2022 18:03:44 +0800 Subject: [PATCH 19/21] fix(tmq): false cache --- source/dnode/vnode/src/inc/tq.h | 2 +- source/dnode/vnode/src/tq/tqRead.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index e7a744748b..5a8564bfd1 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -58,7 +58,7 @@ struct STqReadHandle { SArray* pColIdList; // SArray int32_t cachedSchemaVer; - int64_t cachedSchemaUid; + int64_t cachedSchemaSuid; SSchemaWrapper* pSchemaWrapper; STSchema* pSchema; }; diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 2ecaeff747..0c38d6442b 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -67,7 +67,7 @@ STqReadHandle* tqInitSubmitMsgScanner(SMeta* pMeta) { pReadHandle->ver = -1; pReadHandle->pColIdList = NULL; pReadHandle->cachedSchemaVer = -1; - pReadHandle->cachedSchemaUid = -1; + pReadHandle->cachedSchemaSuid = -1; pReadHandle->pSchema = NULL; pReadHandle->pSchemaWrapper = NULL; pReadHandle->tbIdHash = NULL; @@ -130,7 +130,8 @@ int32_t tqRetrieveDataBlock(SArray** ppCols, STqReadHandle* pHandle, uint64_t* p // TODO set to real sversion /*int32_t sversion = 1;*/ int32_t sversion = htonl(pHandle->pBlock->sversion); - if (pHandle->cachedSchemaVer != sversion || pHandle->cachedSchemaUid != pHandle->msgIter.suid) { + if (pHandle->cachedSchemaSuid == 0 || pHandle->cachedSchemaVer != sversion || + pHandle->cachedSchemaSuid != pHandle->msgIter.suid) { pHandle->pSchema = metaGetTbTSchema(pHandle->pVnodeMeta, pHandle->msgIter.uid, sversion); if (pHandle->pSchema == NULL) { tqWarn("cannot found tsschema for table: uid: %ld (suid: %ld), 
version %d, possibly dropped table", @@ -150,7 +151,7 @@ int32_t tqRetrieveDataBlock(SArray** ppCols, STqReadHandle* pHandle, uint64_t* p return -1; } pHandle->cachedSchemaVer = sversion; - pHandle->cachedSchemaUid = pHandle->msgIter.suid; + pHandle->cachedSchemaSuid = pHandle->msgIter.suid; } STSchema* pTschema = pHandle->pSchema; From c038848f72ed74e905fe2665499e9957c1f710e8 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 10 Jun 2022 18:33:24 +0800 Subject: [PATCH 20/21] feature: output results in merge-interval agg buf when exhausted input stream --- source/libs/executor/src/executorimpl.c | 1 + source/libs/executor/src/timewindowoperator.c | 87 +++++++++++-------- 2 files changed, 52 insertions(+), 36 deletions(-) diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 4634cc9f47..726be6d0a2 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -2002,6 +2002,7 @@ int32_t finalizeResultRowIntoResultDataBlock(SDiskbasedBuf* pBuf, SResultRowPosi } releaseBufPage(pBuf, page); + pBlock->info.rows += pRow->numOfRows; return 0; } diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index a3c01629d6..c1c504a400 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -3202,6 +3202,7 @@ typedef struct SMergeIntervalAggOperatorInfo { bool hasGroupId; uint64_t groupId; SSDataBlock* prefetchedBlock; + bool inputBlocksFinished; } SMergeIntervalAggOperatorInfo; void destroyMergeIntervalOperatorInfo(void* param, int32_t numOfOutput) { @@ -3223,7 +3224,7 @@ static int32_t outputPrevIntervalResult(SOperatorInfo* pOperatorInfo, uint64_t t return 0; } - if (ascScan && newWin->skey > prevWin->ekey || (!ascScan) && newWin->skey < prevWin->ekey) { + if (newWin == NULL || (ascScan && newWin->skey > prevWin->ekey || (!ascScan) && newWin->skey < prevWin->ekey) ) { 
SET_RES_WINDOW_KEY(iaInfo->aggSup.keyBuf, &prevWin->skey, TSDB_KEYSIZE, tableGroupId); SResultRowPosition* p1 = (SResultRowPosition*)taosHashGet(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); @@ -3233,9 +3234,13 @@ static int32_t outputPrevIntervalResult(SOperatorInfo* pOperatorInfo, uint64_t t pOperatorInfo->numOfExprs, iaInfo->binfo.rowCellInfoOffset, pResultBlock, pTaskInfo); taosHashRemove(iaInfo->aggSup.pResultRowHashTable, iaInfo->aggSup.keyBuf, GET_RES_WINDOW_KEY_LEN(TSDB_KEYSIZE)); - - taosHashPut(miaInfo->groupIntervalHash, &tableGroupId, sizeof(tableGroupId), newWin, sizeof(STimeWindow)); + if (newWin == NULL) { + taosHashRemove(miaInfo->groupIntervalHash, &tableGroupId, sizeof(tableGroupId)); + } else { + taosHashPut(miaInfo->groupIntervalHash, &tableGroupId, sizeof(tableGroupId), newWin, sizeof(STimeWindow)); + } } + return 0; } @@ -3343,47 +3348,57 @@ static SSDataBlock* doMergeIntervalAgg(SOperatorInfo* pOperator) { SSDataBlock* pRes = iaInfo->binfo.pRes; blockDataCleanup(pRes); + blockDataEnsureCapacity(pRes, pOperator->resultInfo.capacity); - SOperatorInfo* downstream = pOperator->pDownstream[0]; - int32_t scanFlag = MAIN_SCAN; - while (1) { - SSDataBlock* pBlock = NULL; - if (miaInfo->prefetchedBlock == NULL) { - pBlock = downstream->fpSet.getNextFn(downstream); - } else { - pBlock = miaInfo->prefetchedBlock; - miaInfo->groupId = pBlock->info.groupId; + if (!miaInfo->inputBlocksFinished) { + SOperatorInfo* downstream = pOperator->pDownstream[0]; + int32_t scanFlag = MAIN_SCAN; + while (1) { + SSDataBlock* pBlock = NULL; + if (miaInfo->prefetchedBlock == NULL) { + pBlock = downstream->fpSet.getNextFn(downstream); + } else { + pBlock = miaInfo->prefetchedBlock; + miaInfo->groupId = pBlock->info.groupId; + } + + if (pBlock == NULL) { + miaInfo->inputBlocksFinished = true; + break; + } + + if (!miaInfo->hasGroupId) { + miaInfo->hasGroupId = true; + miaInfo->groupId = pBlock->info.groupId; + } else if 
(miaInfo->groupId != pBlock->info.groupId) { + miaInfo->prefetchedBlock = pBlock; + break; + } + + getTableScanInfo(pOperator, &iaInfo->order, &scanFlag); + setInputDataBlock(pOperator, iaInfo->binfo.pCtx, pBlock, iaInfo->order, scanFlag, true); + STableQueryInfo* pTableQueryInfo = iaInfo->pCurrent; + + setIntervalQueryRange(pTableQueryInfo, pBlock->info.window.skey, &pTaskInfo->window); + doMergeIntervalAggImpl(pOperator, &iaInfo->binfo.resultRowInfo, pBlock, scanFlag, pRes); + + if (pRes->info.rows >= pOperator->resultInfo.threshold) { + break; + } } - if (pBlock == NULL) { - break; - } - - if (!miaInfo->hasGroupId) { - miaInfo->hasGroupId = true; - miaInfo->groupId = pBlock->info.groupId; - } else if (miaInfo->groupId != pBlock->info.groupId) { - miaInfo->prefetchedBlock = pBlock; - break; - } - - getTableScanInfo(pOperator, &iaInfo->order, &scanFlag); - setInputDataBlock(pOperator, iaInfo->binfo.pCtx, pBlock, iaInfo->order, scanFlag, true); - STableQueryInfo* pTableQueryInfo = iaInfo->pCurrent; - - setIntervalQueryRange(pTableQueryInfo, pBlock->info.window.skey, &pTaskInfo->window); - doMergeIntervalAggImpl(pOperator, &iaInfo->binfo.resultRowInfo, pBlock, scanFlag, pRes); - - if (pRes->info.rows >= pOperator->resultInfo.threshold) { - break; + pRes->info.groupId = miaInfo->groupId; + } else { + void* p = taosHashIterate(miaInfo->groupIntervalHash, NULL); + if (p != NULL) { + size_t len = 0; + uint64_t* pKey = taosHashGetKey(p, &len); + outputPrevIntervalResult(pOperator, *pKey, pRes, NULL); } } - pRes->info.groupId = miaInfo->groupId; if (pRes->info.rows == 0) { doSetOperatorCompleted(pOperator); - } else { - blockDataUpdateTsWindow(pRes, 0); } size_t rows = pRes->info.rows; From be88d8027f9202e8c848a064029f3a7bd434fc3d Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Fri, 10 Jun 2022 19:31:48 +0800 Subject: [PATCH 21/21] enh(query): refactor function merge code to provide common interface --- source/libs/function/src/builtinsimpl.c | 171 
+++++++++++++----------- 1 file changed, 91 insertions(+), 80 deletions(-) diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 1eafd3c649..ff838eb9c9 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -2103,8 +2103,49 @@ int32_t apercentileFunction(SqlFunctionCtx* pCtx) { return TSDB_CODE_SUCCESS; } +static void apercentileTransferInfo(SAPercentileInfo* pInput, SAPercentileInfo* pOutput) { + pOutput->percent = pInput->percent; + pOutput->algo = pInput->algo; + if (pOutput->algo == APERCT_ALGO_TDIGEST) { + buildTDigestInfo(pInput); + tdigestAutoFill(pInput->pTDigest, COMPRESSION); + + if(pInput->pTDigest->num_centroids == 0 && pInput->pTDigest->num_buffered_pts == 0) { + return; + } + + buildTDigestInfo(pOutput); + TDigest *pTDigest = pOutput->pTDigest; + + if(pTDigest->num_centroids <= 0) { + memcpy(pTDigest, pInput->pTDigest, (size_t)TDIGEST_SIZE(COMPRESSION)); + tdigestAutoFill(pTDigest, COMPRESSION); + } else { + tdigestMerge(pTDigest, pInput->pTDigest); + } + } else { + buildHistogramInfo(pInput); + if (pInput->pHisto->numOfElems <= 0) { + return; + } + + buildHistogramInfo(pOutput); + SHistogramInfo *pHisto = pOutput->pHisto; + + if (pHisto->numOfElems <= 0) { + memcpy(pHisto, pInput->pHisto, sizeof(SHistogramInfo) + sizeof(SHistBin) * (MAX_HISTOGRAM_BIN + 1)); + pHisto->elems = (SHistBin*) ((char *)pHisto + sizeof(SHistogramInfo)); + } else { + pHisto->elems = (SHistBin*) ((char *)pHisto + sizeof(SHistogramInfo)); + SHistogramInfo *pRes = tHistogramMerge(pHisto, pInput->pHisto, MAX_HISTOGRAM_BIN); + memcpy(pHisto, pRes, sizeof(SHistogramInfo) + sizeof(SHistBin) * MAX_HISTOGRAM_BIN); + pHisto->elems = (SHistBin*) ((char *)pHisto + sizeof(SHistogramInfo)); + tHistogramDestroy(&pRes); + } + } +} + int32_t apercentileFunctionMerge(SqlFunctionCtx* pCtx) { - int32_t numOfElems = 0; SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx); SInputColumnInfoData* pInput = 
&pCtx->input; @@ -2113,60 +2154,14 @@ int32_t apercentileFunctionMerge(SqlFunctionCtx* pCtx) { ASSERT(pCol->info.type == TSDB_DATA_TYPE_BINARY); SAPercentileInfo* pInfo = GET_ROWCELL_INTERBUF(pResInfo); - SAPercentileInfo* pInputInfo; int32_t start = pInput->startRowIndex; - for (int32_t i = start; i < pInput->numOfRows + start; ++i) { - //if (colDataIsNull_s(pCol, i)) { - // continue; - //} - numOfElems += 1; - char* data = colDataGetData(pCol, i); + char* data = colDataGetData(pCol, start); + SAPercentileInfo* pInputInfo = (SAPercentileInfo *)varDataVal(data); - pInputInfo = (SAPercentileInfo *)varDataVal(data); - } + apercentileTransferInfo(pInputInfo, pInfo); - pInfo->percent = pInputInfo->percent; - pInfo->algo = pInputInfo->algo; - if (pInfo->algo == APERCT_ALGO_TDIGEST) { - buildTDigestInfo(pInputInfo); - tdigestAutoFill(pInputInfo->pTDigest, COMPRESSION); - - if(pInputInfo->pTDigest->num_centroids == 0 && pInputInfo->pTDigest->num_buffered_pts == 0) { - return TSDB_CODE_SUCCESS; - } - - buildTDigestInfo(pInfo); - TDigest *pTDigest = pInfo->pTDigest; - - if(pTDigest->num_centroids <= 0) { - memcpy(pTDigest, pInputInfo->pTDigest, (size_t)TDIGEST_SIZE(COMPRESSION)); - tdigestAutoFill(pTDigest, COMPRESSION); - } else { - tdigestMerge(pTDigest, pInputInfo->pTDigest); - } - } else { - buildHistogramInfo(pInputInfo); - if (pInputInfo->pHisto->numOfElems <= 0) { - return TSDB_CODE_SUCCESS; - } - - buildHistogramInfo(pInfo); - SHistogramInfo *pHisto = pInfo->pHisto; - - if (pHisto->numOfElems <= 0) { - memcpy(pHisto, pInputInfo->pHisto, sizeof(SHistogramInfo) + sizeof(SHistBin) * (MAX_HISTOGRAM_BIN + 1)); - pHisto->elems = (SHistBin*) ((char *)pHisto + sizeof(SHistogramInfo)); - } else { - pHisto->elems = (SHistBin*) ((char *)pHisto + sizeof(SHistogramInfo)); - SHistogramInfo *pRes = tHistogramMerge(pHisto, pInputInfo->pHisto, MAX_HISTOGRAM_BIN); - memcpy(pHisto, pRes, sizeof(SHistogramInfo) + sizeof(SHistBin) * MAX_HISTOGRAM_BIN); - pHisto->elems = (SHistBin*) 
((char *)pHisto + sizeof(SHistogramInfo)); - tHistogramDestroy(&pRes); - } - } - - SET_VAL(pResInfo, numOfElems, 1); + SET_VAL(pResInfo, 1, 1); return TSDB_CODE_SUCCESS; } @@ -3049,6 +3044,17 @@ _spread_over: return TSDB_CODE_SUCCESS; } +static void spreadTransferInfo(SSpreadInfo* pInput, SSpreadInfo* pOutput) { + pOutput->hasResult = pInput->hasResult; + if (pInput->max > pOutput->max) { + pOutput->max = pInput->max; + } + + if (pInput->min < pOutput->min) { + pOutput->min = pInput->min; + } +} + int32_t spreadFunctionMerge(SqlFunctionCtx *pCtx) { SInputColumnInfoData* pInput = &pCtx->input; SColumnInfoData* pCol = pInput->pData[0]; @@ -3061,14 +3067,7 @@ int32_t spreadFunctionMerge(SqlFunctionCtx *pCtx) { char* data = colDataGetData(pCol, start); pInputInfo = (SSpreadInfo *)varDataVal(data); - pInfo->hasResult = pInputInfo->hasResult; - if (pInputInfo->max > pInfo->max) { - pInfo->max = pInputInfo->max; - } - - if (pInputInfo->min < pInfo->min) { - pInfo->min = pInputInfo->min; - } + spreadTransferInfo(pInputInfo, pInfo); SET_VAL(GET_RES_INFO(pCtx), 1, 1); @@ -3206,6 +3205,17 @@ _elapsed_over: return TSDB_CODE_SUCCESS; } +static void elapsedTransferInfo(SElapsedInfo* pInput, SElapsedInfo* pOutput) { + pOutput->timeUnit = pInput->timeUnit; + if (pOutput->min > pInput->min) { + pOutput->min = pInput->min; + } + + if (pOutput->max < pInput->max) { + pOutput->max = pInput->max; + } +} + int32_t elapsedFunctionMerge(SqlFunctionCtx *pCtx) { SInputColumnInfoData* pInput = &pCtx->input; SColumnInfoData* pCol = pInput->pData[0]; @@ -3217,14 +3227,7 @@ int32_t elapsedFunctionMerge(SqlFunctionCtx *pCtx) { char* data = colDataGetData(pCol, start); SElapsedInfo* pInputInfo = (SElapsedInfo *)varDataVal(data); - pInfo->timeUnit = pInputInfo->timeUnit; - if (pInfo->min > pInputInfo->min) { - pInfo->min = pInputInfo->min; - } - - if (pInfo->max < pInputInfo->max) { - pInfo->max = pInputInfo->max; - } + elapsedTransferInfo(pInputInfo, pInfo); SET_VAL(GET_RES_INFO(pCtx), 1, 1); 
return TSDB_CODE_SUCCESS; @@ -3470,6 +3473,17 @@ int32_t histogramFunction(SqlFunctionCtx *pCtx) { return TSDB_CODE_SUCCESS; } +static void histogramTransferInfo(SHistoFuncInfo* pInput, SHistoFuncInfo* pOutput) { + pOutput->normalized = pInput->normalized; + pOutput->numOfBins = pInput->numOfBins; + pOutput->totalCount += pInput->totalCount; + for (int32_t k = 0; k < pOutput->numOfBins; ++k) { + pOutput->bins[k].lower = pInput->bins[k].lower; + pOutput->bins[k].upper = pInput->bins[k].upper; + pOutput->bins[k].count += pInput->bins[k].count; + } +} + int32_t histogramFunctionMerge(SqlFunctionCtx *pCtx) { SInputColumnInfoData* pInput = &pCtx->input; SColumnInfoData* pCol = pInput->pData[0]; @@ -3481,14 +3495,7 @@ int32_t histogramFunctionMerge(SqlFunctionCtx *pCtx) { char* data = colDataGetData(pCol, start); SHistoFuncInfo* pInputInfo = (SHistoFuncInfo *)varDataVal(data); - pInfo->normalized = pInputInfo->normalized; - pInfo->numOfBins = pInputInfo->numOfBins; - pInfo->totalCount += pInputInfo->totalCount; - for (int32_t k = 0; k < pInfo->numOfBins; ++k) { - pInfo->bins[k].lower = pInputInfo->bins[k].lower; - pInfo->bins[k].upper = pInputInfo->bins[k].upper; - pInfo->bins[k].count += pInputInfo->bins[k].count; - } + histogramTransferInfo(pInputInfo, pInfo); SET_VAL(GET_RES_INFO(pCtx), pInfo->numOfBins, pInfo->numOfBins); return TSDB_CODE_SUCCESS; @@ -3676,6 +3683,14 @@ int32_t hllFunction(SqlFunctionCtx *pCtx) { return TSDB_CODE_SUCCESS; } +static void hllTransferInfo(SHLLInfo* pInput, SHLLInfo* pOutput) { + for (int32_t k = 0; k < HLL_BUCKETS; ++k) { + if (pOutput->buckets[k] < pInput->buckets[k]) { + pOutput->buckets[k] = pInput->buckets[k]; + } + } +} + int32_t hllFunctionMerge(SqlFunctionCtx *pCtx) { SInputColumnInfoData* pInput = &pCtx->input; SColumnInfoData* pCol = pInput->pData[0]; @@ -3687,11 +3702,7 @@ int32_t hllFunctionMerge(SqlFunctionCtx *pCtx) { char* data = colDataGetData(pCol, start); SHLLInfo* pInputInfo = (SHLLInfo *)varDataVal(data); - for 
(int32_t k = 0; k < HLL_BUCKETS; ++k) { - if (pInfo->buckets[k] < pInputInfo->buckets[k]) { - pInfo->buckets[k] = pInputInfo->buckets[k]; - } - } + hllTransferInfo(pInputInfo, pInfo); SET_VAL(GET_RES_INFO(pCtx), 1, 1); return TSDB_CODE_SUCCESS;