From 9863879951d28bab71553f07bb9b425e01c52095 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 8 Nov 2022 09:21:43 +0800 Subject: [PATCH 01/23] refactor: do some internal refactor. --- source/libs/executor/inc/executorimpl.h | 1 + source/libs/executor/src/executorimpl.c | 77 ++++++++----------------- 2 files changed, 26 insertions(+), 52 deletions(-) diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index cd9f29978d..94361554d0 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -265,6 +265,7 @@ typedef struct SExchangeInfo { SLoadRemoteDataInfo loadInfo; uint64_t self; SLimitInfo limitInfo; + int64_t openedTs; // start exec time stamp } SExchangeInfo; typedef struct SScanInfo { diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index b7c3eed069..00733fa21f 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1847,40 +1847,41 @@ static void* setAllSourcesCompleted(SOperatorInfo* pOperator, int64_t startTs) { return NULL; } -static void concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SExchangeInfo* pExchangeInfo, - SExecTaskInfo* pTaskInfo) { - int32_t code = 0; - int64_t startTs = taosGetTimestampUs(); - size_t totalSources = taosArrayGetSize(pExchangeInfo->pSources); + +static int32_t getCompletedSources(const SArray* pArray) { + size_t total = taosArrayGetSize(pArray); int32_t completed = 0; - for (int32_t k = 0; k < totalSources; ++k) { - SSourceDataInfo* p = taosArrayGet(pExchangeInfo->pSourceDataInfo, k); + for (int32_t k = 0; k < total; ++k) { + SSourceDataInfo* p = taosArrayGet(pArray, k); if (p->status == EX_SOURCE_DATA_EXHAUSTED) { completed += 1; } } + return completed; +} + +static void concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SExchangeInfo* pExchangeInfo, + SExecTaskInfo* pTaskInfo) { + int32_t code = 0; + size_t totalSources = taosArrayGetSize(pExchangeInfo->pSourceDataInfo); + int32_t completed = getCompletedSources(pExchangeInfo->pSourceDataInfo); if (completed == totalSources) { - setAllSourcesCompleted(pOperator, startTs); + setAllSourcesCompleted(pOperator, pExchangeInfo->openedTs); return; } while (1) { -// printf("1\n"); tsem_wait(&pExchangeInfo->ready); -// printf("2\n"); for (int32_t i = 0; i < totalSources; ++i) { SSourceDataInfo* pDataInfo = taosArrayGet(pExchangeInfo->pSourceDataInfo, i); if (pDataInfo->status == EX_SOURCE_DATA_EXHAUSTED) { -// printf("========:%d is completed\n", i); continue; } -// printf("index:%d - status:%d\n", i, pDataInfo->status); if (pDataInfo->status != EX_SOURCE_DATA_READY) { -// printf("-----------%d, status:%d, continue\n", i, pDataInfo->status); continue; } @@ -1896,27 +1897,18 @@ static void concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SExchangeIn SLoadRemoteDataInfo* pLoadInfo = &pExchangeInfo->loadInfo; if (pRsp->numOfRows == 0) { pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED; -// printf("%d completed, try next\n", i); - qDebug("%s vgId:%d, taskId:0x%" PRIx64 " execId:%d index:%d completed, rowsOfSource:%" PRIu64 - ", totalRows:%" PRIu64 ", completed:%d try next %d/%" PRIzu, + ", totalRows:%" PRIu64 ", try next %d/%" PRIzu, GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, i, pDataInfo->totalRows, - pExchangeInfo->loadInfo.totalRows, completed, i + 1, totalSources); + pExchangeInfo->loadInfo.totalRows, i + 1, totalSources); taosMemoryFreeClear(pDataInfo->pRsp); - -// if (completed == totalSources) { -// return; -// } else { -// break; -// } - break; + break; } SRetrieveTableRsp* pRetrieveRsp = pDataInfo->pRsp; int32_t index = 0; char* pStart = pRetrieveRsp->data; while (index++ < pRetrieveRsp->numOfBlocks) { - printf("results, numOfBLock: %d\n", pRetrieveRsp->numOfBlocks); SSDataBlock* pb = createOneDataBlock(pExchangeInfo->pDummyBlock, false); code = extractDataBlockFromFetchRsp(pb, pStart, NULL, &pStart); if (code != 0) { @@ -1927,25 +1919,16 @@ static void concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SExchangeIn taosArrayPush(pExchangeInfo->pResultBlockList, &pb); } - updateLoadRemoteInfo(pLoadInfo, pRetrieveRsp->numOfRows, pRetrieveRsp->compLen, startTs, pOperator); + updateLoadRemoteInfo(pLoadInfo, pRetrieveRsp->numOfRows, pRetrieveRsp->compLen, pExchangeInfo->openedTs, pOperator); -// int32_t completed = 0; if (pRsp->completed == 1) { pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED; - -// for (int32_t k = 0; k < totalSources; ++k) { -// SSourceDataInfo* p = taosArrayGet(pExchangeInfo->pSourceDataInfo, k); -// if (p->status == EX_SOURCE_DATA_EXHAUSTED) { -// completed += 1; -// } -// } - qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " execId:%d index:%d completed, blocks:%d, numOfRows:%d, rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64 - ", total:%.2f Kb, completed:%d try next %d/%" PRIzu, + ", total:%.2f Kb, try next %d/%" PRIzu, GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, i, pRsp->numOfBlocks, pRsp->numOfRows, pDataInfo->totalRows, pLoadInfo->totalRows, pLoadInfo->totalSize / 1024.0, - completed, i + 1, totalSources); + i + 1, totalSources); } else { qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " execId:%d blocks:%d, numOfRows:%d, totalRows:%" PRIu64 ", total:%.2f Kb", @@ -1963,23 +1946,12 @@ static void concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SExchangeIn goto _error; } } - -// if (completed == totalSources) { -// setAllSourcesCompleted(pOperator, startTs); -// } - return; - } + } // end loop - int32_t completed = 0; - for (int32_t k = 0; k < totalSources; ++k) { - SSourceDataInfo* p = taosArrayGet(pExchangeInfo->pSourceDataInfo, k); - if (p->status == EX_SOURCE_DATA_EXHAUSTED) { - completed += 1; - } - } - - if (completed == totalSources) { + int32_t complete1 = getCompletedSources(pExchangeInfo->pSourceDataInfo); + if (complete1 == totalSources) { + qDebug("all sources are completed, %s", GET_TASKID(pTaskInfo)); return; } } @@ -2098,6 +2070,7 @@ static int32_t prepareLoadRemoteData(SOperatorInfo* pOperator) { if (code != TSDB_CODE_SUCCESS) { return code; } + pExchangeInfo->openedTs = taosGetTimestampUs(); } OPTR_SET_OPENED(pOperator); From 2621dec4aeefe14ee647cfc714a4e82c7b174b6b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 8 Nov 2022 12:02:44 +0800 Subject: [PATCH 02/23] enh(query): improve the perf. --- source/libs/function/src/builtinsimpl.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 1501bb6d67..d24393a766 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -1555,6 +1555,7 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { pBuf->assign = true; } else { // ignore the equivalent data value +#if 0 if ((*val) == pData[i]) { continue; } @@ -1565,6 +1566,23 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); } } +#endif + // NOTE: An faster version to avoid one additional comparison with FPU. + if (isMinFunc) { // min + if (*val < pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } + } + } else { // max + if (*val > pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } + } + } } numOfElems += 1; From 032208e36ab272c425a6a74eed0ca42d52ba7f78 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 8 Nov 2022 14:01:29 +0800 Subject: [PATCH 03/23] enh(query): optimize the query perf. --- source/libs/function/src/builtinsimpl.c | 199 +++++++++++++++--------- 1 file changed, 126 insertions(+), 73 deletions(-) diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index d24393a766..ab972bdab0 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -1273,14 +1273,20 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { pBuf->assign = true; } else { // ignore the equivalent data value - if ((*val) == pData[i]) { - continue; - } - - if ((*val < pData[i]) ^ isMinFunc) { - *val = pData[i]; - if (pCtx->subsidiaries.num > 0) { - updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + // NOTE: An faster version to avoid one additional comparison with FPU. + if (isMinFunc) { // min + if (*val < pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } + } + } else { // max + if (*val > pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } } } } @@ -1304,14 +1310,20 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { pBuf->assign = true; } else { // ignore the equivalent data value - if ((*val) == pData[i]) { - continue; - } - - if ((*val < pData[i]) ^ isMinFunc) { - *val = pData[i]; - if (pCtx->subsidiaries.num > 0) { - updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + // NOTE: An faster version to avoid one additional comparison with FPU. + if (isMinFunc) { // min + if (*val < pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } + } + } else { // max + if (*val > pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } } } } @@ -1335,14 +1347,20 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { pBuf->assign = true; } else { // ignore the equivalent data value - if ((*val) == pData[i]) { - continue; - } - - if ((*val < pData[i]) ^ isMinFunc) { - *val = pData[i]; - if (pCtx->subsidiaries.num > 0) { - updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + // NOTE: An faster version to avoid one additional comparison with FPU. + if (isMinFunc) { // min + if (*val < pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } + } + } else { // max + if (*val > pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } } } } @@ -1366,14 +1384,20 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { pBuf->assign = true; } else { // ignore the equivalent data value - if ((*val) == pData[i]) { - continue; - } - - if ((*val < pData[i]) ^ isMinFunc) { - *val = pData[i]; - if (pCtx->subsidiaries.num > 0) { - updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + // NOTE: An faster version to avoid one additional comparison with FPU. + if (isMinFunc) { // min + if (*val < pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } + } + } else { // max + if (*val > pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } } } } @@ -1399,14 +1423,20 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { pBuf->assign = true; } else { // ignore the equivalent data value - if ((*val) == pData[i]) { - continue; - } - - if ((*val < pData[i]) ^ isMinFunc) { - *val = pData[i]; - if (pCtx->subsidiaries.num > 0) { - updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + // NOTE: An faster version to avoid one additional comparison with FPU. + if (isMinFunc) { // min + if (*val < pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } + } + } else { // max + if (*val > pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } } } } @@ -1430,14 +1460,20 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { pBuf->assign = true; } else { // ignore the equivalent data value - if ((*val) == pData[i]) { - continue; - } - - if ((*val < pData[i]) ^ isMinFunc) { - *val = pData[i]; - if (pCtx->subsidiaries.num > 0) { - updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + // NOTE: An faster version to avoid one additional comparison with FPU. + if (isMinFunc) { // min + if (*val < pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } + } + } else { // max + if (*val > pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } } } } @@ -1461,14 +1497,20 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { pBuf->assign = true; } else { // ignore the equivalent data value - if ((*val) == pData[i]) { - continue; - } - - if ((*val < pData[i]) ^ isMinFunc) { - *val = pData[i]; - if (pCtx->subsidiaries.num > 0) { - updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + // NOTE: An faster version to avoid one additional comparison with FPU. + if (isMinFunc) { // min + if (*val < pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } + } + } else { // max + if (*val > pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } } } } @@ -1492,14 +1534,20 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { pBuf->assign = true; } else { // ignore the equivalent data value - if ((*val) == pData[i]) { - continue; - } - - if ((*val < pData[i]) ^ isMinFunc) { - *val = pData[i]; - if (pCtx->subsidiaries.num > 0) { - updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + // NOTE: An faster version to avoid one additional comparison with FPU. + if (isMinFunc) { // min + if (*val < pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } + } + } else { // max + if (*val > pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } } } } @@ -1524,14 +1572,20 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { pBuf->assign = true; } else { // ignore the equivalent data value - if ((*val) == pData[i]) { - continue; - } - - if ((*val < pData[i]) ^ isMinFunc) { - *val = pData[i]; - if (pCtx->subsidiaries.num > 0) { - updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + // NOTE: An faster version to avoid one additional comparison with FPU. + if (isMinFunc) { // min + if (*val < pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } + } + } else { // max + if (*val > pData[i]) { + *val = pData[i]; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); + } } } } @@ -1554,7 +1608,6 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { } pBuf->assign = true; } else { - // ignore the equivalent data value #if 0 if ((*val) == pData[i]) { continue; From f66eded1e181596f85a1ee3aa7b3e928d6e572e1 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao@163.com> Date: Tue, 8 Nov 2022 14:40:44 +0800 Subject: [PATCH 04/23] fix(stream): state window, error result after deleting data --- include/libs/stream/streamState.h | 12 +- source/libs/executor/src/scanoperator.c | 2 +- source/libs/executor/src/timewindowoperator.c | 33 ++- source/libs/stream/src/streamState.c | 243 ++++++++++++------ tests/script/tsim/stream/session0.sim | 2 +- 5 files changed, 195 insertions(+), 97 deletions(-) diff --git a/include/libs/stream/streamState.h b/include/libs/stream/streamState.h index 225466a015..9443df5e14 100644 --- a/include/libs/stream/streamState.h +++ b/include/libs/stream/streamState.h @@ -60,19 +60,19 @@ int32_t streamStateDel(SStreamState* pState, const SWinKey* key); int32_t streamStateClear(SStreamState* pState); void streamStateSetNumber(SStreamState* pState, int32_t number); -int32_t streamStateSessionAddIfNotExist(SStreamState* pState, SSessionKey* key, void** pVal, int32_t* pVLen); +int32_t streamStateSessionAddIfNotExist(SStreamState* pState, SSessionKey* key, TSKEY gap, void** pVal, int32_t* pVLen); int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, const void* value, int32_t vLen); int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVal, int32_t* pVLen); int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key); int32_t streamStateSessionClear(SStreamState* pState); -int32_t streamStateSessionGetKVByCur(SStreamStateCur* pCur, SSessionKey* pKey, const void** pVal, int32_t* pVLen); +int32_t streamStateSessionGetKVByCur(SStreamStateCur* pCur, SSessionKey* pKey, void** pVal, int32_t* pVLen); int32_t streamStateStateAddIfNotExist(SStreamState* pState, SSessionKey* key, char* pKeyData, int32_t keyDataLen, state_key_cmpr_fn fn, void** pVal, int32_t* pVLen); -int32_t streamStateSessionGetKey(SStreamState* pState, const SSessionKey* key, SSessionKey* curKey); +int32_t streamStateSessionGetKeyByRange(SStreamState* pState, const SSessionKey* range, SSessionKey* curKey); SStreamStateCur* streamStateSessionSeekKeyNext(SStreamState* pState, const SSessionKey* key); SStreamStateCur* streamStateSessionSeekKeyCurrentPrev(SStreamState* pState, const SSessionKey* key); -SStreamStateCur* streamStateSessionGetCur(SStreamState* pState, const SSessionKey* key); +SStreamStateCur* streamStateSessionSeekKeyCurrentNext(SStreamState* pState, const SSessionKey* key); int32_t streamStateFillPut(SStreamState* pState, const SWinKey* key, const void* value, int32_t vLen); int32_t streamStateFillGet(SStreamState* pState, const SWinKey* key, void** pVal, int32_t* pVLen); @@ -99,7 +99,9 @@ int32_t streamStateSeekLast(SStreamState* pState, SStreamStateCur* pCur); int32_t streamStateCurNext(SStreamState* pState, SStreamStateCur* pCur); int32_t streamStateCurPrev(SStreamState* pState, SStreamStateCur* pCur); -// char* streamStateSessionDump(SStreamState* pState); +#if 0 +char* streamStateSessionDump(SStreamState* pState); +#endif #ifdef __cplusplus } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 6d13048123..7faf75d5cd 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1438,7 +1438,7 @@ static int32_t generateSessionScanRange(SStreamScanInfo* pInfo, SSDataBlock* pSr uint64_t groupId = getGroupIdByData(pInfo, uidCol[i], startData[i], version); // gap must be 0. SSessionKey startWin = {0}; - getCurSessionWindow(pInfo->windowSup.pStreamAggSup, startData[i], endData[i], groupId, &startWin); + getCurSessionWindow(pInfo->windowSup.pStreamAggSup, startData[i], startData[i], groupId, &startWin); if (IS_INVALID_SESSION_WIN_KEY(startWin)) { // window has been closed. continue; diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 7b6ed5b67d..e5b49acf0b 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -3550,7 +3550,7 @@ void getCurSessionWindow(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endT pKey->win.skey = startTs; pKey->win.ekey = endTs; pKey->groupId = groupId; - int32_t code = streamStateSessionGetKey(pAggSup->pState, pKey, pKey); + int32_t code = streamStateSessionGetKeyByRange(pAggSup->pState, pKey, pKey); if (code != TSDB_CODE_SUCCESS) { SET_SESSION_WIN_KEY_INVALID(pKey); } @@ -3561,10 +3561,11 @@ bool isInvalidSessionWin(SResultWindowInfo* pWinInfo) { return pWinInfo->session void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId, SResultWindowInfo* pCurWin) { pCurWin->sessionWin.groupId = groupId; - pCurWin->sessionWin.win.skey = startTs - pAggSup->gap; - pCurWin->sessionWin.win.ekey = endTs + pAggSup->gap; + pCurWin->sessionWin.win.skey = startTs; + pCurWin->sessionWin.win.ekey = endTs; int32_t size = pAggSup->resultRowSize; - int32_t code = streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin, &pCurWin->pOutputBuf, &size); + int32_t code = + streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin, pAggSup->gap, &pCurWin->pOutputBuf, &size); if (code == TSDB_CODE_SUCCESS) { pCurWin->isOutput = true; } else { @@ -3575,7 +3576,7 @@ void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endT int32_t getSessionWinBuf(SStreamAggSupporter* pAggSup, SStreamStateCur* pCur, SResultWindowInfo* pWinInfo) { int32_t size = 0; - int32_t code = streamStateSessionGetKVByCur(pCur, &pWinInfo->sessionWin, (const void**)&pWinInfo->pOutputBuf, &size); + int32_t code = streamStateSessionGetKVByCur(pCur, &pWinInfo->sessionWin, &pWinInfo->pOutputBuf, &size); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -3680,7 +3681,7 @@ SStreamStateCur* getNextSessionWinInfo(SStreamAggSupporter* pAggSup, SSHashObj* setSessionWinOutputInfo(pStUpdated, pNextWin); int32_t size = 0; pNextWin->sessionWin = pCurWin->sessionWin; - int32_t code = streamStateSessionGetKVByCur(pCur, &pNextWin->sessionWin, (const void**)&pNextWin->pOutputBuf, &size); + int32_t code = streamStateSessionGetKVByCur(pCur, &pNextWin->sessionWin, &pNextWin->pOutputBuf, &size); if (code != TSDB_CODE_SUCCESS) { SET_SESSION_WIN_INVALID(*pNextWin); } @@ -3894,9 +3895,11 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS SOperatorInfo* pChild = taosArrayGetP(pInfo->pChildren, j); SStreamSessionAggOperatorInfo* pChInfo = pChild->info; SStreamAggSupporter* pChAggSup = &pChInfo->streamAggSup; - SStreamStateCur* pCur = streamStateSessionGetCur(pChAggSup->pState, pWinKey); - SResultRow* pResult = NULL; - SResultRow* pChResult = NULL; + SSessionKey chWinKey = *pWinKey; + chWinKey.win.ekey = chWinKey.win.skey; + SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext(pChAggSup->pState, &chWinKey); + SResultRow* pResult = NULL; + SResultRow* pChResult = NULL; while (1) { SResultWindowInfo childWin = {0}; childWin.sessionWin = *pWinKey; @@ -4112,6 +4115,12 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { initGroupResInfoFromArrayList(&pInfo->groupResInfo, pUpdated); blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); +#if 0 + char* pBuf = streamStateSessionDump(pAggSup->pState); + qDebug("===stream===final session%s", pBuf); + taosMemoryFree(pBuf); +#endif + doBuildDeleteDataBlock(pInfo->pStDeleted, pInfo->pDelRes, &pInfo->pDelIterator); if (pInfo->pDelRes->info.rows > 0) { printDataBlock(pInfo->pDelRes, IS_FINAL_OP(pInfo) ? "final session" : "single session"); @@ -4306,6 +4315,12 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { initGroupResInfoFromArrayList(&pInfo->groupResInfo, pUpdated); blockDataEnsureCapacity(pBInfo->pRes, pOperator->resultInfo.capacity); +#if 0 + char* pBuf = streamStateSessionDump(pAggSup->pState); + qDebug("===stream===semi session%s", pBuf); + taosMemoryFree(pBuf); +#endif + doBuildSessionResult(pOperator, pAggSup->pState, &pInfo->groupResInfo, pBInfo->pRes); if (pBInfo->pRes->info.rows > 0) { printDataBlock(pBInfo->pRes, "semi session"); diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index cf2ead364b..ccb0dd4a92 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -29,7 +29,7 @@ typedef struct SStateSessionKey { int64_t opNum; } SStateSessionKey; -static inline int sessionKeyCmpr(const SSessionKey* pWin1, const SSessionKey* pWin2) { +static inline int sessionRangeKeyCmpr(const SSessionKey* pWin1, const SSessionKey* pWin2) { if (pWin1->groupId > pWin2->groupId) { return 1; } else if (pWin1->groupId < pWin2->groupId) { @@ -45,6 +45,28 @@ static inline int sessionKeyCmpr(const SSessionKey* pWin1, const SSessionKey* pW return 0; } +static inline int sessionWinKeyCmpr(const SSessionKey* pWin1, const SSessionKey* pWin2) { + if (pWin1->groupId > pWin2->groupId) { + return 1; + } else if (pWin1->groupId < pWin2->groupId) { + return -1; + } + + if (pWin1->win.skey > pWin2->win.skey) { + return 1; + } else if (pWin1->win.skey < pWin2->win.skey) { + return -1; + } + + if (pWin1->win.ekey > pWin2->win.ekey) { + return 1; + } else if (pWin1->win.ekey < pWin2->win.ekey) { + return -1; + } + + return 0; +} + static inline int stateSessionKeyCmpr(const void* pKey1, int kLen1, const void* pKey2, int kLen2) { SStateSessionKey* pWin1 = (SStateSessionKey*)pKey1; SStateSessionKey* pWin2 = (SStateSessionKey*)pKey2; @@ -55,7 +77,7 @@ static inline int stateSessionKeyCmpr(const void* pKey1, int kLen1, const void* return -1; } - return sessionKeyCmpr(&pWin1->key, &pWin2->key); + return sessionWinKeyCmpr(&pWin1->key, &pWin2->key); } static inline int stateKeyCmpr(const void* pKey1, int kLen1, const void* pKey2, int kLen2) { @@ -400,7 +422,6 @@ SStreamStateCur* streamStateSeekKeyNext(SStreamState* pState, const SWinKey* key SStateKey sKey = {.key = *key, .opNum = pState->number}; int32_t c = 0; if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateKey), &c) < 0) { - tdbTbcClose(pCur->pCur); streamStateFreeCur(pCur); return NULL; } @@ -426,7 +447,6 @@ SStreamStateCur* streamStateFillSeekKeyNext(SStreamState* pState, const SWinKey* int32_t c = 0; if (tdbTbcMoveTo(pCur->pCur, key, sizeof(SWinKey), &c) < 0) { - tdbTbcClose(pCur->pCur); streamStateFreeCur(pCur); return NULL; } @@ -452,7 +472,6 @@ SStreamStateCur* streamStateFillSeekKeyPrev(SStreamState* pState, const SWinKey* int32_t c = 0; if (tdbTbcMoveTo(pCur->pCur, key, sizeof(SWinKey), &c) < 0) { - tdbTbcClose(pCur->pCur); streamStateFreeCur(pCur); return NULL; } @@ -496,33 +515,18 @@ int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, cons return tdbTbUpsert(pState->pSessionStateDb, &sKey, sizeof(SStateSessionKey), value, vLen, &pState->txn); } -SStreamStateCur* streamStateSessionGetRanomCur(SStreamState* pState, const SSessionKey* key) { - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); - if (pCur == NULL) return NULL; - tdbTbcOpen(pState->pSessionStateDb, &pCur->pCur, NULL); - - int32_t c = -2; - SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; - tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c); - if (c != 0) { - streamStateFreeCur(pCur); - return NULL; - } - pCur->number = pState->number; - return pCur; -} - int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVal, int32_t* pVLen) { - SStreamStateCur* pCur = streamStateSessionGetRanomCur(pState, key); + SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext(pState, key); + SSessionKey resKey = *key; void* tmp = NULL; - if (streamStateSessionGetKVByCur(pCur, key, (const void**)&tmp, pVLen) == 0) { + int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, &tmp, pVLen); + if (code == 0) { + *key = resKey; *pVal = tdbRealloc(NULL, *pVLen); memcpy(*pVal, tmp, *pVLen); - streamStateFreeCur(pCur); - return 0; } streamStateFreeCur(pCur); - return -1; + return code; } int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) { @@ -544,7 +548,6 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev(SStreamState* pState, cons SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; int32_t c = 0; if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) { - tdbTbcClose(pCur->pCur); streamStateFreeCur(pCur); return NULL; } @@ -558,6 +561,34 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev(SStreamState* pState, cons return pCur; } +SStreamStateCur* streamStateSessionSeekKeyCurrentNext(SStreamState* pState, const SSessionKey* key) { + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + if (pCur == NULL) { + return NULL; + } + pCur->number = pState->number; + if (tdbTbcOpen(pState->pSessionStateDb, &pCur->pCur, NULL) < 0) { + streamStateFreeCur(pCur); + return NULL; + } + + SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; + int32_t c = 0; + if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) { + streamStateFreeCur(pCur); + return NULL; + } + + if (c <= 0) return pCur; + + if (tdbTbcMoveToNext(pCur->pCur) < 0) { + streamStateFreeCur(pCur); + return NULL; + } + + return pCur; +} + SStreamStateCur* streamStateSessionSeekKeyNext(SStreamState* pState, const SSessionKey* key) { SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) { @@ -572,7 +603,6 @@ SStreamStateCur* streamStateSessionSeekKeyNext(SStreamState* pState, const SSess SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; int32_t c = 0; if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) { - tdbTbcClose(pCur->pCur); streamStateFreeCur(pCur); return NULL; } @@ -586,13 +616,13 @@ SStreamStateCur* streamStateSessionSeekKeyNext(SStreamState* pState, const SSess return pCur; } -int32_t streamStateSessionGetKVByCur(SStreamStateCur* pCur, SSessionKey* pKey, const void** pVal, int32_t* pVLen) { +int32_t streamStateSessionGetKVByCur(SStreamStateCur* pCur, SSessionKey* pKey, void** pVal, int32_t* pVLen) { if (!pCur) { return -1; } - const SStateSessionKey* pKTmp = NULL; - int32_t kLen; - if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, pVal, pVLen) < 0) { + SStateSessionKey* pKTmp = NULL; + int32_t kLen; + if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, (const void**)pVal, pVLen) < 0) { return -1; } if (pKTmp->opNum != pCur->number) { @@ -606,15 +636,15 @@ int32_t streamStateSessionGetKVByCur(SStreamStateCur* pCur, SSessionKey* pKey, c } int32_t streamStateSessionClear(SStreamState* pState) { - SSessionKey key = {.win.skey = 0, .win.ekey = 0, .groupId = 0}; - streamStateSessionPut(pState, &key, NULL, 0); - SStreamStateCur* pCur = streamStateSessionSeekKeyNext(pState, &key); + SSessionKey key = {.win.skey = 0, .win.ekey = 0, .groupId = 0}; + SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext(pState, &key); while (1) { SSessionKey delKey = {0}; void* buf = NULL; int32_t size = 0; - int32_t code = streamStateSessionGetKVByCur(pCur, &delKey, buf, &size); + int32_t code = streamStateSessionGetKVByCur(pCur, &delKey, &buf, &size); if (code == 0) { + ASSERT(size > 0); memset(buf, 0, size); streamStateSessionPut(pState, &delKey, buf, size); } else { @@ -623,63 +653,106 @@ int32_t streamStateSessionClear(SStreamState* pState) { streamStateCurNext(pState, pCur); } streamStateFreeCur(pCur); - streamStateSessionDel(pState, &key); return 0; } -SStreamStateCur* streamStateSessionGetCur(SStreamState* pState, const SSessionKey* key) { - SStreamStateCur* pCur = streamStateSessionGetRanomCur(pState, key); - SSessionKey resKey = *key; - while (1) { +int32_t streamStateSessionGetKeyByRange(SStreamState* pState, const SSessionKey* key, SSessionKey* curKey) { + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + if (pCur == NULL) { + return -1; + } + pCur->number = pState->number; + if (tdbTbcOpen(pState->pSessionStateDb, &pCur->pCur, NULL) < 0) { + streamStateFreeCur(pCur); + return -1; + } + + SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; + int32_t c = 0; + if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) { + streamStateFreeCur(pCur); + return -1; + } + + SSessionKey resKey = *key; + int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, NULL, 0); + if (code == 0 && sessionRangeKeyCmpr(key, &resKey) == 0) { + *curKey = resKey; + streamStateFreeCur(pCur); + return code; + } + + if (c > 0) { + streamStateCurNext(pState, pCur); + code = streamStateSessionGetKVByCur(pCur, &resKey, NULL, 0); + if (code == 0 && sessionRangeKeyCmpr(key, &resKey) == 0) { + *curKey = resKey; + streamStateFreeCur(pCur); + return code; + } + } else if (c < 0) { streamStateCurPrev(pState, pCur); - SSessionKey tmpKey = *key; - int32_t code = streamStateSessionGetKVByCur(pCur, &tmpKey, NULL, 0); - if (code == 0 && sessionKeyCmpr(key, &tmpKey) == 0) { - resKey = tmpKey; - } else { - break; + code = streamStateSessionGetKVByCur(pCur, &resKey, NULL, 0); + if (code == 0 && sessionRangeKeyCmpr(key, &resKey) == 0) { + *curKey = resKey; + streamStateFreeCur(pCur); + return code; } } + streamStateFreeCur(pCur); - return streamStateSessionGetRanomCur(pState, &resKey); + return -1; } -int32_t streamStateSessionGetKey(SStreamState* pState, const SSessionKey* key, SSessionKey* curKey) { - SStreamStateCur* pCur = streamStateSessionGetRanomCur(pState, key); - SSessionKey resKey = *key; - int32_t res = -1; - while (1) { - SSessionKey tmpKey = *key; - int32_t code = streamStateSessionGetKVByCur(pCur, &tmpKey, NULL, 0); - if (code == 0 && sessionKeyCmpr(key, &tmpKey) == 0) { - res = 0; - resKey = tmpKey; - streamStateCurPrev(pState, pCur); - } else { - break; +int32_t streamStateSessionAddIfNotExist(SStreamState* pState, SSessionKey* key, TSKEY gap, void** pVal, + int32_t* pVLen) { + // todo refactor + int32_t res = 0; + SSessionKey originKey = *key; + SSessionKey searchKey = *key; + searchKey.win.skey = key->win.skey - gap; + searchKey.win.ekey = key->win.ekey + gap; + int32_t valSize = *pVLen; + void* tmp = tdbRealloc(NULL, valSize); + if (!tmp) { + return -1; + } + + SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentPrev(pState, key); + int32_t code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen); + if (code == 0) { + if (sessionRangeKeyCmpr(&searchKey, key) == 0) { + memcpy(tmp, *pVal, valSize); + streamStateSessionDel(pState, key); + goto _end; + } + streamStateCurNext(pState, pCur); + } else { + *key = originKey; + streamStateFreeCur(pCur); + pCur = streamStateSessionSeekKeyNext(pState, key); + } + + code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen); + if (code == 0) { + if (sessionRangeKeyCmpr(&searchKey, key) == 0) { + memcpy(tmp, *pVal, valSize); + streamStateSessionDel(pState, key); + goto _end; } } - *curKey = resKey; + + *key = originKey; + res = 1; + memset(tmp, 0, valSize); + +_end: + + *pVal = tmp; streamStateFreeCur(pCur); return res; } -int32_t streamStateSessionAddIfNotExist(SStreamState* pState, SSessionKey* key, void** pVal, int32_t* pVLen) { - // todo refactor - SStreamStateCur* pCur = streamStateSessionGetRanomCur(pState, key); - int32_t size = *pVLen; - void* tmp = NULL; - *pVal = tdbRealloc(NULL, size); - memset(*pVal, 0, size); - if (streamStateSessionGetKVByCur(pCur, key, (const void**)&tmp, pVLen) == 0) { - memcpy(*pVal, tmp, *pVLen); - streamStateFreeCur(pCur); - return 0; - } - streamStateFreeCur(pCur); - return 1; -} - int32_t streamStateStateAddIfNotExist(SStreamState* pState, SSessionKey* key, char* pKeyData, int32_t keyDataLen, state_key_cmpr_fn fn, void** pVal, int32_t* pVLen) { // todo refactor @@ -692,16 +765,18 @@ int32_t streamStateStateAddIfNotExist(SStreamState* pState, SSessionKey* key, ch } SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentPrev(pState, key); - int32_t code = streamStateSessionGetKVByCur(pCur, key, (const void**)pVal, pVLen); + int32_t code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen); if (code == 0) { if (key->win.skey <= tmpKey.win.skey && tmpKey.win.ekey <= key->win.ekey) { memcpy(tmp, *pVal, valSize); + streamStateSessionDel(pState, key); goto _end; } void* stateKey = (char*)(*pVal) + (valSize - keyDataLen); if (fn(pKeyData, stateKey) == true) { memcpy(tmp, *pVal, valSize); + streamStateSessionDel(pState, key); goto _end; } @@ -712,11 +787,12 @@ int32_t streamStateStateAddIfNotExist(SStreamState* pState, SSessionKey* key, ch pCur = streamStateSessionSeekKeyNext(pState, key); } - code = streamStateSessionGetKVByCur(pCur, key, (const void**)pVal, pVLen); + code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen); if (code == 0) { void* stateKey = (char*)(*pVal) + (valSize - keyDataLen); if (fn(pKeyData, stateKey) == true) { memcpy(tmp, *pVal, valSize); + streamStateSessionDel(pState, key); goto _end; } } @@ -746,8 +822,11 @@ char* streamStateSessionDump(SStreamState* pState) { tdbTbcMoveToFirst(pCur->pCur); SSessionKey key = {0}; - int32_t code = streamStateSessionGetKVByCur(pCur, &key, NULL, 0); + void* buf = NULL; + int32_t bufSize = 0; + int32_t code = streamStateSessionGetKVByCur(pCur, &key, &buf, &bufSize); if (code != 0) { + streamStateFreeCur(pCur); return NULL; } @@ -762,12 +841,14 @@ char* streamStateSessionDump(SStreamState* pState) { key = (SSessionKey){0}; code = streamStateSessionGetKVByCur(pCur, &key, NULL, 0); if (code != 0) { + streamStateFreeCur(pCur); return dumpBuf; } len += snprintf(dumpBuf + len, size - len, "||s:%15" PRId64 ",", key.win.skey); len += snprintf(dumpBuf + len, size - len, "e:%15" PRId64 ",", key.win.ekey); len += snprintf(dumpBuf + len, size - len, "g:%15" PRId64 "||", key.groupId); } + streamStateFreeCur(pCur); return dumpBuf; } #endif diff --git a/tests/script/tsim/stream/session0.sim b/tests/script/tsim/stream/session0.sim index afae8ef5da..5e95428e0a 100644 --- a/tests/script/tsim/stream/session0.sim +++ b/tests/script/tsim/stream/session0.sim @@ -201,7 +201,7 @@ if $loop_count == 10 then endi if $rows != 1 then - print ======$rows + print =====rows=$rows goto loop2 endi From b0a54d3fab495963f8c253b5332b9e17b5ad77eb Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 8 Nov 2022 14:54:42 +0800 Subject: [PATCH 05/23] enh(query): improve the perf. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 2 ++ source/libs/function/src/builtinsimpl.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index bfde5b3076..41d50f119d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -985,6 +985,8 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn uint8_t* p = pData->pData + tDataTypes[pData->type].bytes * pDumpInfo->rowIndex; memcpy(pColData->pData, p, remain * tDataTypes[pData->type].bytes); + ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); // make sure it is aligned to 8bit + // null value exists, check one-by-one if (pData->flag != HAS_VALUE) { for (int32_t j = pDumpInfo->rowIndex; rowIndex < remain; j += step, rowIndex++) { diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index ab972bdab0..00a148ea25 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -906,6 +906,7 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { case TSDB_DATA_TYPE_FLOAT: { float* plist = (float*)pCol->pData; + float val = 0; for (int32_t i = start; i < numOfRows + pInput->startRowIndex; ++i) { if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) { continue; @@ -913,8 +914,9 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { numOfElem += 1; pAvgRes->count += 1; - pAvgRes->sum.dsum += plist[i]; + val += plist[i]; } + pAvgRes->sum.dsum = val; break; } From f85d66f5eee9846157a5d100c96e96fb7319f6ee Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 8 Nov 2022 16:59:00 +0800 Subject: [PATCH 06/23] enh(query): opt query perf. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 3 ++- source/libs/function/src/builtinsimpl.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 41d50f119d..a20742c55f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -985,7 +985,8 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn uint8_t* p = pData->pData + tDataTypes[pData->type].bytes * pDumpInfo->rowIndex; memcpy(pColData->pData, p, remain * tDataTypes[pData->type].bytes); - ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); // make sure it is aligned to 8bit + // make sure it is aligned to 8bit + ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); // null value exists, check one-by-one if (pData->flag != HAS_VALUE) { diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 00a148ea25..541e2e1954 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -3096,7 +3096,7 @@ int32_t lastFunction(SqlFunctionCtx* pCtx) { numOfElems++; char* data = colDataGetData(pInputCol, i); - TSKEY cts = getRowPTs(pInput->pPTS, i); + TSKEY cts = *(TSKEY*) colDataGetData(pInput->pPTS, i); if (pResInfo->numOfRes == 0 || pInfo->ts < cts) { doSaveCurrentVal(pCtx, i, cts, type, data); pResInfo->numOfRes = 1; From 6e1a2b1e54ddca0372e1995a896e6d9cb4d9ff1d Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 8 Nov 2022 17:38:09 +0800 Subject: [PATCH 07/23] enh(query): improve the perf. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 14 +++++++++----- source/libs/function/src/builtinsimpl.c | 3 ++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index a20742c55f..77e6dc5649 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -957,11 +957,15 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn int32_t i = 0; SColumnInfoData* pColData = taosArrayGet(pResBlock->pDataBlock, i); if (pColData->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) { - if (asc) { - memcpy(pColData->pData, &pBlockData->aTSKEY[pDumpInfo->rowIndex], remain * sizeof(int64_t)); - } else { - for (int32_t j = pDumpInfo->rowIndex; rowIndex < remain; j += step) { - colDataAppendInt64(pColData, rowIndex++, &pBlockData->aTSKEY[j]); + memcpy(pColData->pData, &pBlockData->aTSKEY[pDumpInfo->rowIndex], remain * sizeof(int64_t)); + + if (!asc) { // reverse the array list + int32_t mid = remain / 2; + TSKEY* pts = (int64_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[i]; + pts[j] = pts[remain - j - 1]; + pts[remain - j - 1] = t; } } diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 541e2e1954..1f2fe5c658 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -3103,8 +3103,9 @@ int32_t lastFunction(SqlFunctionCtx* pCtx) { } } #endif + + // save selectivity value for column consisted of all null values if (numOfElems == 0) { - // save selectivity value for column consisted of all null values firstlastSaveTupleData(pCtx->pSrcBlock, pInput->startRowIndex, pCtx, pInfo); } SET_VAL(pResInfo, numOfElems, 1); From cc3e97da36bbe3bea44428f24c4a0ff4320bff21 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 8 Nov 2022 17:41:29 +0800 Subject: [PATCH 08/23] fix(query): fix an typo. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 77e6dc5649..f185e31de6 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -959,11 +959,13 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn if (pColData->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) { memcpy(pColData->pData, &pBlockData->aTSKEY[pDumpInfo->rowIndex], remain * sizeof(int64_t)); + + // todo: opt perf by extract the loop if (!asc) { // reverse the array list - int32_t mid = remain / 2; + int32_t mid = remain >> 1u; TSKEY* pts = (int64_t*)pColData->pData; for (int32_t j = 0; j < mid; ++j) { - int64_t t = pts[i]; + int64_t t = pts[j]; pts[j] = pts[remain - j - 1]; pts[remain - j - 1] = t; } From 663138ba1ef0959b9a77bb199070445d3c764db2 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 8 Nov 2022 18:33:57 +0800 Subject: [PATCH 09/23] enh(query): optimize the query perf. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 129 +++++++++++++++++++------ 1 file changed, 99 insertions(+), 30 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index f185e31de6..2ffbe78803 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -891,7 +891,7 @@ static int doBinarySearchKey(TSKEY* keyList, int num, int pos, TSKEY key, int or } } -int32_t getEndPosInDataBlock(STsdbReader* pReader, SBlockData* pBlockData, SDataBlk* pBlock, int32_t pos) { +static int32_t getEndPosInDataBlock(STsdbReader* pReader, SBlockData* pBlockData, SDataBlk* pBlock, int32_t pos) { // NOTE: reverse the order to find the end position in data block int32_t endPos = -1; bool asc = ASCENDING_TRAVERSE(pReader->order); @@ -908,6 +908,26 @@ int32_t getEndPosInDataBlock(STsdbReader* pReader, SBlockData* pBlockData, SData return endPos; } +static void copyPrimaryTsCol(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, SColumnInfoData* pColData, + int32_t dumpedRows, bool asc) { + if (asc) { + memcpy(pColData->pData, &pBlockData->aTSKEY[pDumpInfo->rowIndex], dumpedRows * sizeof(int64_t)); + } else { + int32_t startIndex = pDumpInfo->rowIndex - dumpedRows + 1; + memcpy(pColData->pData, &pBlockData->aTSKEY[startIndex], dumpedRows * sizeof(int64_t)); + + // todo: opt perf by extract the loop + // reverse the array list + int32_t mid = dumpedRows >> 1u; + int64_t* pts = (int64_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + } +} + static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) { SReaderStatus* pStatus = &pReader->status; SDataBlockIter* pBlockIter = &pStatus->blockIter; @@ -947,30 +967,17 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn } endIndex += step; - int32_t remain = asc ? (endIndex - pDumpInfo->rowIndex) : (pDumpInfo->rowIndex - endIndex); - if (remain > pReader->capacity) { // output buffer check - remain = pReader->capacity; + int32_t dumpedRows = asc ? (endIndex - pDumpInfo->rowIndex) : (pDumpInfo->rowIndex - endIndex); + if (dumpedRows > pReader->capacity) { // output buffer check + dumpedRows = pReader->capacity; } + int32_t i = 0; int32_t rowIndex = 0; - int32_t i = 0; SColumnInfoData* pColData = taosArrayGet(pResBlock->pDataBlock, i); if (pColData->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) { - memcpy(pColData->pData, &pBlockData->aTSKEY[pDumpInfo->rowIndex], remain * sizeof(int64_t)); - - - // todo: opt perf by extract the loop - if (!asc) { // reverse the array list - int32_t mid = remain >> 1u; - TSKEY* pts = (int64_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int64_t t = pts[j]; - pts[j] = pts[remain - j - 1]; - pts[remain - j - 1] = t; - } - } - + copyPrimaryTsCol(pBlockData, pDumpInfo, pColData, dumpedRows, asc); i += 1; } @@ -985,18 +992,80 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn colIndex += 1; } else if (pData->cid == pColData->info.colId) { if (pData->flag == HAS_NONE || pData->flag == HAS_NULL || pData->flag == (HAS_NULL | HAS_NONE)) { - colDataAppendNNULL(pColData, 0, remain); + colDataAppendNNULL(pColData, 0, dumpedRows); } else { - if (IS_NUMERIC_TYPE(pColData->info.type) && asc) { - uint8_t* p = pData->pData + tDataTypes[pData->type].bytes * pDumpInfo->rowIndex; - memcpy(pColData->pData, p, remain * tDataTypes[pData->type].bytes); + if (IS_MATHABLE_TYPE(pColData->info.type)) { + + uint8_t* p = NULL; + if (asc) { + p = pData->pData + tDataTypes[pData->type].bytes * pDumpInfo->rowIndex; + } else { + int32_t startIndex = pDumpInfo->rowIndex - dumpedRows + 1; + p = pData->pData + tDataTypes[pData->type].bytes * startIndex; + } // make sure it is aligned to 8bit ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); + memcpy(pColData->pData, p, dumpedRows * tDataTypes[pData->type].bytes); + + if (!asc) { + switch(pColData->info.type) { + case TSDB_DATA_TYPE_TIMESTAMP: + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_UBIGINT: + { + int32_t mid = dumpedRows >> 1u; + int64_t* pts = (int64_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + + case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_UTINYINT: { + int32_t mid = dumpedRows >> 1u; + int8_t* pts = (int8_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_USMALLINT: { + int32_t mid = dumpedRows >> 1u; + int16_t* pts = (int16_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_UINT: { + int32_t mid = dumpedRows >> 1u; + int32_t* pts = (int32_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + } + } // null value exists, check one-by-one if (pData->flag != HAS_VALUE) { - for (int32_t j = pDumpInfo->rowIndex; rowIndex < remain; j += step, rowIndex++) { + for (int32_t j = pDumpInfo->rowIndex; rowIndex < dumpedRows; j += step, rowIndex++) { uint8_t v = tColDataGetBitValue(pData, j); if (v == 0 || v == 1) { colDataSetNull_f(pColData->nullbitmap, rowIndex); @@ -1004,7 +1073,7 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn } } } else { - for (int32_t j = pDumpInfo->rowIndex; rowIndex < remain; j += step) { + for (int32_t j = pDumpInfo->rowIndex; rowIndex < dumpedRows; j += step) { tColDataGetValue(pData, j, &cv); doCopyColVal(pColData, rowIndex++, i, &cv, pSupInfo); } @@ -1014,7 +1083,7 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn colIndex += 1; i += 1; } else { // the specified column does not exist in file block, fill with null data - colDataAppendNNULL(pColData, 0, remain); + colDataAppendNNULL(pColData, 0, dumpedRows); i += 1; } } @@ -1022,12 +1091,12 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn // fill the mis-matched columns with null value while (i < numOfOutputCols) { pColData = taosArrayGet(pResBlock->pDataBlock, i); - colDataAppendNNULL(pColData, 0, remain); + colDataAppendNNULL(pColData, 0, dumpedRows); i += 1; } - pResBlock->info.rows = remain; - pDumpInfo->rowIndex += step * remain; + pResBlock->info.rows = dumpedRows; + pDumpInfo->rowIndex += step * dumpedRows; // check if current block are all handled if (pDumpInfo->rowIndex >= 0 && pDumpInfo->rowIndex < pBlock->nRow) { @@ -1046,7 +1115,7 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn int32_t unDumpedRows = asc ? pBlock->nRow - pDumpInfo->rowIndex : pDumpInfo->rowIndex + 1; tsdbDebug("%p copy file block to sdatablock, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 ", rows:%d, remain:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%.2f ms, %s", - pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, remain, + pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, dumpedRows, unDumpedRows, pBlock->minVer, pBlock->maxVer, elapsedTime, pReader->idStr); return TSDB_CODE_SUCCESS; From ecf3e2c00064e1ca7cc985d5989253d0655293b4 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 8 Nov 2022 18:44:28 +0800 Subject: [PATCH 10/23] enh(query): optimize the perf. --- source/libs/function/src/builtinsimpl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 1f2fe5c658..198892a99e 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -3096,9 +3096,9 @@ int32_t lastFunction(SqlFunctionCtx* pCtx) { numOfElems++; char* data = colDataGetData(pInputCol, i); - TSKEY cts = *(TSKEY*) colDataGetData(pInput->pPTS, i); - if (pResInfo->numOfRes == 0 || pInfo->ts < cts) { - doSaveCurrentVal(pCtx, i, cts, type, data); + TSKEY* cts = (TSKEY*) colDataGetData(pInput->pPTS, i); + if (pResInfo->numOfRes == 0 || pInfo->ts < (*cts)) { + doSaveCurrentVal(pCtx, i, *cts, type, data); pResInfo->numOfRes = 1; } } From dc2c419f2212c36b26a183fc7187fc6d10d46ef1 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 8 Nov 2022 19:01:35 +0800 Subject: [PATCH 11/23] enh(query): optimize the query perf. --- source/libs/function/src/builtinsimpl.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 198892a99e..5f3d9c138e 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -3088,6 +3088,7 @@ int32_t lastFunction(SqlFunctionCtx* pCtx) { } } #else + int64_t* pts = (int64_t*)pInput->pPTS->pData; for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { if (pInputCol->hasNull && colDataIsNull(pInputCol, pInput->totalRows, i, pColAgg)) { continue; @@ -3096,9 +3097,9 @@ int32_t lastFunction(SqlFunctionCtx* pCtx) { numOfElems++; char* data = colDataGetData(pInputCol, i); - TSKEY* cts = (TSKEY*) colDataGetData(pInput->pPTS, i); - if (pResInfo->numOfRes == 0 || pInfo->ts < (*cts)) { - doSaveCurrentVal(pCtx, i, *cts, type, data); + TSKEY cts = pts[i]; + if (pResInfo->numOfRes == 0 || pInfo->ts < cts) { + doSaveCurrentVal(pCtx, i, cts, type, data); pResInfo->numOfRes = 1; } } @@ -3285,11 +3286,13 @@ int32_t lastRowFunction(SqlFunctionCtx* pCtx) { } } #else + + int64_t* pts = (int64_t*)pInput->pPTS->pData; for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { char* data = colDataGetData(pInputCol, i); - TSKEY cts = getRowPTs(pInput->pPTS, i); - numOfElems++; + TSKEY cts = pts[i]; + numOfElems++; if (pResInfo->numOfRes == 0 || pInfo->ts < cts) { doSaveLastrow(pCtx, data, i, cts, pInfo); pResInfo->numOfRes = 1; From a4e96ca8332152faa36d922d68459b203f9b04f1 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 8 Nov 2022 22:43:12 +0800 Subject: [PATCH 12/23] refact: adjust sync resp mgr --- source/libs/sync/inc/syncRespMgr.h | 9 +-- source/libs/sync/inc/syncTools.h | 3 - source/libs/sync/src/syncMain.c | 109 +++------------------------ source/libs/sync/src/syncRespMgr.c | 114 +++++++++++++++-------------- 4 files changed, 74 insertions(+), 161 deletions(-) diff --git a/source/libs/sync/inc/syncRespMgr.h b/source/libs/sync/inc/syncRespMgr.h index 22e1005e5c..9026ecb66e 100644 --- a/source/libs/sync/inc/syncRespMgr.h +++ b/source/libs/sync/inc/syncRespMgr.h @@ -41,13 +41,12 @@ typedef struct SSyncRespMgr { SSyncRespMgr *syncRespMgrCreate(void *data, int64_t ttl); void syncRespMgrDestroy(SSyncRespMgr *pObj); -int64_t syncRespMgrAdd(SSyncRespMgr *pObj, SRespStub *pStub); -int32_t syncRespMgrDel(SSyncRespMgr *pObj, uint64_t index); -int32_t syncRespMgrGet(SSyncRespMgr *pObj, uint64_t index, SRespStub *pStub); -int32_t syncRespMgrGetAndDel(SSyncRespMgr *pObj, uint64_t index, SRespStub *pStub); +uint64_t syncRespMgrAdd(SSyncRespMgr *pObj, const SRespStub *pStub); +int32_t syncRespMgrDel(SSyncRespMgr *pObj, uint64_t seq); +int32_t syncRespMgrGet(SSyncRespMgr *pObj, uint64_t seq, SRespStub *pStub); +int32_t syncRespMgrGetAndDel(SSyncRespMgr *pObj, uint64_t seq, SRpcHandleInfo *pInfo); void syncRespClean(SSyncRespMgr *pObj); void syncRespCleanRsp(SSyncRespMgr *pObj); -void syncRespCleanByTTL(SSyncRespMgr *pObj, int64_t ttl, bool rsp); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncTools.h b/source/libs/sync/inc/syncTools.h index 932432d1f8..193579030f 100644 --- a/source/libs/sync/inc/syncTools.h +++ b/source/libs/sync/inc/syncTools.h @@ -26,9 +26,6 @@ typedef struct SRaftId { SyncGroupId vgId; } SRaftId; -// for compatibility, the same as syncPropose -int32_t syncForwardToPeer(int64_t rid, SRpcMsg* pMsg, bool isWeak); - // ------------------ for debug ------------------- void syncRpcMsgPrint(SRpcMsg* pMsg); void syncRpcMsgPrint2(char* s, SRpcMsg* pMsg); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 0d7306e4fd..c42e75a9c0 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -452,11 +452,6 @@ int32_t syncNodeLeaderTransferTo(SSyncNode* pSyncNode, SNodeInfo newLeader) { return ret; } -int32_t syncForwardToPeer(int64_t rid, SRpcMsg* pMsg, bool isWeak) { - int32_t ret = syncPropose(rid, pMsg, isWeak); - return ret; -} - SSyncState syncGetState(int64_t rid) { SSyncState state = {.state = TAOS_SYNC_STATE_ERROR}; @@ -558,109 +553,27 @@ SyncIndex syncNodeGetSnapshotConfigIndex(SSyncNode* pSyncNode, SyncIndex snapsho return lastIndex; } -#if 0 -SyncTerm syncGetMyTerm(int64_t rid) { - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - return TAOS_SYNC_STATE_ERROR; - } - ASSERT(rid == pSyncNode->rid); - SyncTerm term = pSyncNode->pRaftStore->currentTerm; - - syncNodeRelease(pSyncNode); - return term; -} - -SyncIndex syncGetLastIndex(int64_t rid) { - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - return SYNC_INDEX_INVALID; - } - ASSERT(rid == pSyncNode->rid); - SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); - - syncNodeRelease(pSyncNode); - return lastIndex; -} - -SyncIndex syncGetCommitIndex(int64_t rid) { - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - return SYNC_INDEX_INVALID; - } - ASSERT(rid == pSyncNode->rid); - SyncIndex cmtIndex = pSyncNode->commitIndex; - - syncNodeRelease(pSyncNode); - return cmtIndex; -} - -SyncGroupId syncGetVgId(int64_t rid) { - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - return TAOS_SYNC_STATE_ERROR; - } - ASSERT(rid == pSyncNode->rid); - SyncGroupId vgId = pSyncNode->vgId; - - syncNodeRelease(pSyncNode); - return vgId; -} - -void syncGetEpSet(int64_t rid, SEpSet* pEpSet) { - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - memset(pEpSet, 0, sizeof(*pEpSet)); - return; - } - ASSERT(rid == pSyncNode->rid); - pEpSet->numOfEps = 0; - for (int32_t i = 0; i < pSyncNode->pRaftCfg->cfg.replicaNum; ++i) { - snprintf(pEpSet->eps[i].fqdn, sizeof(pEpSet->eps[i].fqdn), "%s", (pSyncNode->pRaftCfg->cfg.nodeInfo)[i].nodeFqdn); - pEpSet->eps[i].port = (pSyncNode->pRaftCfg->cfg.nodeInfo)[i].nodePort; - (pEpSet->numOfEps)++; - sInfo("vgId:%d, sync get epset: index:%d %s:%d", pSyncNode->vgId, i, pEpSet->eps[i].fqdn, pEpSet->eps[i].port); - } - pEpSet->inUse = pSyncNode->pRaftCfg->cfg.myIndex; - sInfo("vgId:%d, sync get epset in-use:%d", pSyncNode->vgId, pEpSet->inUse); - - syncNodeRelease(pSyncNode); -} -#endif - void syncGetRetryEpSet(int64_t rid, SEpSet* pEpSet) { - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - memset(pEpSet, 0, sizeof(*pEpSet)); - return; - } - pEpSet->numOfEps = 0; + + SSyncNode* pSyncNode = syncNodeAcquire(rid); + if (pSyncNode == NULL) return; + for (int32_t i = 0; i < pSyncNode->pRaftCfg->cfg.replicaNum; ++i) { - snprintf(pEpSet->eps[i].fqdn, sizeof(pEpSet->eps[i].fqdn), "%s", (pSyncNode->pRaftCfg->cfg.nodeInfo)[i].nodeFqdn); - pEpSet->eps[i].port = (pSyncNode->pRaftCfg->cfg.nodeInfo)[i].nodePort; - (pEpSet->numOfEps)++; - sInfo("vgId:%d, sync get retry epset: index:%d %s:%d", pSyncNode->vgId, i, pEpSet->eps[i].fqdn, - pEpSet->eps[i].port); + SEp* pEp = &pEpSet->eps[i]; + tstrncpy(pEp->fqdn, pSyncNode->pRaftCfg->cfg.nodeInfo[i].nodeFqdn, TSDB_FQDN_LEN); + pEp->port = (pSyncNode->pRaftCfg->cfg.nodeInfo)[i].nodePort; + pEpSet->numOfEps++; + sInfo("vgId:%d, sync get retry epset, index:%d %s:%d", pSyncNode->vgId, i, pEp->fqdn, pEp->port); } if (pEpSet->numOfEps > 0) { pEpSet->inUse = (pSyncNode->pRaftCfg->cfg.myIndex + 1) % pEpSet->numOfEps; } - sInfo("vgId:%d, sync get retry epset in-use:%d", pSyncNode->vgId, pEpSet->inUse); + sInfo("vgId:%d, sync get retry epset numOfEps:%d inUse:%d", pSyncNode->vgId, pEpSet->numOfEps, pEpSet->inUse); syncNodeRelease(pSyncNode); } -static void syncGetAndDelRespRpc(SSyncNode* pSyncNode, uint64_t index, SRpcHandleInfo* pInfo) { - SRespStub stub; - int32_t ret = syncRespMgrGetAndDel(pSyncNode->pSyncRespMgr, index, &stub); - if (ret == 1) { - *pInfo = stub.rpcMsg.info; - } - - sTrace("vgId:%d, get seq:%" PRIu64 " rpc handle:%p", pSyncNode->vgId, index, pInfo->handle); -} - int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak) { SSyncNode* pSyncNode = syncNodeAcquire(rid); if (pSyncNode == NULL) { @@ -2759,7 +2672,7 @@ int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endInde .flag = flag, }; - syncGetAndDelRespRpc(ths, cbMeta.seqNum, &rpcMsg.info); + syncRespMgrGetAndDel(ths->pSyncRespMgr, cbMeta.seqNum, &rpcMsg.info); ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, &cbMeta); } } diff --git a/source/libs/sync/src/syncRespMgr.c b/source/libs/sync/src/syncRespMgr.c index 56d5e944b8..de8f1927ae 100644 --- a/source/libs/sync/src/syncRespMgr.c +++ b/source/libs/sync/src/syncRespMgr.c @@ -13,21 +13,22 @@ * along with this program. If not, see . */ +#define _DEFAULT_SOURCE #include "syncRespMgr.h" #include "syncRaftEntry.h" #include "syncRaftStore.h" SSyncRespMgr *syncRespMgrCreate(void *data, int64_t ttl) { - SSyncRespMgr *pObj = (SSyncRespMgr *)taosMemoryMalloc(sizeof(SSyncRespMgr)); + SSyncRespMgr *pObj = taosMemoryCalloc(1, sizeof(SSyncRespMgr)); if (pObj == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } - memset(pObj, 0, sizeof(SSyncRespMgr)); pObj->pRespHash = taosHashInit(sizeof(uint64_t), taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); - ASSERT(pObj->pRespHash != NULL); + if (pObj->pRespHash == NULL) return NULL; + pObj->ttl = ttl; pObj->data = data; pObj->seqNum = 0; @@ -38,93 +39,84 @@ SSyncRespMgr *syncRespMgrCreate(void *data, int64_t ttl) { void syncRespMgrDestroy(SSyncRespMgr *pObj) { if (pObj != NULL) { - taosThreadMutexLock(&(pObj->mutex)); + taosThreadMutexLock(&pObj->mutex); taosHashCleanup(pObj->pRespHash); - taosThreadMutexUnlock(&(pObj->mutex)); + taosThreadMutexUnlock(&pObj->mutex); taosThreadMutexDestroy(&(pObj->mutex)); taosMemoryFree(pObj); } } -int64_t syncRespMgrAdd(SSyncRespMgr *pObj, SRespStub *pStub) { - taosThreadMutexLock(&(pObj->mutex)); +uint64_t syncRespMgrAdd(SSyncRespMgr *pObj, const SRespStub *pStub) { + taosThreadMutexLock(&pObj->mutex); - uint64_t keyCode = ++(pObj->seqNum); - taosHashPut(pObj->pRespHash, &keyCode, sizeof(keyCode), pStub, sizeof(SRespStub)); + uint64_t seq = ++(pObj->seqNum); + int32_t code = taosHashPut(pObj->pRespHash, &seq, sizeof(uint64_t), pStub, sizeof(SRespStub)); + sNTrace(pObj->data, "save message handle:%p, type:%s seq:%" PRIu64 " code:0x%x", pStub->rpcMsg.info.handle, + TMSG_INFO(pStub->rpcMsg.msgType), seq, code); - sNTrace(pObj->data, "save message handle, type:%s seq:%" PRIu64 " handle:%p", TMSG_INFO(pStub->rpcMsg.msgType), - keyCode, pStub->rpcMsg.info.handle); - taosThreadMutexUnlock(&(pObj->mutex)); - return keyCode; + taosThreadMutexUnlock(&pObj->mutex); + return seq; } -int32_t syncRespMgrDel(SSyncRespMgr *pObj, uint64_t index) { - taosThreadMutexLock(&(pObj->mutex)); +int32_t syncRespMgrDel(SSyncRespMgr *pObj, uint64_t seq) { + taosThreadMutexLock(&pObj->mutex); - taosHashRemove(pObj->pRespHash, &index, sizeof(index)); + int32_t code = taosHashRemove(pObj->pRespHash, &seq, sizeof(seq)); + sNTrace(pObj->data, "remove message handle, seq:%" PRIu64 " code:%d", seq, code); - taosThreadMutexUnlock(&(pObj->mutex)); - return 0; + taosThreadMutexUnlock(&pObj->mutex); + return code; } -int32_t syncRespMgrGet(SSyncRespMgr *pObj, uint64_t index, SRespStub *pStub) { - taosThreadMutexLock(&(pObj->mutex)); +int32_t syncRespMgrGet(SSyncRespMgr *pObj, uint64_t seq, SRespStub *pStub) { + taosThreadMutexLock(&pObj->mutex); - void *pTmp = taosHashGet(pObj->pRespHash, &index, sizeof(index)); + SRespStub *pTmp = taosHashGet(pObj->pRespHash, &seq, sizeof(uint64_t)); if (pTmp != NULL) { memcpy(pStub, pTmp, sizeof(SRespStub)); + sNTrace(pObj->data, "get message handle, type:%s seq:%" PRIu64 " handle:%p", TMSG_INFO(pStub->rpcMsg.msgType), seq, + pStub->rpcMsg.info.handle); - sNTrace(pObj->data, "get message handle, type:%s seq:%" PRIu64 " handle:%p", TMSG_INFO(pStub->rpcMsg.msgType), - index, pStub->rpcMsg.info.handle); - taosThreadMutexUnlock(&(pObj->mutex)); + taosThreadMutexUnlock(&pObj->mutex); return 1; // get one object } - taosThreadMutexUnlock(&(pObj->mutex)); + + taosThreadMutexUnlock(&pObj->mutex); return 0; // get none object } -int32_t syncRespMgrGetAndDel(SSyncRespMgr *pObj, uint64_t index, SRespStub *pStub) { - taosThreadMutexLock(&(pObj->mutex)); +int32_t syncRespMgrGetAndDel(SSyncRespMgr *pObj, uint64_t seq, SRpcHandleInfo *pInfo) { + taosThreadMutexLock(&pObj->mutex); - void *pTmp = taosHashGet(pObj->pRespHash, &index, sizeof(index)); - if (pTmp != NULL) { - memcpy(pStub, pTmp, sizeof(SRespStub)); + SRespStub *pStub = taosHashGet(pObj->pRespHash, &seq, sizeof(uint64_t)); + if (pStub != NULL) { + *pInfo = pStub->rpcMsg.info; + sNTrace(pObj->data, "get-and-del message handle:%p, type:%s seq:%" PRIu64, pStub->rpcMsg.info.handle, + TMSG_INFO(pStub->rpcMsg.msgType), seq); + taosHashRemove(pObj->pRespHash, &seq, sizeof(uint64_t)); - sNTrace(pObj->data, "get-and-del message handle, type:%s seq:%" PRIu64 " handle:%p", - TMSG_INFO(pStub->rpcMsg.msgType), index, pStub->rpcMsg.info.handle); - taosHashRemove(pObj->pRespHash, &index, sizeof(index)); - taosThreadMutexUnlock(&(pObj->mutex)); + taosThreadMutexUnlock(&pObj->mutex); return 1; // get one object } - taosThreadMutexUnlock(&(pObj->mutex)); + + taosThreadMutexUnlock(&pObj->mutex); return 0; // get none object } -void syncRespCleanRsp(SSyncRespMgr *pObj) { - taosThreadMutexLock(&(pObj->mutex)); - syncRespCleanByTTL(pObj, -1, true); - taosThreadMutexUnlock(&(pObj->mutex)); -} - -void syncRespClean(SSyncRespMgr *pObj) { - taosThreadMutexLock(&(pObj->mutex)); - syncRespCleanByTTL(pObj, pObj->ttl, false); - taosThreadMutexUnlock(&(pObj->mutex)); -} - -void syncRespCleanByTTL(SSyncRespMgr *pObj, int64_t ttl, bool rsp) { +static void syncRespCleanByTTL(SSyncRespMgr *pObj, int64_t ttl, bool rsp) { SRespStub *pStub = (SRespStub *)taosHashIterate(pObj->pRespHash, NULL); int cnt = 0; int sum = 0; SSyncNode *pSyncNode = pObj->data; - SArray *delIndexArray = taosArrayInit(0, sizeof(uint64_t)); - ASSERT(delIndexArray != NULL); - sDebug("vgId:%d, resp mgr begin clean by ttl", pSyncNode->vgId); + SArray *delIndexArray = taosArrayInit(4, sizeof(uint64_t)); + if (delIndexArray == NULL) return; + sDebug("vgId:%d, resp mgr begin clean by ttl", pSyncNode->vgId); while (pStub) { size_t len; - void * key = taosHashGetKey(pStub, &len); + void *key = taosHashGetKey(pStub, &len); uint64_t *pSeqNum = (uint64_t *)key; sum++; @@ -149,15 +141,15 @@ void syncRespCleanByTTL(SSyncRespMgr *pObj, int64_t ttl, bool rsp) { pStub->rpcMsg.contLen = 0; // TODO: and make rpcMsg body, call commit cb - // pSyncNode->pFsm->FpCommitCb(pSyncNode->pFsm, &(pStub->rpcMsg), cbMeta); + // pSyncNode->pFsm->FpCommitCb(pSyncNode->pFsm, &pStub->rpcMsg, cbMeta); pStub->rpcMsg.code = TSDB_CODE_SYN_NOT_LEADER; if (pStub->rpcMsg.info.handle != NULL) { - tmsgSendRsp(&(pStub->rpcMsg)); + tmsgSendRsp(&pStub->rpcMsg); } } - pStub = (SRespStub *)taosHashIterate(pObj->pRespHash, pStub); + pStub = taosHashIterate(pObj->pRespHash, pStub); } int32_t arraySize = taosArrayGetSize(delIndexArray); @@ -170,3 +162,15 @@ void syncRespCleanByTTL(SSyncRespMgr *pObj, int64_t ttl, bool rsp) { } taosArrayDestroy(delIndexArray); } + +void syncRespCleanRsp(SSyncRespMgr *pObj) { + taosThreadMutexLock(&pObj->mutex); + syncRespCleanByTTL(pObj, -1, true); + taosThreadMutexUnlock(&pObj->mutex); +} + +void syncRespClean(SSyncRespMgr *pObj) { + taosThreadMutexLock(&pObj->mutex); + syncRespCleanByTTL(pObj, pObj->ttl, false); + taosThreadMutexUnlock(&pObj->mutex); +} From 1c8a2d696dd038ba213926b2252170b2d0d89f79 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 8 Nov 2022 22:59:05 +0800 Subject: [PATCH 13/23] fix: send response on enqueue msg failed --- source/libs/sync/src/syncMain.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 0d7306e4fd..c6dc62415e 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -719,8 +719,11 @@ int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak) { sNTrace(pSyncNode, "propose message, type:%s", TMSG_INFO(pMsg->msgType)); ret = (*pSyncNode->syncEqMsg)(pSyncNode->msgcb, &rpcMsg); if (ret != 0) { - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + if (terrno != 0) ret = terrno; sError("vgId:%d, failed to enqueue msg since %s", pSyncNode->vgId, terrstr()); + syncRespMgrDel(pSyncNode->pSyncRespMgr, seqNum); + SRpcMsg rsp = {.code = ret, .info = pMsg->info}; + tmsgSendRsp(&rsp); } } From bf1aca1b9ea8adff3d8c59c1effe913a95c28204 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 8 Nov 2022 23:00:14 +0800 Subject: [PATCH 14/23] fix: send response on enqueue msg failed --- source/libs/sync/src/syncMain.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index c6dc62415e..19015c00b9 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -719,11 +719,8 @@ int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak) { sNTrace(pSyncNode, "propose message, type:%s", TMSG_INFO(pMsg->msgType)); ret = (*pSyncNode->syncEqMsg)(pSyncNode->msgcb, &rpcMsg); if (ret != 0) { - if (terrno != 0) ret = terrno; sError("vgId:%d, failed to enqueue msg since %s", pSyncNode->vgId, terrstr()); syncRespMgrDel(pSyncNode->pSyncRespMgr, seqNum); - SRpcMsg rsp = {.code = ret, .info = pMsg->info}; - tmsgSendRsp(&rsp); } } From 68f2f9211691be18871802d48233115a730edec1 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 9 Nov 2022 09:07:21 +0800 Subject: [PATCH 15/23] fix(query): fix the syntax errors. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 2 +- source/libs/executor/src/executorimpl.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 4914deed30..0f970b6830 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -1117,7 +1117,7 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn int32_t unDumpedRows = asc ? pBlock->nRow - pDumpInfo->rowIndex : pDumpInfo->rowIndex + 1; tsdbDebug("%p copy file block to sdatablock, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 ", rows:%d, remain:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%.2f ms, %s", - pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, remain, + pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, dumpedRows, unDumpedRows, pBlock->minVer, pBlock->maxVer, elapsedTime, pReader->idStr); return TSDB_CODE_SUCCESS; diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 52c038c500..7d662f8784 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1897,7 +1897,7 @@ static void concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SExchangeIn if (pRsp->numOfRows == 0) { pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED; qDebug("%s vgId:%d, taskId:0x%" PRIx64 " execId:%d index:%d completed, rowsOfSource:%" PRIu64 - ", totalRows:%" PRIu64 ", completed:%d try next %d/%" PRIzu, + ", totalRows:%" PRIu64 ", try next %d/%" PRIzu, GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, i, pDataInfo->totalRows, pExchangeInfo->loadInfo.totalRows, i + 1, totalSources); taosMemoryFreeClear(pDataInfo->pRsp); From e3aabacf9a8a6f7e3da1cb65e7454cc3622b3ad4 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 9 Nov 2022 09:47:21 +0800 Subject: [PATCH 16/23] fix(query): fix error in min/max functions. --- source/libs/function/src/builtinsimpl.c | 46 ++++++++++++------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 58e41d5abb..e3a76ad33a 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -911,7 +911,7 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { case TSDB_DATA_TYPE_FLOAT: { float* plist = (float*)pCol->pData; - float val = 0; +// float val = 0; for (int32_t i = start; i < numOfRows + pInput->startRowIndex; ++i) { if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) { continue; @@ -919,9 +919,9 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { numOfElem += 1; pAvgRes->count += 1; - val += plist[i]; + pAvgRes->sum.dsum += plist[i]; } - pAvgRes->sum.dsum = val; +// pAvgRes->sum.dsum = val; break; } @@ -1282,14 +1282,14 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { // ignore the equivalent data value // NOTE: An faster version to avoid one additional comparison with FPU. if (isMinFunc) { // min - if (*val < pData[i]) { + if (*val > pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); } } } else { // max - if (*val > pData[i]) { + if (*val < pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); @@ -1319,14 +1319,14 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { // ignore the equivalent data value // NOTE: An faster version to avoid one additional comparison with FPU. if (isMinFunc) { // min - if (*val < pData[i]) { + if (*val > pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); } } } else { // max - if (*val > pData[i]) { + if (*val < pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); @@ -1356,14 +1356,14 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { // ignore the equivalent data value // NOTE: An faster version to avoid one additional comparison with FPU. if (isMinFunc) { // min - if (*val < pData[i]) { + if (*val > pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); } } } else { // max - if (*val > pData[i]) { + if (*val < pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); @@ -1393,14 +1393,14 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { // ignore the equivalent data value // NOTE: An faster version to avoid one additional comparison with FPU. if (isMinFunc) { // min - if (*val < pData[i]) { + if (*val > pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); } } } else { // max - if (*val > pData[i]) { + if (*val < pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); @@ -1432,14 +1432,14 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { // ignore the equivalent data value // NOTE: An faster version to avoid one additional comparison with FPU. if (isMinFunc) { // min - if (*val < pData[i]) { + if (*val > pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); } } } else { // max - if (*val > pData[i]) { + if (*val < pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); @@ -1469,14 +1469,14 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { // ignore the equivalent data value // NOTE: An faster version to avoid one additional comparison with FPU. if (isMinFunc) { // min - if (*val < pData[i]) { + if (*val > pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); } } } else { // max - if (*val > pData[i]) { + if (*val < pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); @@ -1506,14 +1506,14 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { // ignore the equivalent data value // NOTE: An faster version to avoid one additional comparison with FPU. if (isMinFunc) { // min - if (*val < pData[i]) { + if (*val > pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); } } } else { // max - if (*val > pData[i]) { + if (*val < pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); @@ -1543,14 +1543,14 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { // ignore the equivalent data value // NOTE: An faster version to avoid one additional comparison with FPU. if (isMinFunc) { // min - if (*val < pData[i]) { + if (*val > pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); } } } else { // max - if (*val > pData[i]) { + if (*val < pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); @@ -1581,14 +1581,14 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { // ignore the equivalent data value // NOTE: An faster version to avoid one additional comparison with FPU. if (isMinFunc) { // min - if (*val < pData[i]) { + if (*val > pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); } } } else { // max - if (*val > pData[i]) { + if (*val < pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); @@ -1629,14 +1629,14 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { #endif // NOTE: An faster version to avoid one additional comparison with FPU. if (isMinFunc) { // min - if (*val < pData[i]) { + if (*val > pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); } } } else { // max - if (*val > pData[i]) { + if (*val < pData[i]) { *val = pData[i]; if (pCtx->subsidiaries.num > 0) { updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos); From 0c427b5f5b63408f4d764b096b46ebe9b8882e76 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 9 Nov 2022 10:39:32 +0800 Subject: [PATCH 17/23] refactor: do some internal refactor. --- include/common/tdataformat.h | 2 +- source/common/src/tdataformat.c | 2 +- source/dnode/vnode/src/tsdb/tsdbRead.c | 173 +++++++++++++----------- source/libs/function/src/builtinsimpl.c | 1 + 4 files changed, 97 insertions(+), 81 deletions(-) diff --git a/include/common/tdataformat.h b/include/common/tdataformat.h index 71e260c001..2eda2f66cc 100644 --- a/include/common/tdataformat.h +++ b/include/common/tdataformat.h @@ -130,7 +130,7 @@ void tColDataInit(SColData *pColData, int16_t cid, int8_t type, int8_t smaOn) void tColDataClear(SColData *pColData); int32_t tColDataAppendValue(SColData *pColData, SColVal *pColVal); void tColDataGetValue(SColData *pColData, int32_t iVal, SColVal *pColVal); -uint8_t tColDataGetBitValue(SColData *pColData, int32_t iVal); +uint8_t tColDataGetBitValue(const SColData *pColData, int32_t iVal); int32_t tColDataCopy(SColData *pColDataSrc, SColData *pColDataDest); extern void (*tColDataCalcSMA[])(SColData *pColData, int64_t *sum, int64_t *max, int64_t *min, int16_t *numOfNull); diff --git a/source/common/src/tdataformat.c b/source/common/src/tdataformat.c index 8c003066dc..73b887342c 100644 --- a/source/common/src/tdataformat.c +++ b/source/common/src/tdataformat.c @@ -1557,7 +1557,7 @@ void tColDataGetValue(SColData *pColData, int32_t iVal, SColVal *pColVal) { tColDataGetValueImpl[pColData->flag](pColData, iVal, pColVal); } -uint8_t tColDataGetBitValue(SColData *pColData, int32_t iVal) { +uint8_t tColDataGetBitValue(const SColData *pColData, int32_t iVal) { uint8_t v; switch (pColData->flag) { case HAS_NONE: diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 0f970b6830..9dab1e1d33 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -910,7 +910,7 @@ static int32_t getEndPosInDataBlock(STsdbReader* pReader, SBlockData* pBlockData return endPos; } -static void copyPrimaryTsCol(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, SColumnInfoData* pColData, +static void copyPrimaryTsCol(const SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, SColumnInfoData* pColData, int32_t dumpedRows, bool asc) { if (asc) { memcpy(pColData->pData, &pBlockData->aTSKEY[pDumpInfo->rowIndex], dumpedRows * sizeof(int64_t)); @@ -930,6 +930,97 @@ static void copyPrimaryTsCol(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpIn } } +// a faster version of copy procedure. +static void copyNumericCols(const SColData* pData, SFileBlockDumpInfo* pDumpInfo, SColumnInfoData* pColData, + int32_t dumpedRows, bool asc) { + uint8_t* p = NULL; + if (asc) { + p = pData->pData + tDataTypes[pData->type].bytes * pDumpInfo->rowIndex; + } else { + int32_t startIndex = pDumpInfo->rowIndex - dumpedRows + 1; + p = pData->pData + tDataTypes[pData->type].bytes * startIndex; + } + + int32_t step = asc? 1:-1; + + // make sure it is aligned to 8bit + ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); + + // 1. copy data in a batch model + memcpy(pColData->pData, p, dumpedRows * tDataTypes[pData->type].bytes); + + // 2. reverse the array list in case of descending order scan data block + if (!asc) { + switch(pColData->info.type) { + case TSDB_DATA_TYPE_TIMESTAMP: + case TSDB_DATA_TYPE_DOUBLE: + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_UBIGINT: + { + int32_t mid = dumpedRows >> 1u; + int64_t* pts = (int64_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + + case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_UTINYINT: { + int32_t mid = dumpedRows >> 1u; + int8_t* pts = (int8_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_USMALLINT: { + int32_t mid = dumpedRows >> 1u; + int16_t* pts = (int16_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + + case TSDB_DATA_TYPE_FLOAT: + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_UINT: { + int32_t mid = dumpedRows >> 1u; + int32_t* pts = (int32_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + } + } + + // 3. if the null value exists, check items one-by-one + if (pData->flag != HAS_VALUE) { + int32_t rowIndex = 0; + + for (int32_t j = pDumpInfo->rowIndex; rowIndex < dumpedRows; j += step, rowIndex++) { + uint8_t v = tColDataGetBitValue(pData, j); + if (v == 0 || v == 1) { + colDataSetNull_f(pColData->nullbitmap, rowIndex); + pColData->hasNull = true; + } + } + } +} + static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) { SReaderStatus* pStatus = &pReader->status; SDataBlockIter* pBlockIter = &pStatus->blockIter; @@ -997,84 +1088,8 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn colDataAppendNNULL(pColData, 0, dumpedRows); } else { if (IS_MATHABLE_TYPE(pColData->info.type)) { - - uint8_t* p = NULL; - if (asc) { - p = pData->pData + tDataTypes[pData->type].bytes * pDumpInfo->rowIndex; - } else { - int32_t startIndex = pDumpInfo->rowIndex - dumpedRows + 1; - p = pData->pData + tDataTypes[pData->type].bytes * startIndex; - } - - // make sure it is aligned to 8bit - ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); - memcpy(pColData->pData, p, dumpedRows * tDataTypes[pData->type].bytes); - - if (!asc) { - switch(pColData->info.type) { - case TSDB_DATA_TYPE_TIMESTAMP: - case TSDB_DATA_TYPE_BIGINT: - case TSDB_DATA_TYPE_UBIGINT: - { - int32_t mid = dumpedRows >> 1u; - int64_t* pts = (int64_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int64_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - break; - } - - case TSDB_DATA_TYPE_BOOL: - case TSDB_DATA_TYPE_TINYINT: - case TSDB_DATA_TYPE_UTINYINT: { - int32_t mid = dumpedRows >> 1u; - int8_t* pts = (int8_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int64_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - break; - } - - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_USMALLINT: { - int32_t mid = dumpedRows >> 1u; - int16_t* pts = (int16_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int64_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - break; - } - - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_UINT: { - int32_t mid = dumpedRows >> 1u; - int32_t* pts = (int32_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int64_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - break; - } - } - } - - // null value exists, check one-by-one - if (pData->flag != HAS_VALUE) { - for (int32_t j = pDumpInfo->rowIndex; rowIndex < dumpedRows; j += step, rowIndex++) { - uint8_t v = tColDataGetBitValue(pData, j); - if (v == 0 || v == 1) { - colDataSetNull_f(pColData->nullbitmap, rowIndex); - } - } - } - } else { + copyNumericCols(pData, pDumpInfo, pColData, dumpedRows, asc); + } else { // varchar/nchar type for (int32_t j = pDumpInfo->rowIndex; rowIndex < dumpedRows; j += step) { tColDataGetValue(pData, j, &cv); doCopyColVal(pColData, rowIndex++, i, &cv, pSupInfo); diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index e3a76ad33a..eb30945109 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -3022,6 +3022,7 @@ int32_t firstFunction(SqlFunctionCtx* pCtx) { } } #endif + if (numOfElems == 0) { // save selectivity value for column consisted of all null values firstlastSaveTupleData(pCtx->pSrcBlock, pInput->startRowIndex, pCtx, pInfo); From 459bed5f86fd60bc1d615d40fe04a3d08e7c6c1c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 9 Nov 2022 10:41:53 +0800 Subject: [PATCH 18/23] enh(query): improve the query perf. --- source/libs/function/src/builtinsimpl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index eb30945109..6eb57c1a18 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -3007,6 +3007,7 @@ int32_t firstFunction(SqlFunctionCtx* pCtx) { } } #else + int64_t* pts = (int64_t*) pInput->pPTS->pData; for (int32_t i = pInput->startRowIndex; i < pInput->startRowIndex + pInput->numOfRows; ++i) { if (pInputCol->hasNull && colDataIsNull(pInputCol, pInput->totalRows, i, pColAgg)) { continue; @@ -3015,7 +3016,7 @@ int32_t firstFunction(SqlFunctionCtx* pCtx) { numOfElems++; char* data = colDataGetData(pInputCol, i); - TSKEY cts = getRowPTs(pInput->pPTS, i); + TSKEY cts = pts[i]; if (pResInfo->numOfRes == 0 || pInfo->ts > cts) { doSaveCurrentVal(pCtx, i, cts, pInputCol->info.type, data); pResInfo->numOfRes = 1; From 372d26f74ba19bfe8088bcacd52faf7d7eadb366 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Wed, 9 Nov 2022 11:24:11 +0800 Subject: [PATCH 19/23] refactor(sync): delete assert, call FpCommitCb when multi replica --- source/libs/sync/src/syncMain.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 14823be098..b4a3d30f7d 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -225,7 +225,7 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) { sError("sync begin snapshot error"); return -1; } - + int32_t code = 0; if (syncNodeIsMnode(pSyncNode)) { @@ -390,7 +390,7 @@ bool syncIsReadyForRead(int64_t rid) { if (!pSyncNode->pLogStore->syncLogIsEmpty(pSyncNode->pLogStore)) { SSyncRaftEntry* pEntry = NULL; int32_t code = pSyncNode->pLogStore->syncLogGetEntry( - pSyncNode->pLogStore, pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore), &pEntry); + pSyncNode->pLogStore, pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore), &pEntry); if (code == 0 && pEntry != NULL) { if (pEntry->originalRpcType == TDMT_SYNC_NOOP && pEntry->term == pSyncNode->pRaftStore->currentTerm) { ready = true; @@ -2462,11 +2462,36 @@ int32_t syncNodeOnClientRequest(SSyncNode* ths, SyncClientRequest* pMsg, SyncInd } else { syncEntryDestory(pEntry); } + return -1; } else { // del resp mgr, call FpCommitCb - ASSERT(0); + + SRpcMsg rpcMsg = {0}; + syncClientRequest2RpcMsg(pMsg, &rpcMsg); + + SFsmCbMeta cbMeta = { + .index = pEntry->index, + .lastConfigIndex = SYNC_INDEX_INVALID, + .isWeak = pEntry->isWeak, + .code = -1, + .state = ths->state, + .seqNum = pEntry->seqNum, + .term = pEntry->term, + .currentTerm = ths->pRaftStore->currentTerm, + .flag = 0, + }; + + syncRespMgrGetAndDel(ths->pSyncRespMgr, cbMeta.seqNum, &rpcMsg.info); + ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, &cbMeta); + + if (h) { + taosLRUCacheRelease(ths->pLogStore->pCache, h, false); + } else { + syncEntryDestory(pEntry); + } + return -1; } } From 9533ad41987a9d16ade49c934dae3f17502dbd91 Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Wed, 9 Nov 2022 11:24:45 +0800 Subject: [PATCH 20/23] feat: taosbenchmark support query fails then continue (#17980) --- cmake/taostools_CMakeLists.txt.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/taostools_CMakeLists.txt.in b/cmake/taostools_CMakeLists.txt.in index e0d5250d84..d18d85171d 100644 --- a/cmake/taostools_CMakeLists.txt.in +++ b/cmake/taostools_CMakeLists.txt.in @@ -2,7 +2,7 @@ # taos-tools ExternalProject_Add(taos-tools GIT_REPOSITORY https://github.com/taosdata/taos-tools.git - GIT_TAG 0fb640b + GIT_TAG a921bd4 SOURCE_DIR "${TD_SOURCE_DIR}/tools/taos-tools" BINARY_DIR "" #BUILD_IN_SOURCE TRUE From da13891ca491b581efa2fee4dc8da6c60b178450 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao@163.com> Date: Wed, 9 Nov 2022 09:29:59 +0800 Subject: [PATCH 21/23] fix(stream):partition operator memory leak --- source/libs/executor/src/groupoperator.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index 891eb6e7a4..2a7d1b089b 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -918,6 +918,8 @@ static SSDataBlock* buildStreamPartitionResult(SOperatorInfo* pOperator) { blockDataDestroy(pResBlock); } } + taosArrayDestroy(pParInfo->rowIds); + pParInfo->rowIds = NULL; blockDataUpdateTsWindow(pDest, pInfo->tsColIndex); pDest->info.groupId = pParInfo->groupId; pOperator->resultInfo.totalRows += pDest->info.rows; @@ -1016,6 +1018,7 @@ static void destroyStreamPartitionOperatorInfo(void* param) { cleanupExprSupp(&pInfo->tbnameCalSup); cleanupExprSupp(&pInfo->tagCalSup); blockDataDestroy(pInfo->pDelRes); + taosHashCleanup(pInfo->pPartitions); taosMemoryFreeClear(param); } From caf5bddb0441bf30c094916a0c90a629c699f394 Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Wed, 9 Nov 2022 14:43:56 +0800 Subject: [PATCH 22/23] fix: taos_fetch_rows_a error --- source/client/src/clientMain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client/src/clientMain.c b/source/client/src/clientMain.c index 71a87a4b54..efa7d095c5 100644 --- a/source/client/src/clientMain.c +++ b/source/client/src/clientMain.c @@ -990,7 +990,7 @@ void taos_fetch_rows_a(TAOS_RES *res, __taos_async_fn_t fp, void *param) { // all data has returned to App already, no need to try again if (pResultInfo->completed) { // it is a local executed query, no need to do async fetch - if (QUERY_EXEC_MODE_LOCAL == pRequest->body.execMode) { + if (QUERY_EXEC_MODE_SCHEDULE != pRequest->body.execMode) { if (pResultInfo->localResultFetched) { pResultInfo->numOfRows = 0; pResultInfo->current = 0; From 4000176dc736b8d004bb6dc60fcb3a77fdc70aed Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Wed, 9 Nov 2022 14:44:04 +0800 Subject: [PATCH 23/23] enh(tmq): auto clear lost consumer --- include/common/tmsg.h | 2 +- include/common/tmsgdef.h | 3 +- source/dnode/mnode/impl/inc/mndConsumer.h | 1 + source/dnode/mnode/impl/src/mndConsumer.c | 69 +++++++++++++++++++++-- 4 files changed, 69 insertions(+), 6 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index c2cd7de139..643588de19 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1802,7 +1802,7 @@ int32_t tDeserializeSCMCreateTopicRsp(void* buf, int32_t bufLen, SCMCreateTopicR typedef struct { int64_t consumerId; -} SMqConsumerLostMsg, SMqConsumerRecoverMsg; +} SMqConsumerLostMsg, SMqConsumerRecoverMsg, SMqConsumerClearMsg; typedef struct { int64_t consumerId; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 9868fc49bc..a12a635837 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -149,7 +149,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_MND_TMQ_DO_REBALANCE, "do-rebalance", SMqDoRebalanceMsg, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TMQ_DROP_CGROUP, "drop-cgroup", SMqDropCGroupReq, SMqDropCGroupRsp) TD_DEF_MSG_TYPE(TDMT_MND_UNUSED2, "unused2", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_MND_TMQ_TIMER, "mq-tmr", SMTimerReq, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_TMQ_TIMER, "tmq-tmr", SMTimerReq, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TELEM_TIMER, "telem-tmr", SMTimerReq, SMTimerReq) TD_DEF_MSG_TYPE(TDMT_MND_TRANS_TIMER, "trans-tmr", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TTL_TIMER, "ttl-tmr", NULL, NULL) @@ -171,6 +171,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_MND_SHOW_VARIABLES, "show-variables", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_SERVER_VERSION, "server-version", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_UPTIME_TIMER, "uptime-timer", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, "lost-consumer-clear", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_MAX_MSG, "mnd-max", NULL, NULL) TD_NEW_MSG_SEG(TDMT_VND_MSG) diff --git a/source/dnode/mnode/impl/inc/mndConsumer.h b/source/dnode/mnode/impl/inc/mndConsumer.h index 210e336ac2..1176e1af0b 100644 --- a/source/dnode/mnode/impl/inc/mndConsumer.h +++ b/source/dnode/mnode/impl/inc/mndConsumer.h @@ -44,6 +44,7 @@ SSdbRaw *mndConsumerActionEncode(SMqConsumerObj *pConsumer); SSdbRow *mndConsumerActionDecode(SSdbRaw *pRaw); int32_t mndSetConsumerCommitLogs(SMnode *pMnode, STrans *pTrans, SMqConsumerObj *pConsumer); +int32_t mndSetConsumerDropLogs(SMnode *pMnode, STrans *pTrans, SMqConsumerObj *pConsumer); bool mndRebTryStart(); void mndRebEnd(); diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index df999316eb..62ad5bae15 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -32,7 +32,8 @@ #define MND_CONSUMER_VER_NUMBER 1 #define MND_CONSUMER_RESERVE_SIZE 64 -#define MND_CONSUMER_LOST_HB_CNT 3 +#define MND_CONSUMER_LOST_HB_CNT 3 +#define MND_CONSUMER_LOST_CLEAR_THRESHOLD 43200 static int8_t mqRebInExecCnt = 0; @@ -50,6 +51,7 @@ static int32_t mndProcessAskEpReq(SRpcMsg *pMsg); static int32_t mndProcessMqHbReq(SRpcMsg *pMsg); static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg); static int32_t mndProcessConsumerLostMsg(SRpcMsg *pMsg); +static int32_t mndProcessConsumerClearMsg(SRpcMsg *pMsg); static int32_t mndProcessConsumerRecoverMsg(SRpcMsg *pMsg); int32_t mndInitConsumer(SMnode *pMnode) { @@ -69,6 +71,7 @@ int32_t mndInitConsumer(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_MND_TMQ_TIMER, mndProcessMqTimerMsg); mndSetMsgHandle(pMnode, TDMT_MND_TMQ_CONSUMER_LOST, mndProcessConsumerLostMsg); mndSetMsgHandle(pMnode, TDMT_MND_TMQ_CONSUMER_RECOVER, mndProcessConsumerRecoverMsg); + mndSetMsgHandle(pMnode, TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, mndProcessConsumerClearMsg); mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_CONSUMERS, mndRetrieveConsumer); mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_CONSUMERS, mndCancelGetNextConsumer); @@ -162,6 +165,43 @@ FAIL: return -1; } +static int32_t mndProcessConsumerClearMsg(SRpcMsg *pMsg) { + SMnode *pMnode = pMsg->info.node; + SMqConsumerClearMsg *pClearMsg = pMsg->pCont; + SMqConsumerObj *pConsumer = mndAcquireConsumer(pMnode, pClearMsg->consumerId); + if (pConsumer == NULL) { + return 0; + } + + mInfo("receive consumer clear msg, consumer id %" PRId64 ", status %s", pClearMsg->consumerId, + mndConsumerStatusName(pConsumer->status)); + + if (pConsumer->status != MQ_CONSUMER_STATUS__LOST_REBD) { + mndReleaseConsumer(pMnode, pConsumer); + return -1; + } + + SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(pConsumer->consumerId, pConsumer->cgroup); + pConsumerNew->updateType = CONSUMER_UPDATE__LOST; + + mndReleaseConsumer(pMnode, pConsumer); + + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_NOTHING, pMsg, "clear-csm"); + if (pTrans == NULL) goto FAIL; + if (mndSetConsumerDropLogs(pMnode, pTrans, pConsumerNew) != 0) goto FAIL; + if (mndTransPrepare(pMnode, pTrans) != 0) goto FAIL; + + tDeleteSMqConsumerObj(pConsumerNew); + taosMemoryFree(pConsumerNew); + mndTransDrop(pTrans); + return 0; +FAIL: + tDeleteSMqConsumerObj(pConsumerNew); + taosMemoryFree(pConsumerNew); + mndTransDrop(pTrans); + return -1; +} + static SMqRebInfo *mndGetOrCreateRebSub(SHashObj *pHash, const char *key) { SMqRebInfo *pRebInfo = taosHashGet(pHash, key, strlen(key) + 1); if (pRebInfo == NULL) { @@ -206,15 +246,28 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { SMqConsumerLostMsg *pLostMsg = rpcMallocCont(sizeof(SMqConsumerLostMsg)); pLostMsg->consumerId = pConsumer->consumerId; - SRpcMsg pRpcMsg = { + SRpcMsg rpcMsg = { .msgType = TDMT_MND_TMQ_CONSUMER_LOST, .pCont = pLostMsg, .contLen = sizeof(SMqConsumerLostMsg), }; - tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &pRpcMsg); + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); } - if (status == MQ_CONSUMER_STATUS__LOST_REBD || status == MQ_CONSUMER_STATUS__READY) { + + if (status == MQ_CONSUMER_STATUS__READY) { // do nothing + } else if (status == MQ_CONSUMER_STATUS__LOST_REBD) { + if (hbStatus > MND_CONSUMER_LOST_CLEAR_THRESHOLD) { + SMqConsumerClearMsg *pClearMsg = rpcMallocCont(sizeof(SMqConsumerClearMsg)); + + pClearMsg->consumerId = pConsumer->consumerId; + SRpcMsg rpcMsg = { + .msgType = TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, + .pCont = pClearMsg, + .contLen = sizeof(SMqConsumerClearMsg), + }; + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); + } } else if (status == MQ_CONSUMER_STATUS__LOST) { taosRLockLatch(&pConsumer->lock); int32_t topicNum = taosArrayGetSize(pConsumer->currentTopics); @@ -444,6 +497,14 @@ FAIL: return -1; } +int32_t mndSetConsumerDropLogs(SMnode *pMnode, STrans *pTrans, SMqConsumerObj *pConsumer) { + SSdbRaw *pCommitRaw = mndConsumerActionEncode(pConsumer); + if (pCommitRaw == NULL) return -1; + if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) return -1; + if (sdbSetRawStatus(pCommitRaw, SDB_STATUS_DROPPED) != 0) return -1; + return 0; +} + int32_t mndSetConsumerCommitLogs(SMnode *pMnode, STrans *pTrans, SMqConsumerObj *pConsumer) { SSdbRaw *pCommitRaw = mndConsumerActionEncode(pConsumer); if (pCommitRaw == NULL) return -1;