diff --git a/include/common/tglobal.h b/include/common/tglobal.h index aff5945f9f..d7a3d84424 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -191,6 +191,7 @@ extern int64_t tsWalFsyncDataSizeLimit; extern int32_t tsTransPullupInterval; extern int32_t tsMqRebalanceInterval; extern int32_t tsStreamCheckpointTickInterval; +extern int32_t tsStreamNodeCheckInterval; extern int32_t tsTtlUnit; extern int32_t tsTtlPushIntervalSec; extern int32_t tsTtlBatchDropNum; @@ -203,7 +204,6 @@ extern int32_t tsRpcRetryInterval; extern bool tsDisableStream; extern int64_t tsStreamBufferSize; -extern int64_t tsCheckpointInterval; extern bool tsFilterScalarMode; extern int32_t tsKeepTimeOffset; extern int32_t tsMaxStreamBackendCache; diff --git a/include/common/tmisce.h b/include/common/tmisce.h index bc6558900c..3d1afcd21f 100644 --- a/include/common/tmisce.h +++ b/include/common/tmisce.h @@ -28,6 +28,22 @@ typedef struct SCorEpSet { } SCorEpSet; #define GET_ACTIVE_EP(_eps) (&((_eps)->eps[(_eps)->inUse])) + +#define EPSET_TO_STR(_eps, tbuf) \ + do { \ + int len = snprintf((tbuf), sizeof(tbuf), "epset:{"); \ + for (int _i = 0; _i < (_eps)->numOfEps; _i++) { \ + if (_i == (_eps)->numOfEps - 1) { \ + len += \ + snprintf((tbuf) + len, sizeof(tbuf) - len, "%d. %s:%d", _i, (_eps)->eps[_i].fqdn, (_eps)->eps[_i].port); \ + } else { \ + len += \ + snprintf((tbuf) + len, sizeof(tbuf) - len, "%d. 
%s:%d, ", _i, (_eps)->eps[_i].fqdn, (_eps)->eps[_i].port); \ + } \ + } \ + len += snprintf((tbuf) + len, sizeof(tbuf) - len, "}, inUse:%d", (_eps)->inUse); \ + } while (0); + int32_t taosGetFqdnPortFromEp(const char* ep, SEp* pEp); void addEpIntoEpSet(SEpSet* pEpSet, const char* fqdn, uint16_t port); diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 29f0667dac..fb2c780724 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -157,6 +157,7 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_MND_TRANS_TIMER, "trans-tmr", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TTL_TIMER, "ttl-tmr", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_GRANT_HB_TIMER, "grant-hb-tmr", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_NODECHECK_TIMER, "node-check-tmr", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_KILL_TRANS, "kill-trans", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_KILL_QUERY, "kill-query", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_KILL_CONN, "kill-conn", NULL, NULL) @@ -175,13 +176,16 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_MND_SERVER_VERSION, "server-version", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_UPTIME_TIMER, "uptime-timer", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, "lost-consumer-clear", NULL, NULL) - // TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_TIMER, "stream-checkpoint-tmr", NULL, NULL) - // TD_DEF_MSG_TYPE(TDMT_MND_STREAM_BEGIN_CHECKPOINT, "stream-begin-checkpoint", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_HEARTBEAT, "stream-heartbeat", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_MAX_MSG, "mnd-max", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_BALANCE_VGROUP_LEADER, "balance-vgroup-leader", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_RESTORE_DNODE, "restore-dnode", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_PAUSE_STREAM, "pause-stream", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_RESUME_STREAM, "resume-stream", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_TIMER, "stream-checkpoint-tmr", NULL, 
NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_BEGIN_CHECKPOINT, "stream-begin-checkpoint", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_NODECHANGE_CHECK, "stream-nodechange-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TRIM_DB_TIMER, "trim-db-tmr", NULL, NULL) TD_NEW_MSG_SEG(TDMT_VND_MSG) @@ -255,15 +259,13 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DISPATCH, "stream-task-dispatch", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_UNUSED1, "stream-unused1", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY, "stream-scan-history", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY_FINISH, "stream-scan-history-finish", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECK, "stream-task-check", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT, "stream-checkpoint", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT_READY, "stream-checkpoint-ready", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_REPORT_CHECKPOINT, "stream-report-checkpoint", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RESTORE_CHECKPOINT, "stream-restore-checkpoint", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_MAX_MSG, "stream-max", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_PAUSE, "stream-task-pause", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RESUME, "stream-task-resume", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_STOP, "stream-task-stop", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_MAX_MSG, "stream-max", NULL, NULL) TD_NEW_MSG_SEG(TDMT_MON_MSG) TD_DEF_MSG_TYPE(TDMT_MON_MAX_MSG, "monitor-max", NULL, NULL) @@ -300,9 +302,12 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_SYNC_FORCE_FOLLOWER, "sync-force-become-follower", NULL, NULL) TD_NEW_MSG_SEG(TDMT_VND_STREAM_MSG) - TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL) +// TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL) 
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_SCAN_HISTORY, "vnode-stream-scan-history", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_VND_STREAM_SCAN_HISTORY_FINISH, "vnode-stream-scan-history-finish", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_CHECK_POINT_SOURCE, "vnode-stream-checkpoint-source", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TASK_UPDATE, "vnode-stream-update", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TASK_CHECK, "vnode-stream-task-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_MAX_MSG, "vnd-stream-max", NULL, NULL) TD_NEW_MSG_SEG(TDMT_VND_TMQ_MSG) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index e2adcb12dc..5990ae1c9c 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -41,23 +41,21 @@ typedef struct { } SLocalFetch; typedef struct { - void* tqReader; - void* config; - void* vnode; - void* mnd; - SMsgCb* pMsgCb; - int64_t version; - bool initMetaReader; - bool initTableReader; - bool initTqReader; - int32_t numOfVgroups; - void* sContext; // SSnapContext* + void* tqReader; // todo remove it + void* vnode; + void* mnd; + SMsgCb* pMsgCb; + int64_t version; + uint64_t checkpointId; + bool initTableReader; + bool initTqReader; + int32_t numOfVgroups; + void* sContext; // SSnapContext* + void* pStateBackend; + int8_t fillHistory; + STimeWindow winRange; - void* pStateBackend; struct SStorageAPI api; - - int8_t fillHistory; - STimeWindow winRange; } SReadHandle; // in queue mode, data streams are seperated by msg @@ -97,9 +95,6 @@ void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId); int32_t qSetStreamOpOpen(qTaskInfo_t tinfo); -// todo refactor -void qGetCheckpointVersion(qTaskInfo_t tinfo, int64_t* dataVer, int64_t* ckId); - /** * Set multiple input data blocks for the stream scan. 
* @param tinfo diff --git a/include/libs/executor/storageapi.h b/include/libs/executor/storageapi.h index 873b95b29f..0a240dd8f5 100644 --- a/include/libs/executor/storageapi.h +++ b/include/libs/executor/storageapi.h @@ -379,7 +379,7 @@ typedef struct SStateStore { state_key_cmpr_fn fn, void** pVal, int32_t* pVLen); int32_t (*streamStateSessionGetKeyByRange)(SStreamState* pState, const SSessionKey* range, SSessionKey* curKey); - SUpdateInfo* (*updateInfoInit)(int64_t interval, int32_t precision, int64_t watermark); + SUpdateInfo* (*updateInfoInit)(int64_t interval, int32_t precision, int64_t watermark, bool igUp); TSKEY (*updateInfoFillBlockData)(SUpdateInfo* pInfo, SSDataBlock* pBlock, int32_t primaryTsCol); bool (*updateInfoIsUpdated)(SUpdateInfo* pInfo, uint64_t tableId, TSKEY ts); bool (*updateInfoIsTableInserted)(SUpdateInfo* pInfo, int64_t tbUid); @@ -387,7 +387,7 @@ typedef struct SStateStore { void (*windowSBfDelete)(SUpdateInfo *pInfo, uint64_t count); void (*windowSBfAdd)(SUpdateInfo *pInfo, uint64_t count); - SUpdateInfo* (*updateInfoInitP)(SInterval* pInterval, int64_t watermark); + SUpdateInfo* (*updateInfoInitP)(SInterval* pInterval, int64_t watermark, bool igUp); void (*updateInfoAddCloseWindowSBF)(SUpdateInfo* pInfo); void (*updateInfoDestoryColseWinSBF)(SUpdateInfo* pInfo); int32_t (*updateInfoSerialize)(void* buf, int32_t bufLen, const SUpdateInfo* pInfo); @@ -398,7 +398,8 @@ typedef struct SStateStore { SStreamStateCur* (*streamStateSessionSeekKeyCurrentNext)(SStreamState* pState, const SSessionKey* key); struct SStreamFileState* (*streamFileStateInit)(int64_t memSize, uint32_t keySize, uint32_t rowSize, - uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark, const char*id); + uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark, + const char* id, int64_t ckId); void (*streamFileStateDestroy)(struct SStreamFileState* pFileState); void (*streamFileStateClear)(struct SStreamFileState* pFileState); diff --git 
a/include/libs/stream/streamSnapshot.h b/include/libs/stream/streamSnapshot.h new file mode 100644 index 0000000000..15d5f56ffd --- /dev/null +++ b/include/libs/stream/streamSnapshot.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +#ifndef _STREAM_BACKEDN_SNAPSHOT_H_ +#define _STREAM_BACKEDN_SNAPSHOT_H_ +#include "tcommon.h" + +#define STREAM_STATE_TRANSFER "stream-state-transfer" + +typedef struct SStreamSnapReader SStreamSnapReader; +typedef struct SStreamSnapWriter SStreamSnapWriter; + +typedef struct SStreamSnapHandle SStreamSnapHandle; +typedef struct SStreamSnapBlockHdr SStreamSnapBlockHdr; + +int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t ever, char* path, SStreamSnapReader** ppReader); +int32_t streamSnapReaderClose(SStreamSnapReader* pReader); +int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* size); + +// SMetaSnapWriter ======================================== +int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path, SStreamSnapWriter** ppWriter); +int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nData); +int32_t streamSnapWriterClose(SStreamSnapWriter* ppWriter, int8_t rollback); + +#endif \ No newline at end of file diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index b47288bf45..1b3960bdba 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -20,6 
+20,7 @@ #include "tmsg.h" #include "tmsgcb.h" #include "tqueue.h" +#include "ttimer.h" #ifdef __cplusplus extern "C" { @@ -48,6 +49,8 @@ enum { TASK_STATUS__SCAN_HISTORY, // stream task scan history data by using tsdbread in the stream scanner TASK_STATUS__HALT, // pause, but not be manipulated by user command TASK_STATUS__PAUSE, // pause + TASK_STATUS__CK, // stream task is in checkpoint status, no data are allowed to put into inputQ anymore + TASK_STATUS__CK_READY, }; enum { @@ -61,15 +64,12 @@ enum { enum { TASK_INPUT_STATUS__NORMAL = 1, TASK_INPUT_STATUS__BLOCKED, - TASK_INPUT_STATUS__RECOVER, - TASK_INPUT_STATUS__STOP, TASK_INPUT_STATUS__FAILED, }; enum { TASK_OUTPUT_STATUS__NORMAL = 1, TASK_OUTPUT_STATUS__WAIT, - TASK_OUTPUT_STATUS__BLOCKED, }; enum { @@ -97,11 +97,16 @@ enum { STREAM_QUEUE__PROCESSING, }; +enum { + STREAM_META_WILL_STOP = 1, + STREAM_META_OK_TO_STOP = 2, +}; + typedef struct { int8_t type; } SStreamQueueItem; -typedef void FTbSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data); +typedef void FTbSink(SStreamTask* pTask, void* vnode, void* data); typedef int32_t FTaskExpand(void* ahandle, SStreamTask* pTask, int64_t ver); typedef struct { @@ -119,14 +124,13 @@ typedef struct { } SStreamMergedSubmit; typedef struct { - int8_t type; - + int8_t type; + int64_t nodeId; // nodeId, from SStreamMeta int32_t srcVgId; int32_t srcTaskId; int32_t childId; int64_t sourceVer; int64_t reqId; - SArray* blocks; // SArray } SStreamDataBlock; @@ -136,10 +140,6 @@ typedef struct { SSDataBlock* pBlock; } SStreamRefDataBlock; -typedef struct { - int8_t type; -} SStreamCheckpoint; - typedef struct { int8_t type; SSDataBlock* pBlock; @@ -189,6 +189,7 @@ int32_t streamInit(); void streamCleanUp(); SStreamQueue* streamQueueOpen(int64_t cap); +void streamQueueCleanup(SStreamQueue* pQueue); void streamQueueClose(SStreamQueue* pQueue, int32_t taskId); static FORCE_INLINE void streamQueueProcessSuccess(SStreamQueue* queue) { @@ -252,20 +253,26 @@ typedef 
struct SStreamChildEpInfo { int32_t nodeId; int32_t childId; int32_t taskId; - int8_t dataAllowed; SEpSet epSet; + bool dataAllowed; // denote if the data from this upstream task is allowed to put into inputQ, not serialize it + int64_t stage; // upstream task stage value, to denote if the upstream node has restart/replica changed/transfer } SStreamChildEpInfo; -typedef struct SStreamId { +typedef struct SStreamTaskKey { + int64_t streamId; + int64_t taskId; +} SStreamTaskKey; + +typedef struct SStreamTaskId { int64_t streamId; int32_t taskId; const char* idStr; -} SStreamId; +} SStreamTaskId; typedef struct SCheckpointInfo { - int64_t id; - int64_t version; // offset in WAL - int64_t currentVer; // current offset in WAL, not serialize it + int64_t checkpointId; + int64_t checkpointVer; // latest checkpointId version + int64_t currentVer; // current offset in WAL, not serialize it } SCheckpointInfo; typedef struct SStreamStatus { @@ -273,10 +280,9 @@ typedef struct SStreamStatus { int8_t downstreamReady; // downstream tasks are all ready now, if this flag is set int8_t schedStatus; int8_t keepTaskStatus; - bool transferState; bool appendTranstateBlock; // has append the transfer state data block already, todo: remove it - int8_t timerActive; // timer is active - int8_t pauseAllowed; // allowed task status to be set to be paused + int8_t timerActive; // timer is active + int8_t pauseAllowed; // allowed task status to be set to be paused } SStreamStatus; typedef struct SHistDataRange { @@ -287,6 +293,7 @@ typedef struct SHistDataRange { typedef struct SSTaskBasicInfo { int32_t nodeId; // vgroup id or snode id SEpSet epSet; + SEpSet mnodeEpset; // mnode epset for send heartbeat int32_t selfChildId; int32_t totalLevel; int8_t taskLevel; @@ -314,7 +321,7 @@ typedef struct { struct SStreamTask { int64_t ver; - SStreamId id; + SStreamTaskId id; SSTaskBasicInfo info; STaskOutputInfo outputInfo; SDispatchMsgInfo msgInfo; @@ -322,12 +329,15 @@ struct SStreamTask { 
SCheckpointInfo chkInfo; STaskExec exec; SHistDataRange dataRange; - SStreamId historyTaskId; - SStreamId streamTaskId; - SArray* pUpstreamEpInfoList; // SArray, // children info + SStreamTaskId historyTaskId; + SStreamTaskId streamTaskId; int32_t nextCheckId; SArray* checkpointInfo; // SArray STaskTimestamp tsInfo; + SArray* pReadyMsgList; // SArray + TdThreadMutex lock; // secure the operation of set task status and puting data into inputQ + SArray* pUpstreamInfoList; + // output union { STaskDispatcherFixedEp fixedEpDispatcher; @@ -348,7 +358,6 @@ struct SStreamTask { SMsgCb* pMsgCb; // msg handle SStreamState* pState; // state backend SArray* pRspMsgList; - TdThreadMutex lock; // the followings attributes don't be serialized int32_t notReadyTasks; @@ -358,11 +367,18 @@ struct SStreamTask { int32_t refCnt; int64_t checkpointingId; int32_t checkpointAlignCnt; + int32_t checkpointNotReadyTasks; int32_t transferStateAlignCnt; struct SStreamMeta* pMeta; SSHashObj* pNameMap; }; +typedef struct SMetaHbInfo { + tmr_h hbTmr; + int32_t stopFlag; + int32_t tickCounter; +} SMetaHbInfo; + // meta typedef struct SStreamMeta { char* path; @@ -375,12 +391,25 @@ typedef struct SStreamMeta { TXN* txn; FTaskExpand* expandFunc; int32_t vgId; + int64_t stage; SRWLatch lock; int32_t walScanCounter; void* streamBackend; int64_t streamBackendRid; SHashObj* pTaskBackendUnique; TdThreadMutex backendMutex; + SMetaHbInfo hbInfo; + int32_t closedTask; + int32_t totalTasks; // this value should be increased when a new task is added into the meta + int32_t chkptNotReadyTasks; + int64_t rid; + + int64_t chkpId; + SArray* chkpSaved; + SArray* chkpInUse; + int32_t chkpCap; + SRWLatch chkpDirLock; + int32_t pauseTaskNum; } SStreamMeta; int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); @@ -391,8 +420,12 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHisto int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask); int32_t 
tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask); void tFreeStreamTask(SStreamTask* pTask); -int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem); -bool tInputQueueIsFull(const SStreamTask* pTask); +int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, int64_t ver); + +int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo); + +int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem); +bool tInputQueueIsFull(const SStreamTask* pTask); typedef struct { SMsgHead head; @@ -401,8 +434,9 @@ typedef struct { } SStreamTaskRunReq; typedef struct { - int64_t streamId; int32_t type; + int64_t stage; // nodeId from upstream task + int64_t streamId; int32_t taskId; int32_t srcVgId; int32_t upstreamTaskId; @@ -443,6 +477,7 @@ typedef struct { typedef struct { int64_t reqId; + int64_t stage; int64_t streamId; int32_t upstreamNodeId; int32_t upstreamTaskId; @@ -459,6 +494,7 @@ typedef struct { int32_t downstreamNodeId; int32_t downstreamTaskId; int32_t childId; + int32_t oldStage; int8_t status; } SStreamTaskCheckRsp; @@ -485,6 +521,8 @@ typedef struct { int64_t checkpointId; int32_t taskId; int32_t nodeId; + SEpSet mgmtEps; + int32_t mnodeId; int64_t expireTime; } SStreamCheckpointSourceReq; @@ -493,14 +531,16 @@ typedef struct { int64_t checkpointId; int32_t taskId; int32_t nodeId; + int32_t mnodeId; int64_t expireTime; + int8_t success; } SStreamCheckpointSourceRsp; -int32_t tEncodeSStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq); -int32_t tDecodeSStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq); +int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq); +int32_t tDecodeStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq); -int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp); -int32_t 
tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp); +int32_t tEncodeStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp); +int32_t tDecodeStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp); typedef struct { SMsgHead msgHead; @@ -511,28 +551,25 @@ typedef struct { int32_t upstreamTaskId; int32_t upstreamNodeId; int32_t childId; - int64_t expireTime; - int8_t taskLevel; -} SStreamCheckpointReq; +} SStreamCheckpointReadyMsg; -typedef struct { - SMsgHead msgHead; - int64_t streamId; - int64_t checkpointId; - int32_t downstreamTaskId; - int32_t downstreamNodeId; - int32_t upstreamTaskId; - int32_t upstreamNodeId; - int32_t childId; - int64_t expireTime; - int8_t taskLevel; -} SStreamCheckpointRsp; +int32_t tEncodeStreamCheckpointReadyMsg(SEncoder* pEncoder, const SStreamCheckpointReadyMsg* pRsp); +int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointReadyMsg* pRsp); -int32_t tEncodeSStreamCheckpointReq(SEncoder* pEncoder, const SStreamCheckpointReq* pReq); -int32_t tDecodeSStreamCheckpointReq(SDecoder* pDecoder, SStreamCheckpointReq* pReq); +typedef struct STaskStatusEntry { + int64_t streamId; + int32_t taskId; + int32_t status; +} STaskStatusEntry; -int32_t tEncodeSStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRsp* pRsp); -int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRsp); +typedef struct SStreamHbMsg { + int32_t vgId; + int32_t numOfTasks; + SArray* pTaskStatus; // SArray +} SStreamHbMsg; + +int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pRsp); +int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pRsp); typedef struct { int64_t streamId; @@ -545,6 +582,29 @@ typedef struct { int32_t tEncodeCompleteHistoryDataMsg(SEncoder* pEncoder, const SStreamCompleteHistoryMsg* pReq); int32_t tDecodeCompleteHistoryDataMsg(SDecoder* pDecoder, SStreamCompleteHistoryMsg* pReq); 
+typedef struct SNodeUpdateInfo { + int32_t nodeId; + SEpSet prevEp; + SEpSet newEp; +} SNodeUpdateInfo; + +typedef struct SStreamTaskNodeUpdateMsg { + int64_t streamId; + int32_t taskId; + SArray* pNodeList; // SArray +} SStreamTaskNodeUpdateMsg; + +int32_t tEncodeStreamTaskUpdateMsg(SEncoder* pEncoder, const SStreamTaskNodeUpdateMsg* pMsg); +int32_t tDecodeStreamTaskUpdateMsg(SDecoder* pDecoder, SStreamTaskNodeUpdateMsg* pMsg); + +typedef struct SStreamTaskNodeUpdateRsp { + int64_t streamId; + int32_t taskId; +} SStreamTaskNodeUpdateRsp; + +int32_t tEncodeStreamTaskUpdateRsp(SEncoder* pEncoder, const SStreamTaskNodeUpdateRsp* pMsg); +int32_t tDecodeStreamTaskUpdateRsp(SDecoder* pDecoder, SStreamTaskNodeUpdateRsp* pMsg); + typedef struct { int64_t streamId; int32_t downstreamTaskId; @@ -564,16 +624,11 @@ int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq) int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp); int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp); -int32_t tEncodeSStreamTaskScanHistoryReq(SEncoder* pEncoder, const SStreamRecoverDownstreamReq* pReq); -int32_t tDecodeSStreamTaskScanHistoryReq(SDecoder* pDecoder, SStreamRecoverDownstreamReq* pReq); - -int32_t tEncodeSStreamTaskRecoverRsp(SEncoder* pEncoder, const SStreamRecoverDownstreamRsp* pRsp); -int32_t tDecodeSStreamTaskRecoverRsp(SDecoder* pDecoder, SStreamRecoverDownstreamRsp* pRsp); - +int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq); int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq); + int32_t tDecodeStreamRetrieveReq(SDecoder* pDecoder, SStreamRetrieveReq* pReq); void tDeleteStreamRetrieveReq(SStreamRetrieveReq* pReq); - void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq); int32_t streamSetupScheduleTrigger(SStreamTask* pTask); @@ -581,10 +636,9 @@ int32_t streamSetupScheduleTrigger(SStreamTask* pTask); int32_t 
streamProcessRunReq(SStreamTask* pTask); int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pMsg, bool exec); int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code); -void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId); -void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); -int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pMsg); +int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pMsg); +SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId); void streamTaskInputFail(SStreamTask* pTask); int32_t streamTryExec(SStreamTask* pTask); @@ -594,16 +648,19 @@ bool streamTaskShouldStop(const SStreamStatus* pStatus); bool streamTaskShouldPause(const SStreamStatus* pStatus); bool streamTaskIsIdle(const SStreamTask* pTask); -SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId); -int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize); +int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize); +void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen); char* createStreamTaskIdStr(int64_t streamId, int32_t taskId); // recover and fill history void streamTaskCheckDownstreamTasks(SStreamTask* pTask); -int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask); int32_t streamTaskLaunchScanHistory(SStreamTask* pTask); -int32_t streamTaskCheckStatus(SStreamTask* pTask); +int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage); +int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList); +void streamTaskResetUpstreamStageInfo(SStreamTask* pTask); + +int32_t streamTaskStop(SStreamTask* pTask); int32_t streamSendCheckRsp(const SStreamMeta* pMeta, const SStreamTaskCheckReq* pReq, SStreamTaskCheckRsp* pRsp, SRpcHandleInfo* pRpcInfo, int32_t taskId); int32_t 
streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp); @@ -611,17 +668,26 @@ int32_t streamLaunchFillHistoryTask(SStreamTask* pTask); int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask); int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated); bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer); +int32_t streamTaskGetInputQItems(const SStreamTask* pTask); // common int32_t streamRestoreParam(SStreamTask* pTask); int32_t streamSetStatusNormal(SStreamTask* pTask); const char* streamGetTaskStatusStr(int32_t status); -void streamTaskPause(SStreamTask* pTask); -void streamTaskResume(SStreamTask* pTask); +void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta); +void streamTaskResume(SStreamTask* pTask, SStreamMeta* pMeta); void streamTaskHalt(SStreamTask* pTask); void streamTaskResumeFromHalt(SStreamTask* pTask); void streamTaskDisablePause(SStreamTask* pTask); void streamTaskEnablePause(SStreamTask* pTask); +int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask); +void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet); +void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet); +void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDownstreamTask); +int32_t streamTaskReleaseState(SStreamTask* pTask); +int32_t streamTaskReloadState(SStreamTask* pTask); +void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId); +void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); // source level int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); @@ -640,31 +706,32 @@ int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask); // stream task meta void streamMetaInit(); void streamMetaCleanup(); -SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId); 
+SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage); void streamMetaClose(SStreamMeta* streamMeta); - -// save to b-tree meta store -int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); -int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); +int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); // save to stream meta store +int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int64_t* pKey); int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask, bool* pAdded); int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); -int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta); // todo remove it +int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta); +int32_t streamMetaGetNumOfStreamTasks(SStreamMeta* pMeta); SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); - -int32_t streamMetaBegin(SStreamMeta* pMeta); -int32_t streamMetaCommit(SStreamMeta* pMeta); -int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver); +int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId); +int32_t streamMetaCommit(SStreamMeta* pMeta); +int32_t streamLoadTasks(SStreamMeta* pMeta); +void streamMetaNotifyClose(SStreamMeta* pMeta); // checkpoint -int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); -int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq); -int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp); +int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); +int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask); -int32_t streamTaskReleaseState(SStreamTask* pTask); -int32_t streamTaskReloadState(SStreamTask* pTask); int32_t 
streamAlignTransferState(SStreamTask* pTask); +int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SStreamTask* pTask, + int8_t isSucceed); +int32_t buildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SRpcMsg* pMsg, + int8_t isSucceed); + #ifdef __cplusplus } #endif diff --git a/include/libs/stream/tstreamFileState.h b/include/libs/stream/tstreamFileState.h index b2255013ca..052231fe39 100644 --- a/include/libs/stream/tstreamFileState.h +++ b/include/libs/stream/tstreamFileState.h @@ -31,7 +31,8 @@ typedef struct SStreamFileState SStreamFileState; typedef SList SStreamSnapshot; SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, - GetTsFun fp, void* pFile, TSKEY delMark, const char* id); + GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId, + int64_t checkpointId); void streamFileStateDestroy(SStreamFileState* pFileState); void streamFileStateClear(SStreamFileState* pFileState); bool needClearDiskBuff(SStreamFileState* pFileState); @@ -44,7 +45,7 @@ bool hasRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen); SStreamSnapshot* getSnapshot(SStreamFileState* pFileState); int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState); -int32_t recoverSnapshot(SStreamFileState* pFileState); +int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId); int32_t getSnapshotIdList(SStreamFileState* pFileState, SArray* list); int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark); diff --git a/include/libs/stream/tstreamUpdate.h b/include/libs/stream/tstreamUpdate.h index 7bb1d027c9..41ada56904 100644 --- a/include/libs/stream/tstreamUpdate.h +++ b/include/libs/stream/tstreamUpdate.h @@ -43,8 +43,8 @@ typedef struct SUpdateKey { // uint64_t maxDataVersion; //} SUpdateInfo; -SUpdateInfo *updateInfoInitP(SInterval *pInterval, int64_t watermark); 
-SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t watermark); +SUpdateInfo *updateInfoInitP(SInterval *pInterval, int64_t watermark, bool igUp); +SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t watermark, bool igUp); TSKEY updateInfoFillBlockData(SUpdateInfo *pInfo, SSDataBlock *pBlock, int32_t primaryTsCol); bool updateInfoIsUpdated(SUpdateInfo *pInfo, uint64_t tableId, TSKEY ts); bool updateInfoIsTableInserted(SUpdateInfo *pInfo, int64_t tbUid); diff --git a/include/util/tarray.h b/include/util/tarray.h index f56c9e3a17..4d9c930521 100644 --- a/include/util/tarray.h +++ b/include/util/tarray.h @@ -200,8 +200,11 @@ void taosArrayClear(SArray* pArray); * @param pArray * @param fp */ + void taosArrayClearEx(SArray* pArray, void (*fp)(void*)); +void taosArrayClearP(SArray* pArray, void (*fp)(void*)); + void* taosArrayDestroy(SArray* pArray); void taosArrayDestroyP(SArray* pArray, FDelete fp); diff --git a/include/util/tencode.h b/include/util/tencode.h index ff97a20507..d05d4914e3 100644 --- a/include/util/tencode.h +++ b/include/util/tencode.h @@ -89,7 +89,7 @@ typedef struct { RET = -1; \ } \ tEncoderClear(&coder); \ - } while (0) + } while (0); static void* tEncoderMalloc(SEncoder* pCoder, int32_t size); static void* tDecoderMalloc(SDecoder* pCoder, int32_t size); diff --git a/include/util/types.h b/include/util/types.h index b49670220b..0aa01a66f5 100644 --- a/include/util/types.h +++ b/include/util/types.h @@ -85,8 +85,6 @@ typedef uint16_t VarDataLenT; // maxVarDataLen: 65535 #define varDataVal(v) ((char *)(v) + VARSTR_HEADER_SIZE) #define varDataTLen(v) (sizeof(VarDataLenT) + varDataLen(v)) -#define NCHAR_WIDTH_TO_BYTES(n) ((n)*TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE) - typedef int32_t VarDataOffsetT; typedef struct tstr { diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index 6f978b0143..a1c8690dfc 100644 --- a/source/client/test/clientTests.cpp +++ 
b/source/client/test/clientTests.cpp @@ -826,6 +826,25 @@ TEST(clientCase, projection_query_tables) { } taos_free_result(pRes); + int64_t start = 1685959190000; + + int32_t code = -1; + for(int32_t i = 0; i < 1000000; ++i) { + char t[512] = {0}; + + sprintf(t, "insert into t1 values(%ld, %ld)", start + i, i); + while(1) { + void* p = taos_query(pConn, t); + code = taos_errno(p); + taos_free_result(p); + if (code != 0) { + printf("insert data error, retry\n"); + } else { + break; + } + } + } + for (int32_t i = 0; i < 1; ++i) { printf("create table :%d\n", i); createNewTable(pConn, i); @@ -901,13 +920,40 @@ TEST(clientCase, agg_query_tables) { } taos_free_result(pRes); - pRes = taos_query(pConn, "show table distributed tup"); - if (taos_errno(pRes) != 0) { - printf("failed to select from table, reason:%s\n", taos_errstr(pRes)); - taos_free_result(pRes); - ASSERT_TRUE(false); + int64_t st = 1685959293000; + for (int32_t i = 0; i < 10000000; ++i) { + char s[256] = {0}; + + while (1) { + sprintf(s, "insert into t1 values(%ld, %d)", st + i, i); + pRes = taos_query(pConn, s); + + int32_t ret = taos_errno(pRes); + taos_free_result(pRes); + if (ret == 0) { + break; + } + } + + while (1) { + sprintf(s, "insert into t2 values(%ld, %d)", st + i, i); + pRes = taos_query(pConn, s); + int32_t ret = taos_errno(pRes); + + taos_free_result(pRes); + if (ret == 0) { + break; + } + } } +// pRes = taos_query(pConn, "show table distributed tup"); +// if (taos_errno(pRes) != 0) { +// printf("failed to select from table, reason:%s\n", taos_errstr(pRes)); +// taos_free_result(pRes); +// ASSERT_TRUE(false); +// } + printResult(pRes); taos_free_result(pRes); taos_close(pConn); diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 9c242d7c1e..02c56cc40f 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -240,7 +240,8 @@ int32_t tsTtlBatchDropNum = 10000; // number of tables dropped per batch // internal int32_t tsTransPullupInterval = 2; 
int32_t tsMqRebalanceInterval = 2; -int32_t tsStreamCheckpointTickInterval = 1; +int32_t tsStreamCheckpointTickInterval = 20; +int32_t tsStreamNodeCheckInterval = 10; int32_t tsTtlUnit = 86400; int32_t tsTtlPushIntervalSec = 10; int32_t tsTrimVDbIntervalSec = 60 * 60; // interval of trimming db in all vgroups @@ -250,7 +251,6 @@ char tsUdfdResFuncs[512] = ""; // udfd resident funcs that teardown when udf char tsUdfdLdLibPath[512] = ""; bool tsDisableStream = false; int64_t tsStreamBufferSize = 128 * 1024 * 1024; -int64_t tsCheckpointInterval = 3 * 60 * 60 * 1000; bool tsFilterScalarMode = false; int32_t tsKeepTimeOffset = 0; // latency of data migration int tsResolveFQDNRetryTime = 100; // seconds @@ -263,6 +263,8 @@ char tsS3BucketName[TSDB_FQDN_LEN] = ""; char tsS3AppId[TSDB_FQDN_LEN] = ""; int8_t tsS3Enabled = false; +int32_t tsCheckpointInterval = 20; + #ifndef _STORAGE int32_t taosSetTfsCfg(SConfig *pCfg) { SConfigItem *pItem = cfgGetItem(pCfg, "dataDir"); @@ -1057,7 +1059,6 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsDisableStream = cfgGetItem(pCfg, "disableStream")->bval; tsStreamBufferSize = cfgGetItem(pCfg, "streamBufferSize")->i64; - tsCheckpointInterval = cfgGetItem(pCfg, "checkpointInterval")->i64; tsFilterScalarMode = cfgGetItem(pCfg, "filterScalarMode")->bval; tsKeepTimeOffset = cfgGetItem(pCfg, "keepTimeOffset")->i32; diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index eaa80ba775..4c43326959 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -33,9 +33,11 @@ int32_t mmProcessCreateReq(const SMgmtInputOpt *pInput, SRpcMsg *pMsg) { return -1; } - SMnodeOpt option = {.deploy = true, .numOfReplicas = createReq.replica, + SMnodeOpt option = {.deploy = true, + .numOfReplicas = createReq.replica, .numOfTotalReplicas = createReq.replica + createReq.learnerReplica, - .selfIndex = -1, .lastIndex = createReq.lastIndex}; + .selfIndex = -1, 
+ .lastIndex = createReq.lastIndex}; memcpy(option.replicas, createReq.replicas, sizeof(createReq.replicas)); for (int32_t i = 0; i < createReq.replica; ++i) { @@ -204,6 +206,10 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT, mmPutMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index 8206b4e425..13b81231d4 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -76,10 +76,12 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, 
TDMT_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; code = 0; _OVER: diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index f43e1f5537..0251b9b636 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -783,20 +783,24 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_CREATE_INDEX, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_INDEX, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if 
(dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; +// if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; 
if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 6dd7a13c66..c4c0ea238d 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -148,39 +148,39 @@ typedef enum { } ECsmUpdateType; typedef struct { - int32_t id; - ETrnStage stage; - ETrnPolicy policy; - ETrnConflct conflict; - ETrnExec exec; - EOperType oper; - int32_t code; - int32_t failedTimes; - void* rpcRsp; - int32_t rpcRspLen; - int32_t redoActionPos; - SArray* prepareActions; - SArray* redoActions; - SArray* undoActions; - SArray* commitActions; - int64_t createdTime; - int64_t lastExecTime; - int32_t lastAction; - int32_t lastErrorNo; - SEpSet lastEpset; - tmsg_t lastMsgType; - tmsg_t originRpcType; - char dbname[TSDB_TABLE_FNAME_LEN]; - char stbname[TSDB_TABLE_FNAME_LEN]; - int32_t startFunc; - int32_t stopFunc; - int32_t paramLen; - void* param; - char opername[TSDB_TRANS_OPER_LEN]; - SArray* pRpcArray; - SRWLatch lockRpcArray; - int64_t mTraceId; - TdThreadMutex mutex; + int32_t id; + ETrnStage stage; + ETrnPolicy policy; + ETrnConflct conflict; + ETrnExec exec; + EOperType oper; + int32_t code; + int32_t failedTimes; + void* rpcRsp; + int32_t rpcRspLen; + int32_t redoActionPos; + SArray* prepareActions; + SArray* redoActions; + SArray* undoActions; + SArray* commitActions; + int64_t createdTime; + int64_t lastExecTime; + int32_t lastAction; + int32_t lastErrorNo; + SEpSet lastEpset; + tmsg_t lastMsgType; + tmsg_t originRpcType; + char dbname[TSDB_TABLE_FNAME_LEN]; + char stbname[TSDB_TABLE_FNAME_LEN]; + int32_t startFunc; + int32_t stopFunc; + int32_t paramLen; + void* param; + char opername[TSDB_TRANS_OPER_LEN]; + SArray* pRpcArray; + SRWLatch lockRpcArray; + int64_t mTraceId; + TdThreadMutex mutex; } STrans; typedef struct { @@ -453,20 +453,20 @@ typedef struct { } SStbObj; typedef struct { - char 
name[TSDB_FUNC_NAME_LEN]; - int64_t createdTime; - int8_t funcType; - int8_t scriptType; - int8_t align; - int8_t outputType; - int32_t outputLen; - int32_t bufSize; - int64_t signature; - int32_t commentSize; - int32_t codeSize; - char* pComment; - char* pCode; - int32_t funcVersion; + char name[TSDB_FUNC_NAME_LEN]; + int64_t createdTime; + int8_t funcType; + int8_t scriptType; + int8_t align; + int8_t outputType; + int32_t outputLen; + int32_t bufSize; + int64_t signature; + int32_t commentSize; + int32_t codeSize; + char* pComment; + char* pCode; + int32_t funcVersion; SRWLatch lock; } SFuncObj; @@ -561,10 +561,10 @@ typedef struct { int64_t subscribeTime; int64_t rebalanceTime; - int8_t withTbName; - int8_t autoCommit; - int32_t autoCommitInterval; - int32_t resetOffsetCfg; + int8_t withTbName; + int8_t autoCommit; + int32_t autoCommitInterval; + int32_t resetOffsetCfg; } SMqConsumerObj; SMqConsumerObj* tNewSMqConsumerObj(int64_t consumerId, char cgroup[TSDB_CGROUP_LEN]); @@ -574,8 +574,8 @@ void* tDecodeSMqConsumerObj(const void* buf, SMqConsumerObj* pConsumer typedef struct { int32_t vgId; -// char* qmsg; // SubPlanToString - SEpSet epSet; + // char* qmsg; // SubPlanToString + SEpSet epSet; } SMqVgEp; SMqVgEp* tCloneSMqVgEp(const SMqVgEp* pVgEp); @@ -589,10 +589,10 @@ typedef struct { SArray* offsetRows; // SArray } SMqConsumerEp; -//SMqConsumerEp* tCloneSMqConsumerEp(const SMqConsumerEp* pEp); -//void tDeleteSMqConsumerEp(void* pEp); -int32_t tEncodeSMqConsumerEp(void** buf, const SMqConsumerEp* pEp); -void* tDecodeSMqConsumerEp(const void* buf, SMqConsumerEp* pEp, int8_t sver); +// SMqConsumerEp* tCloneSMqConsumerEp(const SMqConsumerEp* pEp); +// void tDeleteSMqConsumerEp(void* pEp); +int32_t tEncodeSMqConsumerEp(void** buf, const SMqConsumerEp* pEp); +void* tDecodeSMqConsumerEp(const void* buf, SMqConsumerEp* pEp, int8_t sver); typedef struct { char key[TSDB_SUBSCRIBE_KEY_LEN]; @@ -606,7 +606,7 @@ typedef struct { SArray* unassignedVgs; // SArray SArray* 
offsetRows; char dbName[TSDB_DB_FNAME_LEN]; - char* qmsg; // SubPlanToString + char* qmsg; // SubPlanToString } SMqSubscribeObj; SMqSubscribeObj* tNewSubscribeObj(const char key[TSDB_SUBSCRIBE_KEY_LEN]); @@ -706,18 +706,21 @@ typedef struct { int64_t currentTick; // do not serialize int64_t deleteMark; int8_t igCheckUpdate; + + // 3.0.5. + int64_t checkpointId; } SStreamObj; int32_t tEncodeSStreamObj(SEncoder* pEncoder, const SStreamObj* pObj); int32_t tDecodeSStreamObj(SDecoder* pDecoder, SStreamObj* pObj, int32_t sver); void tFreeStreamObj(SStreamObj* pObj); -//typedef struct { -// char streamName[TSDB_STREAM_FNAME_LEN]; -// int64_t uid; -// int64_t streamUid; -// SArray* childInfo; // SArray -//} SStreamCheckpointObj; +// typedef struct { +// char streamName[TSDB_STREAM_FNAME_LEN]; +// int64_t uid; +// int64_t streamUid; +// SArray* childInfo; // SArray +// } SStreamCheckpointObj; #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index 05adc17d64..19fd2a3fd4 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -33,6 +33,7 @@ SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw); int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); + // for sma // TODO refactor int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index 3dab144eef..6bf4015852 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -77,10 +77,13 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) { if (tEncodeSSchemaWrapper(pEncoder, &pObj->outputSchema) < 0) return -1; - // 3.0.20 + // 3.0.20 ver =2 if (tEncodeI64(pEncoder, pObj->checkpointFreq) < 0) return -1; if (tEncodeI8(pEncoder, pObj->igCheckUpdate) < 0) return -1; 
+ // 3.0.50 ver = 3 + if (tEncodeI64(pEncoder, pObj->checkpointId) < 0) return -1; + tEndEncode(pEncoder); return pEncoder->pos; } @@ -151,6 +154,9 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) { if (tDecodeI8(pDecoder, &pObj->igCheckUpdate) < 0) return -1; } } + if (sver >= 3) { + if (tDecodeI64(pDecoder, &pObj->checkpointId) < 0) return -1; + } tEndDecode(pDecoder); return 0; } diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 12e28969c9..1c87cde78a 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -128,29 +128,31 @@ static void mndPullupTrimDb(SMnode *pMnode) { } static void mndCalMqRebalance(SMnode *pMnode) { - mTrace("calc mq rebalance"); int32_t contLen = 0; void *pReq = mndBuildTimerMsg(&contLen); if (pReq != NULL) { - SRpcMsg rpcMsg = { .msgType = TDMT_MND_TMQ_TIMER, .pCont = pReq, .contLen = contLen }; + SRpcMsg rpcMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pReq, .contLen = contLen}; tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); } } -#if 0 static void mndStreamCheckpointTick(SMnode *pMnode, int64_t sec) { int32_t contLen = 0; void *pReq = mndBuildCheckpointTickMsg(&contLen, sec); if (pReq != NULL) { - SRpcMsg rpcMsg = { - .msgType = TDMT_MND_STREAM_CHECKPOINT_TIMER, - .pCont = pReq, - .contLen = contLen, - }; + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CHECKPOINT_TIMER, .pCont = pReq, .contLen = contLen}; + tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); + } +} + +static void mndStreamCheckNode(SMnode* pMnode) { + int32_t contLen = 0; + void *pReq = mndBuildTimerMsg(&contLen); + if (pReq != NULL) { + SRpcMsg rpcMsg = {.msgType = TDMT_MND_NODECHECK_TIMER, .pCont = pReq, .contLen = contLen}; tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); } } -#endif static void mndPullupTelem(SMnode *pMnode) { mTrace("pullup telem msg"); @@ -279,11 +281,13 @@ static void *mndThreadFp(void *param) { 
mndCalMqRebalance(pMnode); } -#if 0 if (sec % tsStreamCheckpointTickInterval == 0) { mndStreamCheckpointTick(pMnode, sec); } -#endif + + if (sec % tsStreamNodeCheckInterval == 0) { + mndStreamCheckNode(pMnode); + } if (sec % tsTelemInterval == (TMIN(60, (tsTelemInterval - 1)))) { mndPullupTelem(pMnode); @@ -599,7 +603,7 @@ int32_t mndIsCatchUp(SMnode *pMnode) { return syncIsCatchUp(rid); } -ESyncRole mndGetRole(SMnode *pMnode){ +ESyncRole mndGetRole(SMnode *pMnode) { int64_t rid = pMnode->syncMgmt.sync; return syncGetRole(rid); } diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 36771147a9..1d7d391acf 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -14,6 +14,8 @@ */ #include "mndScheduler.h" +#include "tmisce.h" +#include "mndMnode.h" #include "mndDb.h" #include "mndSnode.h" #include "mndVgroup.h" @@ -25,10 +27,8 @@ #define SINK_NODE_LEVEL (0) extern bool tsDeployOnSnode; -static int32_t setTaskUpstreamEpInfo(const SStreamTask* pTask, SStreamTask* pDownstream); static int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, - SVgObj* pVgroup, int32_t fillHistory); -static void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask); + SVgObj* pVgroup, SEpSet* pEpset, int32_t fillHistory); int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType, int64_t watermark, int64_t deleteMark) { @@ -141,7 +141,7 @@ int32_t mndAddDispatcherForInternalTask(SMnode* pMnode, SStreamObj* pStream, SAr } } else { SStreamTask* pOneSinkTask = taosArrayGetP(pSinkNodeList, 0); - setFixedDownstreamEpInfo(pTask, pOneSinkTask); + streamTaskSetFixedDownstreamInfo(pTask, pOneSinkTask); } return 0; @@ -207,7 +207,8 @@ SVgObj* mndSchedFetchOneVg(SMnode* pMnode, int64_t dbUid) { } // create sink node for each vgroup. 
-int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStreamObj* pStream, int32_t fillHistory) { +int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStreamObj* pStream, SEpSet* pEpset, + int32_t fillHistory) { SSdb* pSdb = pMnode->pSdb; void* pIter = NULL; @@ -223,7 +224,7 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStrea continue; } - mndAddSinkTaskToStream(pStream, pTaskList, pMnode, pVgroup->vgId, pVgroup, fillHistory); + mndAddSinkTaskToStream(pStream, pTaskList, pMnode, pVgroup->vgId, pVgroup, pEpset, fillHistory); sdbRelease(pSdb, pVgroup); } @@ -231,7 +232,7 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStrea } int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup, - int32_t fillHistory) { + SEpSet* pEpset, int32_t fillHistory) { int64_t uid = (fillHistory == 0)? pStream->uid:pStream->hTaskUid; SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SINK, fillHistory, 0, pTaskList); if (pTask == NULL) { @@ -239,6 +240,8 @@ int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* p return -1; } + epsetAssign(&(pTask)->info.mnodeEpset, pEpset); + pTask->info.nodeId = vgId; pTask->info.epSet = mndGetVgroupEpset(pMnode, pVgroup); mndSetSinkTaskInfo(pStream, pTask); @@ -246,13 +249,15 @@ int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* p } static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList, SArray* pSinkTaskList, - SStreamObj* pStream, SSubplan* plan, uint64_t uid, int8_t fillHistory, - bool hasExtraSink, int64_t firstWindowSkey) { + SStreamObj* pStream, SSubplan* plan, uint64_t uid, SEpSet* pEpset, + int8_t fillHistory, bool hasExtraSink, int64_t firstWindowSkey) { SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, pStream->conf.triggerParam, pTaskList); if (pTask == NULL) { return 
terrno; } + epsetAssign(&pTask->info.mnodeEpset, pEpset); + // todo set the correct ts, which should be last key of queried table. STimeWindow* pWindow = &pTask->dataRange.window; @@ -273,51 +278,12 @@ static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTas for(int32_t i = 0; i < taosArrayGetSize(pSinkTaskList); ++i) { SStreamTask* pSinkTask = taosArrayGetP(pSinkTaskList, i); - setTaskUpstreamEpInfo(pTask, pSinkTask); + streamTaskSetUpstreamInfo(pSinkTask, pTask); } return TSDB_CODE_SUCCESS; } -static SStreamChildEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) { - SStreamChildEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamChildEpInfo)); - if (pEpInfo == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - - pEpInfo->childId = pTask->info.selfChildId; - pEpInfo->epSet = pTask->info.epSet; - pEpInfo->nodeId = pTask->info.nodeId; - pEpInfo->taskId = pTask->id.taskId; - - return pEpInfo; -} - -void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask) { - STaskDispatcherFixedEp* pDispatcher = &pDstTask->fixedEpDispatcher; - pDispatcher->taskId = pTask->id.taskId; - pDispatcher->nodeId = pTask->info.nodeId; - pDispatcher->epSet = pTask->info.epSet; - - pDstTask->outputInfo.type = TASK_OUTPUT__FIXED_DISPATCH; - pDstTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH; -} - -int32_t setTaskUpstreamEpInfo(const SStreamTask* pTask, SStreamTask* pDownstream) { - SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pTask); - if (pEpInfo == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - if (pDownstream->pUpstreamEpInfoList == NULL) { - pDownstream->pUpstreamEpInfoList = taosArrayInit(4, POINTER_BYTES); - } - - taosArrayPush(pDownstream->pUpstreamEpInfoList, &pEpInfo); - return TSDB_CODE_SUCCESS; -} - static SArray* addNewTaskList(SArray* pTasksList) { SArray* pTaskList = taosArrayInit(0, POINTER_BYTES); taosArrayPush(pTasksList, &pTaskList); @@ -342,7 +308,7 @@ static void setHTasksId(SArray* pTaskList, const 
SArray* pHTaskList) { } static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* pPlan, SStreamObj* pStream, - bool hasExtraSink, int64_t nextWindowSkey) { + SEpSet* pEpset, bool hasExtraSink, int64_t nextWindowSkey) { // create exec stream task, since only one level, the exec task is also the source task SArray* pTaskList = addNewTaskList(pStream->tasks); SSdb* pSdb = pMnode->pSdb; @@ -379,8 +345,8 @@ static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* // new stream task SArray** pSinkTaskList = taosArrayGet(pStream->tasks, SINK_NODE_LEVEL); - int32_t code = addSourceStreamTask(pMnode, pVgroup, pTaskList, *pSinkTaskList, pStream, plan, pStream->uid, 0, - hasExtraSink, nextWindowSkey); + int32_t code = addSourceStreamTask(pMnode, pVgroup, pTaskList, *pSinkTaskList, pStream, plan, pStream->uid, pEpset, + 0, hasExtraSink, nextWindowSkey); if (code != TSDB_CODE_SUCCESS) { sdbRelease(pSdb, pVgroup); return -1; @@ -389,7 +355,7 @@ static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* if (pStream->conf.fillHistory) { SArray** pHSinkTaskList = taosArrayGet(pStream->pHTasksList, SINK_NODE_LEVEL); code = addSourceStreamTask(pMnode, pVgroup, pHTaskList, *pHSinkTaskList, pStream, plan, pStream->hTaskUid, - 1, hasExtraSink, nextWindowSkey); + pEpset, 1, hasExtraSink, nextWindowSkey); } sdbRelease(pSdb, pVgroup); @@ -406,13 +372,16 @@ static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* } static int32_t doAddSourceTask(SArray* pTaskList, int8_t fillHistory, int64_t uid, SStreamTask* pDownstreamTask, - SMnode* pMnode, SSubplan* pPlan, SVgObj* pVgroup, int64_t nextWindowSkey) { + SMnode* pMnode, SSubplan* pPlan, SVgObj* pVgroup, SEpSet* pEpset, + int64_t nextWindowSkey) { SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, 0, pTaskList); if (pTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } + epsetAssign(&(pTask)->info.mnodeEpset, 
pEpset); + // todo set the correct ts, which should be last key of queried table. STimeWindow* pWindow = &pTask->dataRange.window; pWindow->skey = INT64_MIN; @@ -422,22 +391,24 @@ static int32_t doAddSourceTask(SArray* pTaskList, int8_t fillHistory, int64_t ui pWindow->skey, pWindow->ekey); // all the source tasks dispatch result to a single agg node. - setFixedDownstreamEpInfo(pTask, pDownstreamTask); + streamTaskSetFixedDownstreamInfo(pTask, pDownstreamTask); if (mndAssignStreamTaskToVgroup(pMnode, pTask, pPlan, pVgroup) < 0) { return -1; } - return setTaskUpstreamEpInfo(pTask, pDownstreamTask); + return streamTaskSetUpstreamInfo(pDownstreamTask, pTask); } static int32_t doAddAggTask(uint64_t uid, SArray* pTaskList, SArray* pSinkNodeList, SMnode* pMnode, SStreamObj* pStream, - int32_t fillHistory, SStreamTask** pAggTask) { + SEpSet* pEpset, int32_t fillHistory, SStreamTask** pAggTask) { *pAggTask = tNewStreamTask(uid, TASK_LEVEL__AGG, fillHistory, pStream->conf.triggerParam, pTaskList); if (*pAggTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } + epsetAssign(&(*pAggTask)->info.mnodeEpset, pEpset); + // dispatch if (mndAddDispatcherForInternalTask(pMnode, pStream, pSinkNodeList, *pAggTask) < 0) { return -1; @@ -446,8 +417,8 @@ static int32_t doAddAggTask(uint64_t uid, SArray* pTaskList, SArray* pSinkNodeLi return 0; } -static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, SStreamTask** pAggTask, - SStreamTask** pHAggTask) { +static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, SEpSet* pEpset, + SStreamTask** pAggTask, SStreamTask** pHAggTask) { SArray* pAggTaskList = addNewTaskList(pStream->tasks); SSdb* pSdb = pMnode->pSdb; @@ -461,7 +432,7 @@ static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan *pAggTask = NULL; SArray* pSinkNodeList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL); - int32_t code = doAddAggTask(pStream->uid, pAggTaskList, pSinkNodeList, pMnode, 
pStream, 0, pAggTask); + int32_t code = doAddAggTask(pStream->uid, pAggTaskList, pSinkNodeList, pMnode, pStream, pEpset, 0, pAggTask); if (code != TSDB_CODE_SUCCESS) { return -1; } @@ -489,7 +460,7 @@ static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan SArray* pHSinkNodeList = taosArrayGetP(pStream->pHTasksList, SINK_NODE_LEVEL); *pHAggTask = NULL; - code = doAddAggTask(pStream->hTaskUid, pHAggTaskList, pHSinkNodeList, pMnode, pStream, pStream->conf.fillHistory, + code = doAddAggTask(pStream->hTaskUid, pHAggTaskList, pHSinkNodeList, pMnode, pStream, pEpset, pStream->conf.fillHistory, pHAggTask); if (code != TSDB_CODE_SUCCESS) { if (pSnode != NULL) { @@ -519,7 +490,8 @@ static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan } static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPlan, SStreamObj* pStream, - SStreamTask* pDownstreamTask, SStreamTask* pHDownstreamTask, int64_t nextWindowSkey) { + SStreamTask* pDownstreamTask, SStreamTask* pHDownstreamTask, + SEpSet* pEpset, int64_t nextWindowSkey) { SArray* pSourceTaskList = addNewTaskList(pStream->tasks); SArray* pHSourceTaskList = NULL; @@ -549,7 +521,7 @@ static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPl } int32_t code = - doAddSourceTask(pSourceTaskList, 0, pStream->uid, pDownstreamTask, pMnode, plan, pVgroup, nextWindowSkey); + doAddSourceTask(pSourceTaskList, 0, pStream->uid, pDownstreamTask, pMnode, plan, pVgroup, pEpset, nextWindowSkey); if (code != TSDB_CODE_SUCCESS) { sdbRelease(pSdb, pVgroup); terrno = code; @@ -558,7 +530,7 @@ static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPl if (pStream->conf.fillHistory) { code = doAddSourceTask(pHSourceTaskList, 1, pStream->hTaskUid, pHDownstreamTask, pMnode, plan, pVgroup, - nextWindowSkey); + pEpset, nextWindowSkey); if (code != TSDB_CODE_SUCCESS) { sdbRelease(pSdb, pVgroup); return code; @@ -576,16 +548,16 @@ static int32_t 
addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPl } static int32_t addSinkTasks(SArray* pTasksList, SMnode* pMnode, SStreamObj* pStream, SArray** pCreatedTaskList, - int32_t fillHistory) { + SEpSet* pEpset, int32_t fillHistory) { SArray* pSinkTaskList = addNewTaskList(pTasksList); if (pStream->fixedSinkVgId == 0) { - if (mndAddShuffleSinkTasksToStream(pMnode, pSinkTaskList, pStream, fillHistory) < 0) { + if (mndAddShuffleSinkTasksToStream(pMnode, pSinkTaskList, pStream, pEpset, fillHistory) < 0) { // TODO free return -1; } } else { if (mndAddSinkTaskToStream(pStream, pSinkTaskList, pMnode, pStream->fixedSinkVgId, &pStream->fixedSinkVg, - fillHistory) < 0) { + pEpset, fillHistory) < 0) { // TODO free return -1; } @@ -599,11 +571,11 @@ static void setSinkTaskUpstreamInfo(SArray* pTasksList, const SStreamTask* pUpst SArray* pSinkTaskList = taosArrayGetP(pTasksList, SINK_NODE_LEVEL); for(int32_t i = 0; i < taosArrayGetSize(pSinkTaskList); ++i) { SStreamTask* pSinkTask = taosArrayGetP(pSinkTaskList, i); - setTaskUpstreamEpInfo(pUpstreamTask, pSinkTask); + streamTaskSetUpstreamInfo(pSinkTask, pUpstreamTask); } } -static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, int64_t nextWindowSkey) { +static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, int64_t nextWindowSkey, SEpSet* pEpset) { SSdb* pSdb = pMnode->pSdb; int32_t numOfPlanLevel = LIST_LENGTH(pPlan->pSubplans); @@ -626,7 +598,7 @@ static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* hasExtraSink = true; SArray* pSinkTaskList = NULL; - int32_t code = addSinkTasks(pStream->tasks, pMnode, pStream, &pSinkTaskList, 0); + int32_t code = addSinkTasks(pStream->tasks, pMnode, pStream, &pSinkTaskList, pEpset, 0); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -634,7 +606,7 @@ static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* // check for fill history if 
(pStream->conf.fillHistory) { SArray* pHSinkTaskList = NULL; - code = addSinkTasks(pStream->pHTasksList, pMnode, pStream, &pHSinkTaskList, 1); + code = addSinkTasks(pStream->pHTasksList, pMnode, pStream, &pHSinkTaskList, pEpset, 1); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -649,7 +621,7 @@ static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* SStreamTask* pAggTask = NULL; SStreamTask* pHAggTask = NULL; - int32_t code = addAggTask(pStream, pMnode, pPlan, &pAggTask, &pHAggTask); + int32_t code = addAggTask(pStream, pMnode, pPlan, pEpset, &pAggTask, &pHAggTask); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -658,9 +630,9 @@ static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* setSinkTaskUpstreamInfo(pStream->pHTasksList, pHAggTask); // source level - return addSourceTasksForMultiLevelStream(pMnode, pPlan, pStream, pAggTask, pHAggTask, nextWindowSkey); + return addSourceTasksForMultiLevelStream(pMnode, pPlan, pStream, pAggTask, pHAggTask, pEpset, nextWindowSkey); } else if (numOfPlanLevel == 1) { - return addSourceTasksForOneLevelStream(pMnode, pPlan, pStream, hasExtraSink, nextWindowSkey); + return addSourceTasksForOneLevelStream(pMnode, pPlan, pStream, pEpset, hasExtraSink, nextWindowSkey); } return 0; @@ -673,7 +645,10 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream, int64_t nextWindo return -1; } - int32_t code = doScheduleStream(pStream, pMnode, pPlan, nextWindowSkey); + SEpSet mnodeEpset = {0}; + mndGetMnodeEpSet(pMnode, &mnodeEpset); + + int32_t code = doScheduleStream(pStream, pMnode, pPlan, nextWindowSkey, &mnodeEpset); qDestroyQueryPlan(pPlan); return code; diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 03bb84b04d..8f4d52556e 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -26,13 +26,36 @@ #include "mndUser.h" #include "mndVgroup.h" #include "parser.h" +#include 
"tmisce.h" #include "tname.h" #include "audit.h" -#define MND_STREAM_VER_NUMBER 3 -#define MND_STREAM_RESERVE_SIZE 64 +#define MND_STREAM_VER_NUMBER 3 +#define MND_STREAM_RESERVE_SIZE 64 +#define MND_STREAM_MAX_NUM 60 +#define MND_STREAM_CHECKPOINT_NAME "stream-checkpoint" -#define MND_STREAM_MAX_NUM 60 +typedef struct SNodeEntry { + int32_t nodeId; + SEpSet epset; // compare the epset to identify the vgroup tranferring between different dnodes. + int64_t hbTimestamp; // second +} SNodeEntry; + +typedef struct SStreamVnodeRevertIndex { + SArray *pNodeEntryList; + int64_t ts; // snapshot ts + SHashObj *pTaskMap; + SArray *pTaskList; + TdThreadMutex lock; +} SStreamVnodeRevertIndex; + +typedef struct SVgroupChangeInfo { + SHashObj *pDBMap; + SArray *pUpdateNodeList; // SArray +} SVgroupChangeInfo; + +static int32_t mndNodeCheckSentinel = 0; +static SStreamVnodeRevertIndex execNodeList; static int32_t mndStreamActionInsert(SSdb *pSdb, SStreamObj *pStream); static int32_t mndStreamActionDelete(SSdb *pSdb, SStreamObj *pStream); @@ -41,6 +64,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq); static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq); static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); +static int32_t mndProcessStreamHb(SRpcMsg *pReq); static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq); static int32_t mndProcessStreamMetaReq(SRpcMsg *pReq); static int32_t mndGetStreamMeta(SRpcMsg *pReq, SShowObj *pShow, STableMetaRsp *pMeta); @@ -50,6 +74,17 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock static void mndCancelGetNextStreamTask(SMnode *pMnode, void *pIter); static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq); static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq); +static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, int32_t nodeId, int64_t checkpointId, + int64_t streamId, int32_t taskId); +static int32_t 
mndProcessNodeCheck(SRpcMsg *pReq); +static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg); +static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamVnodeRevertIndex *pExecNode); + +static SArray *doExtractNodeListFromStream(SMnode *pMnode); +static SArray *mndTakeVgroupSnapshot(SMnode *pMnode); +static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList); +static int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans); +static void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_t msgType, const SEpSet *pEpset); int32_t mndInitStream(SMnode *pMnode) { SSdbTable table = { @@ -64,16 +99,21 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_MND_CREATE_STREAM, mndProcessCreateStreamReq); mndSetMsgHandle(pMnode, TDMT_MND_DROP_STREAM, mndProcessDropStreamReq); - /*mndSetMsgHandle(pMnode, TDMT_MND_RECOVER_STREAM, mndProcessRecoverStreamReq);*/ + mndSetMsgHandle(pMnode, TDMT_MND_NODECHECK_TIMER, mndProcessNodeCheck); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_DEPLOY_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_DROP_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_PAUSE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_RESUME_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_STOP_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_VND_STREAM_TASK_UPDATE_RSP, mndTransProcessRsp); - // mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr); - // mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint); + mndSetMsgHandle(pMnode, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint); + mndSetMsgHandle(pMnode, 
TDMT_MND_STREAM_HEARTBEAT, mndProcessStreamHb); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_REPORT_CHECKPOINT, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_NODECHANGE_CHECK, mndProcessNodeCheckReq); mndSetMsgHandle(pMnode, TDMT_MND_PAUSE_STREAM, mndProcessPauseStreamReq); mndSetMsgHandle(pMnode, TDMT_MND_RESUME_STREAM, mndProcessResumeStreamReq); @@ -83,10 +123,19 @@ int32_t mndInitStream(SMnode *pMnode) { mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_STREAM_TASKS, mndRetrieveStreamTask); mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_STREAM_TASKS, mndCancelGetNextStreamTask); + taosThreadMutexInit(&execNodeList.lock, NULL); + execNodeList.pTaskMap = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), true, HASH_NO_LOCK); + execNodeList.pTaskList = taosArrayInit(4, sizeof(STaskStatusEntry)); + return sdbSetTable(pMnode->pSdb, table); } -void mndCleanupStream(SMnode *pMnode) {} +void mndCleanupStream(SMnode *pMnode) { + taosArrayDestroy(execNodeList.pTaskList); + taosHashCleanup(execNodeList.pTaskMap); + taosThreadMutexDestroy(&execNodeList.lock); + mDebug("mnd stream cleanup"); +} SSdbRaw *mndStreamActionEncode(SStreamObj *pStream) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -147,6 +196,7 @@ SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw) { if (sver != MND_STREAM_VER_NUMBER) { terrno = 0; + mError("stream read invalid ver, data ver: %d, curr ver: %d", sver, MND_STREAM_VER_NUMBER); goto STREAM_DECODE_OVER; } @@ -460,11 +510,7 @@ int32_t mndPersistTaskDeployReq(STrans *pTrans, SStreamTask *pTask) { STransAction action = {0}; action.mTraceId = pTrans->mTraceId; - memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet)); - action.pCont = buf; - action.contLen = tlen; - action.msgType = TDMT_STREAM_TASK_DEPLOY; - + initTransAction(&action, buf, tlen, TDMT_STREAM_TASK_DEPLOY, &pTask->info.epSet); if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(buf); return -1; @@ -640,8 +686,6 @@ _OVER: } static int32_t 
mndPersistTaskDropReq(STrans *pTrans, SStreamTask *pTask) { - // vnode - /*if (pTask->info.nodeId > 0) {*/ SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); if (pReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -653,15 +697,11 @@ static int32_t mndPersistTaskDropReq(STrans *pTrans, SStreamTask *pTask) { pReq->streamId = pTask->id.streamId; STransAction action = {0}; - memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet)); - action.pCont = pReq; - action.contLen = sizeof(SVDropStreamTaskReq); - action.msgType = TDMT_STREAM_TASK_DROP; + initTransAction(&action, pReq, sizeof(SVDropStreamTaskReq), TDMT_STREAM_TASK_DROP, &pTask->info.epSet); if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(pReq); return -1; } - /*}*/ return 0; } @@ -762,16 +802,16 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { } } - pDb = mndAcquireDb(pMnode, streamObj.sourceDb); - if (pDb->cfg.replications != 1) { - mError("stream source db must have only 1 replica, but %s has %d", pDb->name, pDb->cfg.replications); - terrno = TSDB_CODE_MND_MULTI_REPLICA_SOURCE_DB; - mndReleaseDb(pMnode, pDb); - pDb = NULL; - goto _OVER; - } + // pDb = mndAcquireDb(pMnode, streamObj.sourceDb); + // if (pDb->cfg.replications != 1) { + // mError("stream source db must have only 1 replica, but %s has %d", pDb->name, pDb->cfg.replications); + // terrno = TSDB_CODE_MND_MULTI_REPLICA_SOURCE_DB; + // mndReleaseDb(pMnode, pDb); + // pDb = NULL; + // goto _OVER; + // } - mndReleaseDb(pMnode, pDb); + // mndReleaseDb(pMnode, pDb); STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pReq, "create-stream"); if (pTrans == NULL) { @@ -827,6 +867,10 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { mndTransDrop(pTrans); + taosThreadMutexLock(&execNodeList.lock); + keepStreamTasksInBuf(&streamObj, &execNodeList); + taosThreadMutexUnlock(&execNodeList.lock); + code = TSDB_CODE_ACTION_IN_PROGRESS; char detail[2000] = {0}; @@ 
-855,55 +899,36 @@ _OVER: return code; } -#if 0 - static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - SSdb *pSdb = pMnode->pSdb; - void *pIter = NULL; - SStreamObj *pStream = NULL; - - // iterate all stream obj - while (1) { - pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); - if (pIter == NULL) break; - // incr tick - int64_t currentTick = atomic_add_fetch_64(&pStream->currentTick, 1); - // if >= checkpointFreq, build msg TDMT_MND_STREAM_BEGIN_CHECKPOINT, put into write q - if (currentTick >= pStream->checkpointFreq) { - atomic_store_64(&pStream->currentTick, 0); - SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); - - pMsg->streamId = pStream->uid; - pMsg->checkpointId = tGenIdPI64(); - memcpy(pMsg->streamName, pStream->name, TSDB_STREAM_FNAME_LEN); - - SRpcMsg rpcMsg = { - .msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, - .pCont = pMsg, - .contLen = sizeof(SMStreamDoCheckpointMsg), - }; - - tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); - } + SMnode *pMnode = pReq->info.node; + SSdb *pSdb = pMnode->pSdb; + if (sdbGetSize(pSdb, SDB_STREAM) <= 0) { + return 0; } + int64_t checkpointId = taosGetTimestampMs(); + SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); + pMsg->checkpointId = checkpointId; + + int32_t size = sizeof(SMStreamDoCheckpointMsg); + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pMsg, .contLen = size}; + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); return 0; } -static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, const SStreamTask *pTask, - SMStreamDoCheckpointMsg *pMsg) { +static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, int32_t nodeId, int64_t checkpointId, + int64_t streamId, int32_t taskId) { SStreamCheckpointSourceReq req = {0}; - req.checkpointId = pMsg->checkpointId; - req.nodeId = pTask->info.nodeId; + req.checkpointId = 
checkpointId; + req.nodeId = nodeId; req.expireTime = -1; - req.streamId = pTask->streamId; - req.taskId = pTask->taskId; + req.streamId = streamId; // pTask->id.streamId; + req.taskId = taskId; // pTask->id.taskId; int32_t code; int32_t blen; - tEncodeSize(tEncodeSStreamCheckpointSourceReq, &req, blen, code); + tEncodeSize(tEncodeStreamCheckpointSourceReq, &req, blen, code); if (code < 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; @@ -920,11 +945,11 @@ static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, con void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); SEncoder encoder; tEncoderInit(&encoder, abuf, tlen); - tEncodeSStreamCheckpointSourceReq(&encoder, &req); + tEncodeStreamCheckpointSourceReq(&encoder, &req); SMsgHead *pMsgHead = (SMsgHead *)buf; pMsgHead->contLen = htonl(tlen); - pMsgHead->vgId = htonl(pTask->info.nodeId); + pMsgHead->vgId = htonl(nodeId); tEncoderClear(&encoder); @@ -933,95 +958,296 @@ static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, con return 0; } +// static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStream, int64_t checkpointId) { +// int64_t timestampMs = taosGetTimestampMs(); +// if (timestampMs - pStream->checkpointFreq < tsStreamCheckpointTickInterval * 1000) { +// return -1; +// } -static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - SSdb *pSdb = pMnode->pSdb; +// STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, "stream-checkpoint"); +// if (pTrans == NULL) return -1; +// mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); +// if (mndTrancCheckConflict(pMnode, pTrans) != 0) { +// mError("failed to checkpoint of stream name%s, checkpointId: %" PRId64 ", reason:%s", pStream->name, +// checkpointId, +// tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); +// mndTransDrop(pTrans); +// return -1; +// } +// mDebug("start to trigger checkpoint for stream:%s, checkpoint: %" 
PRId64 "", pStream->name, checkpointId); +// atomic_store_64(&pStream->currentTick, 1); +// taosWLockLatch(&pStream->lock); +// // 1. redo action: broadcast checkpoint source msg for all source vg +// int32_t totLevel = taosArrayGetSize(pStream->tasks); +// for (int32_t i = 0; i < totLevel; i++) { +// SArray *pLevel = taosArrayGetP(pStream->tasks, i); +// SStreamTask *pTask = taosArrayGetP(pLevel, 0); +// if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { +// int32_t sz = taosArrayGetSize(pLevel); +// for (int32_t j = 0; j < sz; j++) { +// SStreamTask *pTask = taosArrayGetP(pLevel, j); +// /*A(pTask->info.nodeId > 0);*/ +// SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); +// if (pVgObj == NULL) { +// taosWUnLockLatch(&pStream->lock); +// mndTransDrop(pTrans); +// return -1; +// } - SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont; +// void *buf; +// int32_t tlen; +// if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, checkpointId, pTask->id.streamId, +// pTask->id.taskId) < 0) { +// mndReleaseVgroup(pMnode, pVgObj); +// taosWUnLockLatch(&pStream->lock); +// mndTransDrop(pTrans); +// return -1; +// } - SStreamObj *pStream = mndAcquireStream(pMnode, pMsg->streamName); +// STransAction action = {0}; +// action.epSet = mndGetVgroupEpset(pMnode, pVgObj); +// action.pCont = buf; +// action.contLen = tlen; +// action.msgType = TDMT_VND_STREAM_CHECK_POINT_SOURCE; - if (pStream == NULL || pStream->uid != pMsg->streamId) { - mError("start checkpointing failed since stream %s not found", pMsg->streamName); - return -1; - } +// mndReleaseVgroup(pMnode, pVgObj); - // build new transaction: - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "stream-checkpoint"); - if (pTrans == NULL) return -1; - mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); - if (mndTransCheckConflict(pMnode, pTrans) != 0) { - mndReleaseStream(pMnode, pStream); - mndTransDrop(pTrans); - return -1; 
- } +// if (mndTransAppendRedoAction(pTrans, &action) != 0) { +// taosMemoryFree(buf); +// taosWUnLockLatch(&pStream->lock); +// mndReleaseStream(pMnode, pStream); +// mndTransDrop(pTrans); +// return -1; +// } +// } +// } +// } +// // 2. reset tick +// pStream->checkpointFreq = checkpointId; +// pStream->checkpointId = checkpointId; +// pStream->checkpointFreq = taosGetTimestampMs(); +// atomic_store_64(&pStream->currentTick, 0); +// // 3. commit log: stream checkpoint info +// pStream->version = pStream->version + 1; +// taosWUnLockLatch(&pStream->lock); + +// // // code condtion + +// SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); +// if (pCommitRaw == NULL) { +// mError("failed to prepare trans rebalance since %s", terrstr()); +// goto _ERR; +// } +// if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { +// sdbFreeRaw(pCommitRaw); +// mError("failed to prepare trans rebalance since %s", terrstr()); +// goto _ERR; +// } +// if (sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY) != 0) { +// sdbFreeRaw(pCommitRaw); +// mError("failed to prepare trans rebalance since %s", terrstr()); +// goto _ERR; +// } + +// if (mndTransPrepare(pMnode, pTrans) != 0) { +// mError("failed to prepare trans rebalance since %s", terrstr()); +// goto _ERR; +// } +// mndTransDrop(pTrans); +// return 0; +// _ERR: +// mndTransDrop(pTrans); +// return -1; +// } + +static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream, SMnode *pMnode, + int64_t checkpointId) { + taosWLockLatch(&pStream->lock); - taosRLockLatch(&pStream->lock); - // 1. 
redo action: broadcast checkpoint source msg for all source vg int32_t totLevel = taosArrayGetSize(pStream->tasks); for (int32_t i = 0; i < totLevel; i++) { SArray *pLevel = taosArrayGetP(pStream->tasks, i); SStreamTask *pTask = taosArrayGetP(pLevel, 0); + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { int32_t sz = taosArrayGetSize(pLevel); for (int32_t j = 0; j < sz; j++) { - SStreamTask *pTask = taosArrayGetP(pLevel, j); + pTask = taosArrayGetP(pLevel, j); + if (pTask->info.fillHistory == 1) { + continue; + } /*A(pTask->info.nodeId > 0);*/ SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); if (pVgObj == NULL) { - taosRUnLockLatch(&pStream->lock); - mndReleaseStream(pMnode, pStream); - mndTransDrop(pTrans); + taosWUnLockLatch(&pStream->lock); return -1; } void *buf; int32_t tlen; - if (mndBuildStreamCheckpointSourceReq(&buf, &tlen, pTask, pMsg) < 0) { - taosRUnLockLatch(&pStream->lock); - mndReleaseStream(pMnode, pStream); - mndTransDrop(pTrans); + if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, checkpointId, pTask->id.streamId, + pTask->id.taskId) < 0) { + mndReleaseVgroup(pMnode, pVgObj); + taosWUnLockLatch(&pStream->lock); return -1; } STransAction action = {0}; - action.epSet = mndGetVgroupEpset(pMnode, pVgObj); - action.pCont = buf; - action.contLen = tlen; - action.msgType = TDMT_VND_STREAM_CHECK_POINT_SOURCE; - + SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); + initTransAction(&action, buf, tlen, TDMT_VND_STREAM_CHECK_POINT_SOURCE, &epset); mndReleaseVgroup(pMnode, pVgObj); if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(buf); - taosRUnLockLatch(&pStream->lock); - mndReleaseStream(pMnode, pStream); - mndTransDrop(pTrans); + taosWUnLockLatch(&pStream->lock); return -1; } } } } - // 2. reset tick + + pStream->checkpointId = checkpointId; + pStream->checkpointFreq = taosGetTimestampMs(); atomic_store_64(&pStream->currentTick, 0); // 3. 
commit log: stream checkpoint info - taosRUnLockLatch(&pStream->lock); + pStream->version = pStream->version + 1; - if (mndTransPrepare(pMnode, pTrans) != 0) { + taosWUnLockLatch(&pStream->lock); + + SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); + if (pCommitRaw == NULL) { + mError("failed to prepare trans rebalance since %s", terrstr()); + return -1; + } + if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + sdbFreeRaw(pCommitRaw); + mError("failed to prepare trans rebalance since %s", terrstr()); + return -1; + } + if (sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY) != 0) { + sdbFreeRaw(pCommitRaw); mError("failed to prepare trans rebalance since %s", terrstr()); - mndTransDrop(pTrans); - mndReleaseStream(pMnode, pStream); return -1; } - - mndReleaseStream(pMnode, pStream); - mndTransDrop(pTrans); - return 0; } -#endif +static const char *mndGetStreamDB(SMnode *pMnode) { + SSdb *pSdb = pMnode->pSdb; + SStreamObj *pStream = NULL; + void *pIter = NULL; + + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) { + return NULL; + } + + const char *p = taosStrdup(pStream->sourceDb); + mndReleaseStream(pMnode, pStream); + sdbCancelFetch(pSdb, pIter); + return p; +} + +static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + SSdb *pSdb = pMnode->pSdb; + void *pIter = NULL; + SStreamObj *pStream = NULL; + int32_t code = 0; + + { // check if the node update happens or not + int64_t ts = taosGetTimestampSec(); + + if (execNodeList.pNodeEntryList == NULL || (taosArrayGetSize(execNodeList.pNodeEntryList) == 0)) { + if (execNodeList.pNodeEntryList != NULL) { + execNodeList.pNodeEntryList = taosArrayDestroy(execNodeList.pNodeEntryList); + } + + execNodeList.pNodeEntryList = doExtractNodeListFromStream(pMnode); + } + + if (taosArrayGetSize(execNodeList.pNodeEntryList) == 0) { + mDebug("end to do stream task node change checking, no vgroup exists, do nothing"); + execNodeList.ts = ts; + 
atomic_store_32(&mndNodeCheckSentinel, 0); + return 0; + } + + SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode); + + SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execNodeList.pNodeEntryList, pNodeSnapshot); + bool nodeUpdated = (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0); + taosArrayDestroy(changeInfo.pUpdateNodeList); + taosHashCleanup(changeInfo.pDBMap); + taosArrayDestroy(pNodeSnapshot); + + if (nodeUpdated) { + mDebug("stream task not ready due to node update, not generate checkpoint"); + return 0; + } + } + + { // check if all tasks are in TASK_STATUS__NORMAL status + bool ready = true; + + taosThreadMutexLock(&execNodeList.lock); + for (int32_t i = 0; i < taosArrayGetSize(execNodeList.pTaskList); ++i) { + STaskStatusEntry *p = taosArrayGet(execNodeList.pTaskList, i); + if (p->status != TASK_STATUS__NORMAL) { + mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, create checkpoint msg not issued", + p->streamId, p->taskId, 0, streamGetTaskStatusStr(p->status)); + ready = false; + break; + } + } + taosThreadMutexUnlock(&execNodeList.lock); + + if (!ready) { + return 0; + } + } + + SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont; + int64_t checkpointId = pMsg->checkpointId; + + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB, NULL, MND_STREAM_CHECKPOINT_NAME); + if (pTrans == NULL) { + mError("failed to trigger checkpoint, reason: %s", tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + return -1; + } + mDebug("start to trigger checkpoint, checkpointId: %" PRId64 "", checkpointId); + + const char *pDb = mndGetStreamDB(pMnode); + mndTransSetDbName(pTrans, pDb, "checkpoint"); + taosMemoryFree((void *)pDb); + + if (mndTransCheckConflict(pMnode, pTrans) != 0) { + mError("failed to trigger checkpoint, checkpointId: %" PRId64 ", reason:%s", checkpointId, + tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); + mndTransDrop(pTrans); + return -1; + } + + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, 
pIter, (void **)&pStream); + if (pIter == NULL) break; + + code = mndAddStreamCheckpointToTrans(pTrans, pStream, pMnode, checkpointId); + sdbRelease(pSdb, pStream); + if (code == -1) { + break; + } + } + + if (code == 0) { + if (mndTransPrepare(pMnode, pTrans) != 0) { + mError("failed to prepre trans rebalance since %s", terrstr()); + } + } + + mndTransDrop(pTrans); + return code; +} static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; @@ -1057,6 +1283,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { sdbRelease(pMnode->pSdb, pStream); return -1; } + mInfo("trans:%d, used to drop stream:%s", pTrans->id, dropReq.name); mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); @@ -1065,6 +1292,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { mndTransDrop(pTrans); return -1; } + // mndTransSetSerial(pTrans); // drop all tasks if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { @@ -1381,18 +1609,18 @@ static void mndCancelGetNextStreamTask(SMnode *pMnode, void *pIter) { static int32_t mndPauseStreamTask(STrans *pTrans, SStreamTask *pTask) { SVPauseStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVPauseStreamTaskReq)); if (pReq == NULL) { + mError("failed to malloc in pause stream, size:%" PRIzu ", code:%s", sizeof(SVPauseStreamTaskReq), + tstrerror(TSDB_CODE_OUT_OF_MEMORY)); terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } + pReq->head.vgId = htonl(pTask->info.nodeId); pReq->taskId = pTask->id.taskId; pReq->streamId = pTask->id.streamId; STransAction action = {0}; - memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet)); - action.pCont = pReq; - action.contLen = sizeof(SVPauseStreamTaskReq); - action.msgType = TDMT_STREAM_TASK_PAUSE; + initTransAction(&action, pReq, sizeof(SVPauseStreamTaskReq), TDMT_STREAM_TASK_PAUSE, &pTask->info.epSet); if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(pReq); return -1; @@ -1407,7 +1635,7 @@ int32_t mndPauseAllStreamTaskImpl(STrans *pTrans, 
SArray *tasks) { int32_t sz = taosArrayGetSize(pTasks); for (int32_t j = 0; j < sz; j++) { SStreamTask *pTask = taosArrayGetP(pTasks, j); - if (pTask->info.taskLevel != TASK_LEVEL__SINK && mndPauseStreamTask(pTrans, pTask) < 0) { + if (mndPauseStreamTask(pTrans, pTask) < 0) { return -1; } @@ -1532,10 +1760,7 @@ static int32_t mndResumeStreamTask(STrans *pTrans, SStreamTask *pTask, int8_t ig pReq->igUntreated = igUntreated; STransAction action = {0}; - memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet)); - action.pCont = pReq; - action.contLen = sizeof(SVResumeStreamTaskReq); - action.msgType = TDMT_STREAM_TASK_RESUME; + initTransAction(&action, pReq, sizeof(SVResumeStreamTaskReq), TDMT_STREAM_TASK_RESUME, &pTask->info.epSet); if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(pReq); return -1; @@ -1550,7 +1775,7 @@ int32_t mndResumeAllStreamTasks(STrans *pTrans, SStreamObj *pStream, int8_t igUn int32_t sz = taosArrayGetSize(pTasks); for (int32_t j = 0; j < sz; j++) { SStreamTask *pTask = taosArrayGetP(pTasks, j); - if (pTask->info.taskLevel != TASK_LEVEL__SINK && mndResumeStreamTask(pTrans, pTask, igUntreated) < 0) { + if (mndResumeStreamTask(pTrans, pTask, igUntreated) < 0) { return -1; } @@ -1638,3 +1863,517 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { return TSDB_CODE_ACTION_IN_PROGRESS; } + +static void initNodeUpdateMsg(SStreamTaskNodeUpdateMsg *pMsg, const SVgroupChangeInfo *pInfo, int64_t streamId, + int32_t taskId) { + pMsg->streamId = streamId; + pMsg->taskId = taskId; + pMsg->pNodeList = taosArrayInit(taosArrayGetSize(pInfo->pUpdateNodeList), sizeof(SNodeUpdateInfo)); + taosArrayAddAll(pMsg->pNodeList, pInfo->pUpdateNodeList); +} + +static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupChangeInfo *pInfo, int32_t nodeId, + int64_t streamId, int32_t taskId) { + SStreamTaskNodeUpdateMsg req = {0}; + initNodeUpdateMsg(&req, pInfo, streamId, taskId); + + int32_t code = 0; + int32_t blen; + + 
tEncodeSize(tEncodeStreamTaskUpdateMsg, &req, blen, code); + if (code < 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + int32_t tlen = sizeof(SMsgHead) + blen; + + void *buf = taosMemoryMalloc(tlen); + if (buf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + SEncoder encoder; + tEncoderInit(&encoder, abuf, tlen); + tEncodeStreamTaskUpdateMsg(&encoder, &req); + + SMsgHead *pMsgHead = (SMsgHead *)buf; + pMsgHead->contLen = htonl(tlen); + pMsgHead->vgId = htonl(nodeId); + + tEncoderClear(&encoder); + + *pBuf = buf; + *pLen = tlen; + + return TSDB_CODE_SUCCESS; +} + +int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans) { + SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); + if (pCommitRaw == NULL) { + mError("failed to encode stream since %s", terrstr()); + return -1; + } + + if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + mError("stream trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); + sdbFreeRaw(pCommitRaw); + mndTransDrop(pTrans); + return -1; + } + + if (sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY) != 0) { + mError("stream trans:%d failed to set raw status since %s", pTrans->id, terrstr()); + sdbFreeRaw(pCommitRaw); + mndTransDrop(pTrans); + return -1; + } + + return 0; +} + +void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_t msgType, const SEpSet *pEpset) { + pAction->epSet = *pEpset; + pAction->contLen = contLen; + pAction->pCont = pCont; + pAction->msgType = msgType; +} + +// todo extract method: traverse stream tasks +// build trans to update the epset +static int32_t createStreamUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SVgroupChangeInfo *pInfo) { + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB, NULL, "stream-task-update"); + if (pTrans == NULL) { + mError("failed to build stream task DAG update, reason: %s", tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + return -1; + 
} + + mDebug("start to build stream:0x%" PRIx64 " task DAG update", pStream->uid); + + mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); + if (mndTransCheckConflict(pMnode, pTrans) != 0) { + mError("failed to build stream:0x%" PRIx64 " task DAG update, code:%s", pStream->uid, + tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); + mndTransDrop(pTrans); + return -1; + } + + taosWLockLatch(&pStream->lock); + int32_t numOfLevels = taosArrayGetSize(pStream->tasks); + + for (int32_t j = 0; j < numOfLevels; ++j) { + SArray *pLevel = taosArrayGetP(pStream->tasks, j); + + int32_t numOfTasks = taosArrayGetSize(pLevel); + for (int32_t k = 0; k < numOfTasks; ++k) { + SStreamTask *pTask = taosArrayGetP(pLevel, k); + + void *pBuf = NULL; + int32_t len = 0; + streamTaskUpdateEpsetInfo(pTask, pInfo->pUpdateNodeList); + doBuildStreamTaskUpdateMsg(&pBuf, &len, pInfo, pTask->info.nodeId, pTask->id.streamId, pTask->id.taskId); + + STransAction action = {0}; + initTransAction(&action, pBuf, len, TDMT_VND_STREAM_TASK_UPDATE, &pTask->info.epSet); + if (mndTransAppendRedoAction(pTrans, &action) != 0) { + taosMemoryFree(pBuf); + taosWUnLockLatch(&pStream->lock); + return -1; + } + } + } + + taosWUnLockLatch(&pStream->lock); + + int32_t code = mndPersistTransLog(pStream, pTrans); + if (code != TSDB_CODE_SUCCESS) { + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + return -1; + } + + if (mndTransPrepare(pMnode, pTrans) != 0) { + mError("trans:%d, failed to prepare update stream trans since %s", pTrans->id, terrstr()); + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + return -1; + } + + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + + return TSDB_CODE_ACTION_IN_PROGRESS; +} + +static bool isNodeEpsetChanged(const SEpSet *pPrevEpset, const SEpSet *pCurrent) { + const SEp *pEp = GET_ACTIVE_EP(pPrevEpset); + + for (int32_t i = 0; i < pCurrent->numOfEps; ++i) { + const SEp *p = &(pCurrent->eps[i]); + if (pEp->port == p->port && strncmp(pEp->fqdn, 
p->fqdn, TSDB_FQDN_LEN) == 0) { + return false; + } + } + + return true; +} + +// 1. increasing the replica count does not affect the stream process. +// 2. decreasing the replica count may affect the stream task execution, in that one or more running stream +// tasks may reside on the replica that will be removed. +// 3. vgroup redistribution is a combination of first increasing the replica count and then decreasing it, so we will +// handle it as described in items 1 & 2. +static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList) { + SVgroupChangeInfo info = { + .pUpdateNodeList = taosArrayInit(4, sizeof(SNodeUpdateInfo)), + .pDBMap = taosHashInit(32, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), true, HASH_NO_LOCK), + }; + + int32_t numOfNodes = taosArrayGetSize(pPrevNodeList); + for (int32_t i = 0; i < numOfNodes; ++i) { + SNodeEntry *pPrevEntry = taosArrayGet(pPrevNodeList, i); + + int32_t num = taosArrayGetSize(pNodeList); + for (int32_t j = 0; j < num; ++j) { + SNodeEntry *pCurrent = taosArrayGet(pNodeList, j); + + if (pCurrent->nodeId == pPrevEntry->nodeId) { + if (isNodeEpsetChanged(&pPrevEntry->epset, &pCurrent->epset)) { + const SEp *pPrevEp = GET_ACTIVE_EP(&pPrevEntry->epset); + + char buf[256] = {0}; + EPSET_TO_STR(&pCurrent->epset, buf); + mDebug("nodeId:%d epset changed detected, old:%s:%d -> new:%s", pCurrent->nodeId, pPrevEp->fqdn, + pPrevEp->port, buf); + + SNodeUpdateInfo updateInfo = {.nodeId = pPrevEntry->nodeId}; + epsetAssign(&updateInfo.prevEp, &pPrevEntry->epset); + epsetAssign(&updateInfo.newEp, &pCurrent->epset); + taosArrayPush(info.pUpdateNodeList, &updateInfo); + + SVgObj *pVgroup = mndAcquireVgroup(pMnode, pCurrent->nodeId); + taosHashPut(info.pDBMap, pVgroup->dbName, strlen(pVgroup->dbName), NULL, 0); + mndReleaseVgroup(pMnode, pVgroup); + } + + break; + } + } + } + + return info; +} + +static SArray *mndTakeVgroupSnapshot(SMnode *pMnode) { + SSdb *pSdb = pMnode->pSdb; + void
*pIter = NULL; + SVgObj *pVgroup = NULL; + + SArray *pVgroupListSnapshot = taosArrayInit(4, sizeof(SNodeEntry)); + + while (1) { + pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup); + if (pIter == NULL) { + break; + } + + SNodeEntry entry = {0}; + entry.epset = mndGetVgroupEpset(pMnode, pVgroup); + entry.nodeId = pVgroup->vgId; + entry.hbTimestamp = -1; + + taosArrayPush(pVgroupListSnapshot, &entry); + sdbRelease(pSdb, pVgroup); + } + + return pVgroupListSnapshot; +} + +static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChangeInfo) { + SSdb *pSdb = pMnode->pSdb; + + // check all streams that involved this vnode should update the epset info + SStreamObj *pStream = NULL; + void *pIter = NULL; + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) { + break; + } + + void *p = taosHashGet(pChangeInfo->pDBMap, pStream->targetDb, strlen(pStream->targetDb)); + void *p1 = taosHashGet(pChangeInfo->pDBMap, pStream->sourceDb, strlen(pStream->sourceDb)); + if (p == NULL && p1 == NULL) { + mndReleaseStream(pMnode, pStream); + continue; + } + + mDebug("stream:0x%" PRIx64 " involved node changed, create update trans", pStream->uid); + int32_t code = createStreamUpdateTrans(pMnode, pStream, pChangeInfo); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + return 0; +} + +static SArray *doExtractNodeListFromStream(SMnode *pMnode) { + SSdb *pSdb = pMnode->pSdb; + SStreamObj *pStream = NULL; + void *pIter = NULL; + + SHashObj *pHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK); + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) { + break; + } + + taosWLockLatch(&pStream->lock); + int32_t numOfLevels = taosArrayGetSize(pStream->tasks); + + for (int32_t j = 0; j < numOfLevels; ++j) { + SArray *pLevel = taosArrayGetP(pStream->tasks, j); + + int32_t numOfTasks = taosArrayGetSize(pLevel); + for (int32_t k = 0; k 
< numOfTasks; ++k) { + SStreamTask *pTask = taosArrayGetP(pLevel, k); + SNodeEntry entry = {0}; + epsetAssign(&entry.epset, &pTask->info.epSet); + entry.nodeId = pTask->info.nodeId; + entry.hbTimestamp = -1; + + taosHashPut(pHash, &entry.nodeId, sizeof(entry.nodeId), &entry, sizeof(entry)); + } + } + + taosWUnLockLatch(&pStream->lock); + sdbRelease(pSdb, pStream); + } + + SArray *plist = taosArrayInit(taosHashGetSize(pHash), sizeof(SNodeEntry)); + + // convert to list + pIter = NULL; + while ((pIter = taosHashIterate(pHash, pIter)) != NULL) { + SNodeEntry *pEntry = (SNodeEntry *)pIter; + taosArrayPush(plist, pEntry); + } + taosHashCleanup(pHash); + + return plist; +} + +static void doExtractTasksFromStream(SMnode *pMnode) { + SSdb *pSdb = pMnode->pSdb; + SStreamObj *pStream = NULL; + void *pIter = NULL; + + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) { + break; + } + + keepStreamTasksInBuf(pStream, &execNodeList); + sdbRelease(pSdb, pStream); + } +} + +// this function runs by only one thread, so it is not multi-thread safe +static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { + int32_t code = 0; + int32_t old = atomic_val_compare_exchange_32(&mndNodeCheckSentinel, 0, 1); + if (old != 0) { + mDebug("still in checking node change"); + return 0; + } + + mDebug("start to do node change checking"); + int64_t ts = taosGetTimestampSec(); + + SMnode *pMnode = pMsg->info.node; + if (execNodeList.pNodeEntryList == NULL || (taosArrayGetSize(execNodeList.pNodeEntryList) == 0)) { + if (execNodeList.pNodeEntryList != NULL) { + execNodeList.pNodeEntryList = taosArrayDestroy(execNodeList.pNodeEntryList); + } + + execNodeList.pNodeEntryList = doExtractNodeListFromStream(pMnode); + } + + if (taosArrayGetSize(execNodeList.pNodeEntryList) == 0) { + mDebug("end to do stream task node change checking, no vgroup exists, do nothing"); + execNodeList.ts = ts; + atomic_store_32(&mndNodeCheckSentinel, 0); + return 0; + } + + SArray 
*pNodeSnapshot = mndTakeVgroupSnapshot(pMnode); + + SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execNodeList.pNodeEntryList, pNodeSnapshot); + if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) { + code = mndProcessVgroupChange(pMnode, &changeInfo); + } + + taosArrayDestroy(changeInfo.pUpdateNodeList); + taosHashCleanup(changeInfo.pDBMap); + + // keep the new vnode snapshot + if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) { + taosArrayDestroy(execNodeList.pNodeEntryList); + execNodeList.pNodeEntryList = pNodeSnapshot; + execNodeList.ts = ts; + } + + mDebug("end to do stream task node change checking"); + atomic_store_32(&mndNodeCheckSentinel, 0); + return 0; +} + +typedef struct SMStreamNodeCheckMsg { + int8_t holder; // // to fix windows compile error, define place holder +} SMStreamNodeCheckMsg; + +static int32_t mndProcessNodeCheck(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + SSdb *pSdb = pMnode->pSdb; + if (sdbGetSize(pSdb, SDB_STREAM) <= 0) { + return 0; + } + + SMStreamNodeCheckMsg *pMsg = rpcMallocCont(sizeof(SMStreamNodeCheckMsg)); + SRpcMsg rpcMsg = { + .msgType = TDMT_MND_STREAM_NODECHANGE_CHECK, .pCont = pMsg, .contLen = sizeof(SMStreamNodeCheckMsg)}; + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); + return 0; +} + +static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamVnodeRevertIndex *pExecNode) { + int32_t level = taosArrayGetSize(pStream->tasks); + for (int32_t i = 0; i < level; i++) { + SArray *pLevel = taosArrayGetP(pStream->tasks, i); + + int32_t numOfTasks = taosArrayGetSize(pLevel); + for (int32_t j = 0; j < numOfTasks; j++) { + SStreamTask *pTask = taosArrayGetP(pLevel, j); + int64_t keys[2] = {pTask->id.streamId, pTask->id.taskId}; + + void *p = taosHashGet(pExecNode->pTaskMap, keys, sizeof(keys)); + if (p == NULL) { + STaskStatusEntry entry = { + .streamId = pTask->id.streamId, .taskId = pTask->id.taskId, .status = TASK_STATUS__STOP}; + taosArrayPush(pExecNode->pTaskList, 
&entry); + + int32_t ordinal = taosArrayGetSize(pExecNode->pTaskList) - 1; + taosHashPut(pExecNode->pTaskMap, keys, sizeof(keys), &ordinal, sizeof(ordinal)); + } + } + } +} + +// todo: this process should be executed by the write queue worker of the mnode +int32_t mndProcessStreamHb(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + + SStreamHbMsg req = {0}; + int32_t code = TSDB_CODE_SUCCESS; + + SDecoder decoder = {0}; + tDecoderInit(&decoder, pReq->pCont, pReq->contLen); + + if (tDecodeStreamHbMsg(&decoder, &req) < 0) { + tDecoderClear(&decoder); + terrno = TSDB_CODE_INVALID_MSG; + return -1; + } + tDecoderClear(&decoder); + + // int64_t now = taosGetTimestampSec(); + mTrace("receive stream-meta hb from vgId:%d, active numOfTasks:%d", req.vgId, req.numOfTasks); + + taosThreadMutexLock(&execNodeList.lock); + int32_t numOfExisted = taosHashGetSize(execNodeList.pTaskMap); + if (numOfExisted == 0) { + doExtractTasksFromStream(pMnode); + } + + for(int32_t i = 0; i < req.numOfTasks; ++i) { + STaskStatusEntry* p = taosArrayGet(req.pTaskStatus, i); + int64_t k[2] = {p->streamId, p->taskId}; + int32_t index = *(int32_t*) taosHashGet(execNodeList.pTaskMap, &k, sizeof(k)); + + STaskStatusEntry* pStatusEntry = taosArrayGet(execNodeList.pTaskList, index); + pStatusEntry->status = p->status; + if (p->status != TASK_STATUS__NORMAL) { + mDebug("received s-task:0x%x not in ready status:%s", p->taskId, streamGetTaskStatusStr(p->status)); + } + } + taosThreadMutexUnlock(&execNodeList.lock); + + taosArrayDestroy(req.pTaskStatus); + + // bool nodeChanged = false; + // SArray* pList = taosArrayInit(4, sizeof(int32_t)); + /* + // record the timeout node + for(int32_t i = 0; i < taosArrayGetSize(execNodeList.pNodeEntryList); ++i) { + SNodeEntry* pEntry = taosArrayGet(execNodeList.pNodeEntryList, i); + int64_t duration = now - pEntry->hbTimestamp; + if (duration > MND_STREAM_HB_INTERVAL) { // execNode timeout, try next + taosArrayPush(pList, &pEntry); + mWarn("nodeId:%d stream node 
timeout, since last hb:%"PRId64"s", pEntry->nodeId, duration); + continue; + } + + if (pEntry->nodeId != req.vgId) { + continue; + } + + pEntry->hbTimestamp = now; + + // check epset to identify whether the node has been transferred to other dnodes. + // node the epset is changed, which means the node transfer has occurred for this node. + // if (!isEpsetEqual(&pEntry->epset, &req.epset)) { + // nodeChanged = true; + // break; + // } + } + + // todo handle the node timeout case. Once the vnode is off-line, we should check the dnode status from mnode, + // to identify whether the dnode is truely offline or not. + + // handle the node changed case + if (!nodeChanged) { + return TSDB_CODE_SUCCESS; + } + + int32_t nodeId = req.vgId; + + {// check all streams that involved this vnode should update the epset info + SStreamObj *pStream = NULL; + void *pIter = NULL; + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) { + break; + } + + // update the related upstream and downstream tasks, todo remove this, no need this function + taosWLockLatch(&pStream->lock); + // streamTaskUpdateEpInfo(pStream->tasks, req.vgId, &req.epset); + // streamTaskUpdateEpInfo(pStream->pHTasksList, req.vgId, &req.epset); + taosWUnLockLatch(&pStream->lock); + + // code = createStreamUpdateTrans(pMnode, pStream, nodeId, ); + // if (code != TSDB_CODE_SUCCESS) { + // todo + //// } + // } + } + */ + return TSDB_CODE_SUCCESS; +} diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 93a152f0cc..5d150b731c 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -28,9 +28,9 @@ #define TRANS_ARRAY_SIZE 8 #define TRANS_RESERVE_SIZE 48 -static int32_t mndTransActionInsert(SSdb *pSdb, STrans *pTrans); -static int32_t mndTransActionUpdate(SSdb *pSdb, STrans *OldTrans, STrans *pOld); -static int32_t mndTransDelete(SSdb *pSdb, STrans *pTrans, bool callFunc); +static int32_t 
mndTransActionInsert(SSdb *pSdb, STrans *pTrans); +static int32_t mndTransActionUpdate(SSdb *pSdb, STrans *OldTrans, STrans *pOld); +static int32_t mndTransDelete(SSdb *pSdb, STrans *pTrans, bool callFunc); static int32_t mndTransAppendLog(SArray *pArray, SSdbRaw *pRaw); static int32_t mndTransAppendAction(SArray *pArray, STransAction *pAction); @@ -100,10 +100,9 @@ static int32_t mndTransGetActionsSize(SArray *pArray) { return rawDataLen; } - static int32_t mndTransEncodeAction(SSdbRaw *pRaw, int32_t *offset, SArray *pActions, int32_t actionsNum) { int32_t dataPos = *offset; - int8_t unused = 0; + int8_t unused = 0; int32_t ret = -1; for (int32_t i = 0; i < actionsNum; ++i) { @@ -266,16 +265,16 @@ _OVER: SSdbRow *mndTransDecode(SSdbRaw *pRaw) { terrno = TSDB_CODE_INVALID_MSG; - SSdbRow *pRow = NULL; - STrans *pTrans = NULL; - char *pData = NULL; - int32_t dataLen = 0; - int8_t sver = 0; - int32_t prepareActionNum = 0; - int32_t redoActionNum = 0; - int32_t undoActionNum = 0; - int32_t commitActionNum = 0; - int32_t dataPos = 0; + SSdbRow *pRow = NULL; + STrans *pTrans = NULL; + char *pData = NULL; + int32_t dataLen = 0; + int8_t sver = 0; + int32_t prepareActionNum = 0; + int32_t redoActionNum = 0; + int32_t undoActionNum = 0; + int32_t commitActionNum = 0; + int32_t dataPos = 0; if (sdbGetRawSoftVer(pRaw, &sver) != 0) goto _OVER; @@ -577,7 +576,7 @@ STrans *mndTransCreate(SMnode *pMnode, ETrnPolicy policy, ETrnConflct conflict, pTrans->undoActions = taosArrayInit(TRANS_ARRAY_SIZE, sizeof(STransAction)); pTrans->commitActions = taosArrayInit(TRANS_ARRAY_SIZE, sizeof(STransAction)); pTrans->pRpcArray = taosArrayInit(1, sizeof(SRpcHandleInfo)); - pTrans->mTraceId = pReq ? TRACE_GET_ROOTID(&pReq->info.traceId) : 0; + pTrans->mTraceId = pReq ? 
TRACE_GET_ROOTID(&pReq->info.traceId) : tGenIdPI64(); taosInitRWLatch(&pTrans->lockRpcArray); taosThreadMutexInit(&pTrans->mutex, NULL); @@ -1342,7 +1341,7 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) } bool mndTransPerformPrepareStage(SMnode *pMnode, STrans *pTrans) { - bool continueExec = true; + bool continueExec = true; int32_t code = 0; int32_t numOfActions = taosArrayGetSize(pTrans->prepareActions); diff --git a/source/dnode/mnode/sdb/src/sdbRaw.c b/source/dnode/mnode/sdb/src/sdbRaw.c index 3a16ee3f13..244e50b52e 100644 --- a/source/dnode/mnode/sdb/src/sdbRaw.c +++ b/source/dnode/mnode/sdb/src/sdbRaw.c @@ -46,7 +46,7 @@ SSdbRaw *sdbAllocRaw(ESdbType type, int8_t sver, int32_t dataLen) { void sdbFreeRaw(SSdbRaw *pRaw) { if (pRaw != NULL) { #if 1 - mTrace("raw:%p, is freed", pRaw); + mTrace("raw:%p, is freed, len:%d, table:%s", pRaw, pRaw->dataLen, sdbTableName(pRaw->type)); #endif taosMemoryFree(pRaw); } diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 635fdcf459..bafceb3f5f 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -57,45 +57,49 @@ FAIL: } int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { - ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->pUpstreamEpInfoList) != 0); - - pTask->refCnt = 1; - pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId); - - pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; - pTask->inputQueue = streamQueueOpen(512 << 10); - pTask->outputInfo.queue = streamQueueOpen(512 << 10); - - if (pTask->inputQueue == NULL || pTask->outputInfo.queue == NULL) { - return -1; + ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->pUpstreamInfoList) != 0); + int32_t code = streamTaskInit(pTask, pSnode->pMeta, &pSnode->msgCb, ver); + if (code != TSDB_CODE_SUCCESS) { + return code; } - pTask->tsInfo.init = taosGetTimestampMs(); - 
pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; - pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL; - pTask->pMsgCb = &pSnode->msgCb; - pTask->chkInfo.version = ver; - pTask->pMeta = pSnode->pMeta; - streamTaskOpenAllUpstreamInput(pTask); pTask->pState = streamStateOpen(pSnode->path, pTask, false, -1, -1); if (pTask->pState == NULL) { + qError("s-task:%s failed to open state for task", pTask->id.idStr); return -1; + } else { + qDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); } - int32_t numOfChildEp = taosArrayGetSize(pTask->pUpstreamEpInfoList); + int32_t numOfChildEp = taosArrayGetSize(pTask->pUpstreamInfoList); SReadHandle handle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState, .fillHistory = pTask->info.fillHistory }; initStreamStateAPI(&handle.api); pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, 0, pTask->id.taskId); ASSERT(pTask->exec.pExecutor); - taosThreadMutexInit(&pTask->lock, NULL); + streamTaskResetUpstreamStageInfo(pTask); streamSetupScheduleTrigger(pTask); - qDebug("snode:%d expand stream task on snode, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", SNODE_HANDLE, - pTask->id.idStr, pTask->chkInfo.version, pTask->info.selfChildId, pTask->info.taskLevel); + SCheckpointInfo* pChkInfo = &pTask->chkInfo; + // checkpoint ver is the kept version, handled data should be the next version. 
+ if (pTask->chkInfo.checkpointId != 0) { + pTask->chkInfo.currentVer = pTask->chkInfo.checkpointVer + 1; + qInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " currentVer:%" PRId64, pTask->id.idStr, + pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer); + } else { + if (pTask->chkInfo.currentVer == -1) { + pTask->chkInfo.currentVer = 0; + } + } + + qInfo("snode:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " currentVer:%" PRId64 + " child id:%d, level:%d, status:%s fill-history:%d, trigger:%" PRId64 " ms", + SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer, + pTask->info.selfChildId, pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), + pTask->info.fillHistory, pTask->triggerParam); return 0; } @@ -113,12 +117,16 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) { } pSnode->msgCb = pOption->msgCb; - pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE); + pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, -1); if (pSnode->pMeta == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto FAIL; } + // todo fix it: send msg to mnode to rollback to an existed checkpoint, and broadcast the rollback msg to all other + // computing nodes. 
+ pSnode->pMeta->stage = 0; + return pSnode; FAIL: @@ -128,6 +136,7 @@ FAIL: } void sndClose(SSnode *pSnode) { + streamMetaNotifyClose(pSnode->pMeta); streamMetaCommit(pSnode->pMeta); streamMetaClose(pSnode->pMeta); taosMemoryFree(pSnode->path); @@ -216,7 +225,7 @@ int32_t sndProcessTaskDispatchReq(SSnode *pSnode, SRpcMsg *pMsg, bool exec) { SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.taskId); if (pTask) { - SRpcMsg rsp = { .info = pMsg->info, .code = 0 }; + SRpcMsg rsp = {.info = pMsg->info, .code = 0}; streamProcessDispatchMsg(pTask, &req, &rsp, exec); streamMetaReleaseTask(pSnode->pMeta, pTask); return 0; @@ -237,7 +246,7 @@ int32_t sndProcessTaskRetrieveReq(SSnode *pSnode, SRpcMsg *pMsg) { SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.dstTaskId); if (pTask) { - SRpcMsg rsp = { .info = pMsg->info, .code = 0}; + SRpcMsg rsp = {.info = pMsg->info, .code = 0}; streamProcessRetrieveReq(pTask, &req, &rsp); streamMetaReleaseTask(pSnode->pMeta, pTask); tDeleteStreamRetrieveReq(&req); @@ -343,7 +352,7 @@ int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) { SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, taskId); if (pTask != NULL) { - rsp.status = streamTaskCheckStatus(pTask); + rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage); streamMetaReleaseTask(pSnode->pMeta, pTask); const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); @@ -351,9 +360,8 @@ int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) { pTask->id.idStr, pStatus, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } else { rsp.status = 0; - qDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 - ") from task:0x%x (vgId:%d), rsp status %d", - taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + qDebug("recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from 
task:0x%x (vgId:%d), rsp status %d", + taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } SEncoder encoder; @@ -424,13 +432,13 @@ int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { return sndProcessTaskRetrieveReq(pSnode, pMsg); case TDMT_STREAM_RETRIEVE_RSP: return sndProcessTaskRetrieveRsp(pSnode, pMsg); - case TDMT_STREAM_SCAN_HISTORY_FINISH: + case TDMT_VND_STREAM_SCAN_HISTORY_FINISH: return sndProcessStreamTaskScanHistoryFinishReq(pSnode, pMsg); - case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP: + case TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP: return sndProcessTaskRecoverFinishRsp(pSnode, pMsg); - case TDMT_STREAM_TASK_CHECK: + case TDMT_VND_STREAM_TASK_CHECK: return sndProcessStreamTaskCheckReq(pSnode, pMsg); - case TDMT_STREAM_TASK_CHECK_RSP: + case TDMT_VND_STREAM_TASK_CHECK_RSP: return sndProcessStreamTaskCheckRsp(pSnode, pMsg); default: ASSERT(0); diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt index c2b41392e8..6c5eeb3424 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -67,6 +67,9 @@ set( "src/tq/tqRestore.c" "src/tq/tqSnapshot.c" "src/tq/tqOffsetSnapshot.c" + "src/tq/tqStreamStateSnap.c" + "src/tq/tqStreamTaskSnap.c" + ) aux_source_directory("src/tsdb/" TSDB_SOURCE_FILES) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index f08c308185..93d4b2163d 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -55,7 +55,7 @@ typedef struct { typedef struct { int64_t suid; - char* qmsg; // SubPlanToString + char* qmsg; // SubPlanToString SNode* node; } STqExecTb; @@ -81,18 +81,21 @@ typedef enum tq_handle_status { } tq_handle_status; typedef struct { - char subKey[TSDB_SUBSCRIBE_KEY_LEN]; - int64_t consumerId; - int32_t epoch; - int8_t fetchMeta; - int64_t snapshotVer; - SWalReader* pWalReader; - SWalRef* pRef; -// STqPushHandle pushHandle; // push - STqExecHandle execHandle; // exec - SRpcMsg* msg; - 
tq_handle_status status; + char subKey[TSDB_SUBSCRIBE_KEY_LEN]; + int64_t consumerId; + int32_t epoch; + int8_t fetchMeta; + int64_t snapshotVer; + SWalReader* pWalReader; + SWalRef* pRef; + // STqPushHandle pushHandle; // push + STqExecHandle execHandle; // exec + SRpcMsg* msg; + tq_handle_status status; } STqHandle; +typedef struct { + int64_t snapshotVer; +} SStreamHandle; struct STQ { SVnode* pVnode; @@ -109,17 +112,10 @@ struct STQ { SStreamMeta* pStreamMeta; }; -typedef struct { - int8_t inited; - tmr_h timer; -} STqMgmt; - typedef struct { int32_t size; } STqOffsetHead; -static STqMgmt tqMgmt = {0}; - int32_t tEncodeSTqHandle(SEncoder* pEncoder, const STqHandle* pHandle); int32_t tDecodeSTqHandle(SDecoder* pDecoder, STqHandle* pHandle); void tqDestroyTqHandle(void* data); @@ -159,7 +155,7 @@ int32_t tqOffsetCommitFile(STqOffsetStore* pStore); // tqSink int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, const char* pIdStr); -void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* data); +void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, void* data); // tqOffset char* tqOffsetBuildFName(const char* path, int32_t fVer); @@ -176,6 +172,8 @@ int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, int32_t type, int64_t sver, int64_t ever); int32_t tqInitDataRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset); +void tqUpdateNodeStage(STQ* pTq); + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index be663c2be9..5f5c27bfdd 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -89,10 +89,11 @@ typedef struct SQueryNode SQueryNode; #define VNODE_RSMA0_DIR "tsdb" #define VNODE_RSMA1_DIR "rsma1" #define VNODE_RSMA2_DIR "rsma2" 
+#define VNODE_TQ_STREAM "stream" #define VNODE_BUFPOOL_SEGMENTS 3 -#define VND_INFO_FNAME "vnode.json" +#define VND_INFO_FNAME "vnode.json" #define VND_INFO_FNAME_TMP "vnode_tmp.json" // vnd.h @@ -214,16 +215,19 @@ int32_t tsdbDeleteTableData(STsdb* pTsdb, int64_t version, tb_uid_t suid, tb_uid int32_t tsdbSetKeepCfg(STsdb* pTsdb, STsdbCfg* pCfg); // tq -int tqInit(); -void tqCleanUp(); -STQ* tqOpen(const char* path, SVnode* pVnode); -void tqNotifyClose(STQ*); -void tqClose(STQ*); -int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver); -int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg); -int tqUnregisterPushHandle(STQ* pTq, void* pHandle); -int tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed. -int tqCheckStreamStatus(STQ* pTq); +int tqInit(); +void tqCleanUp(); +STQ* tqOpen(const char* path, SVnode* pVnode); +void tqNotifyClose(STQ*); +void tqClose(STQ*); +int tqPushMsg(STQ*, tmsg_t msgType); +int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg); +int tqUnregisterPushHandle(STQ* pTq, void* pHandle); +int tqStartStreamTasks(STQ* pTq, bool ckPause); // restore all stream tasks after vnode launching completed. 
+int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessStreamTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqCheckStreamStatus(STQ* pTq); int tqCommit(STQ*); int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd); @@ -313,6 +317,26 @@ int32_t tqOffsetWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqOffsetWriter int32_t tqOffsetWriterClose(STqOffsetWriter** ppWriter, int8_t rollback); int32_t tqOffsetSnapWrite(STqOffsetWriter* pWriter, uint8_t* pData, uint32_t nData); // SStreamTaskWriter ====================================== + +int32_t streamTaskSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTaskReader** ppReader); +int32_t streamTaskSnapReaderClose(SStreamTaskReader* pReader); +int32_t streamTaskSnapRead(SStreamTaskReader* pReader, uint8_t** ppData); + +int32_t streamTaskSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTaskWriter** ppWriter); +int32_t streamTaskSnapWriterClose(SStreamTaskWriter* ppWriter, int8_t rollback); +int32_t streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t nData); + +int32_t streamStateSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamStateReader** ppReader); +int32_t streamStateSnapReaderClose(SStreamStateReader* pReader); +int32_t streamStateSnapRead(SStreamStateReader* pReader, uint8_t** ppData); + +int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamStateWriter** ppWriter); +int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback); +int32_t streamStateSnapWrite(SStreamStateWriter* pWriter, uint8_t* pData, uint32_t nData); +int32_t streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId); + +int32_t streamStateLoadTasks(SStreamStateWriter* pWriter); + // SStreamTaskReader ====================================== // SStreamStateWriter ===================================== // SStreamStateReader 
===================================== @@ -476,7 +500,9 @@ enum { SNAP_DATA_TQ_HANDLE = 7, SNAP_DATA_TQ_OFFSET = 8, SNAP_DATA_STREAM_TASK = 9, - SNAP_DATA_STREAM_STATE = 10, + SNAP_DATA_STREAM_TASK_CHECKPOINT = 10, + SNAP_DATA_STREAM_STATE = 11, + SNAP_DATA_STREAM_STATE_BACKEND = 12, }; struct SSnapDataHdr { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index a502e3e314..87617a6812 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -14,6 +14,13 @@ */ #include "tq.h" +#include "vnd.h" + +typedef struct { + int8_t inited; +} STqMgmt; + +static STqMgmt tqMgmt = {0}; // 0: not init // 1: already inited @@ -32,11 +39,6 @@ int32_t tqInit() { } if (old == 0) { - tqMgmt.timer = taosTmrInit(10000, 100, 10000, "TQ"); - if (tqMgmt.timer == NULL) { - atomic_store_8(&tqMgmt.inited, 0); - return -1; - } if (streamInit() < 0) { return -1; } @@ -54,7 +56,6 @@ void tqCleanUp() { } if (old == 1) { - taosTmrCleanUp(tqMgmt.timer); streamCleanUp(); atomic_store_8(&tqMgmt.inited, 0); } @@ -127,14 +128,12 @@ int32_t tqInitialize(STQ* pTq) { return -1; } - pTq->pStreamMeta = streamMetaOpen(pTq->path, pTq, (FTaskExpand*)tqExpandTask, pTq->pVnode->config.vgId); + pTq->pStreamMeta = streamMetaOpen(pTq->path, pTq, (FTaskExpand*)tqExpandTask, pTq->pVnode->config.vgId, -1); if (pTq->pStreamMeta == NULL) { return -1; } - // the version is kept in task's meta data - // todo check if this version is required or not - if (streamLoadTasks(pTq->pStreamMeta, walGetCommittedVer(pTq->pVnode->pWal)) < 0) { + if (streamLoadTasks(pTq->pStreamMeta) < 0) { return -1; } @@ -142,6 +141,7 @@ int32_t tqInitialize(STQ* pTq) { } void tqClose(STQ* pTq) { + qDebug("start to close tq"); if (pTq == NULL) { return; } @@ -151,7 +151,7 @@ void tqClose(STQ* pTq) { STqHandle* pHandle = *(STqHandle**)pIter; int32_t vgId = TD_VID(pTq->pVnode); - if(pHandle->msg != NULL) { + if (pHandle->msg != NULL) { tqPushEmptyDataRsp(pHandle, vgId); 
rpcFreeCont(pHandle->msg->pCont); taosMemoryFree(pHandle->msg); @@ -167,119 +167,17 @@ void tqClose(STQ* pTq) { taosMemoryFree(pTq->path); tqMetaClose(pTq); streamMetaClose(pTq->pStreamMeta); + qDebug("end to close tq"); taosMemoryFree(pTq); } -static bool hasStreamTaskInTimer(SStreamMeta* pMeta) { - bool inTimer = false; - - taosWLockLatch(&pMeta->lock); - - void* pIter = NULL; - while(1) { - pIter = taosHashIterate(pMeta->pTasks, pIter); - if (pIter == NULL) { - break; - } - - SStreamTask* pTask = *(SStreamTask**)pIter; - if (pTask->status.timerActive >= 1) { - inTimer = true; - } - } - - taosWUnLockLatch(&pMeta->lock); - - return inTimer; -} - void tqNotifyClose(STQ* pTq) { - if (pTq != NULL) { - taosWLockLatch(&pTq->pStreamMeta->lock); - - void* pIter = NULL; - while (1) { - pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); - if (pIter == NULL) { - break; - } - - SStreamTask* pTask = *(SStreamTask**)pIter; - tqDebug("vgId:%d s-task:%s set closing flag", pTq->pStreamMeta->vgId, pTask->id.idStr); - pTask->status.taskStatus = TASK_STATUS__STOP; - - int64_t st = taosGetTimestampMs(); - qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS); - - int64_t el = taosGetTimestampMs() - st; - tqDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", pTq->pStreamMeta->vgId, pTask->id.idStr, el); - } - - taosWUnLockLatch(&pTq->pStreamMeta->lock); - - tqDebug("vgId:%d start to check all tasks", pTq->pStreamMeta->vgId); - - int64_t st = taosGetTimestampMs(); - - while(hasStreamTaskInTimer(pTq->pStreamMeta)) { - tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pTq->pStreamMeta->vgId); - taosMsleep(100); - } - - int64_t el = taosGetTimestampMs() - st; - tqDebug("vgId:%d all stream tasks are not in timer, continue close, elapsed time:%"PRId64" ms", pTq->pStreamMeta->vgId, el); + if (pTq == NULL) { + return; } + streamMetaNotifyClose(pTq->pStreamMeta); } -//static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t 
epoch, -// int64_t consumerId, int32_t type) { -// int32_t len = 0; -// int32_t code = 0; -// -// if (type == TMQ_MSG_TYPE__POLL_DATA_RSP) { -// tEncodeSize(tEncodeMqDataRsp, pRsp, len, code); -// } else if (type == TMQ_MSG_TYPE__POLL_DATA_META_RSP) { -// tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code); -// } -// -// if (code < 0) { -// return -1; -// } -// -// int32_t tlen = sizeof(SMqRspHead) + len; -// void* buf = rpcMallocCont(tlen); -// if (buf == NULL) { -// return -1; -// } -// -// ((SMqRspHead*)buf)->mqMsgType = type; -// ((SMqRspHead*)buf)->epoch = epoch; -// ((SMqRspHead*)buf)->consumerId = consumerId; -// -// void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); -// -// SEncoder encoder = {0}; -// tEncoderInit(&encoder, abuf, len); -// -// if (type == TMQ_MSG_TYPE__POLL_DATA_RSP) { -// tEncodeMqDataRsp(&encoder, pRsp); -// } else if (type == TMQ_MSG_TYPE__POLL_DATA_META_RSP) { -// tEncodeSTaosxRsp(&encoder, (STaosxRsp*)pRsp); -// } -// -// tEncoderClear(&encoder); -// -// SRpcMsg rsp = { -// .info = *pRpcHandleInfo, -// .pCont = buf, -// .contLen = tlen, -// .code = 0, -// }; -// -// tmsgSendRsp(&rsp); -// return 0; -//} - int32_t tqPushEmptyDataRsp(STqHandle* pHandle, int32_t vgId) { SMqPollReq req = {0}; if (tDeserializeSMqPollReq(pHandle->msg->pCont, pHandle->msg->contLen, &req) < 0) { @@ -293,33 +191,14 @@ int32_t tqPushEmptyDataRsp(STqHandle* pHandle, int32_t vgId) { dataRsp.blockNum = 0; char buf[TSDB_OFFSET_LEN] = {0}; tFormatOffset(buf, TSDB_OFFSET_LEN, &dataRsp.reqOffset); - tqInfo("tqPushEmptyDataRsp to consumer:0x%"PRIx64 " vgId:%d, offset:%s, reqId:0x%" PRIx64, req.consumerId, vgId, buf, req.reqId); + tqInfo("tqPushEmptyDataRsp to consumer:0x%" PRIx64 " vgId:%d, offset:%s, reqId:0x%" PRIx64, req.consumerId, vgId, buf, + req.reqId); tqSendDataRsp(pHandle, pHandle->msg, &req, &dataRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId); tDeleteMqDataRsp(&dataRsp); return 0; } -//int32_t tqPushDataRsp(STqHandle* pHandle, int32_t vgId) { -// SMqDataRsp 
dataRsp = {0}; -// dataRsp.head.consumerId = pHandle->consumerId; -// dataRsp.head.epoch = pHandle->epoch; -// dataRsp.head.mqMsgType = TMQ_MSG_TYPE__POLL_RSP; -// -// int64_t sver = 0, ever = 0; -// walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever); -// tqDoSendDataRsp(&pHandle->msg->info, &dataRsp, pHandle->epoch, pHandle->consumerId, TMQ_MSG_TYPE__POLL_RSP, sver, -// ever); -// -// char buf1[TSDB_OFFSET_LEN] = {0}; -// char buf2[TSDB_OFFSET_LEN] = {0}; -// tFormatOffset(buf1, tListLen(buf1), &dataRsp.reqOffset); -// tFormatOffset(buf2, tListLen(buf2), &dataRsp.rspOffset); -// tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) push rsp, block num: %d, req:%s, rsp:%s", vgId, -// dataRsp.head.consumerId, dataRsp.head.epoch, dataRsp.blockNum, buf1, buf2); -// return 0; -//} - int32_t tqSendDataRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp, int32_t type, int32_t vgId) { int64_t sver = 0, ever = 0; @@ -332,8 +211,8 @@ int32_t tqSendDataRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* tFormatOffset(buf1, TSDB_OFFSET_LEN, &pRsp->reqOffset); tFormatOffset(buf2, TSDB_OFFSET_LEN, &pRsp->rspOffset); - tqDebug("tmq poll vgId:%d consumer:0x%" PRIx64 " (epoch %d) send rsp, block num:%d, req:%s, rsp:%s, reqId:0x%" PRIx64, vgId, - pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2, pReq->reqId); + tqDebug("tmq poll vgId:%d consumer:0x%" PRIx64 " (epoch %d) send rsp, block num:%d, req:%s, rsp:%s, reqId:0x%" PRIx64, + vgId, pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2, pReq->reqId); return 0; } @@ -366,7 +245,7 @@ int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey); if (pSavedOffset != NULL && tqOffsetEqual(pOffset, pSavedOffset)) { tqInfo("not update the offset, vgId:%d sub:%s since committed:%" PRId64 " less than/equal to existed:%" PRId64, - vgId, 
pOffset->subKey, pOffset->val.version, pSavedOffset->val.version); + vgId, pOffset->subKey, pOffset->val.version, pSavedOffset->val.version); return 0; // no need to update the offset value } @@ -379,10 +258,10 @@ int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t } int32_t tqProcessSeekReq(STQ* pTq, SRpcMsg* pMsg) { - SMqSeekReq req = {0}; - int32_t vgId = TD_VID(pTq->pVnode); - SRpcMsg rsp = {.info = pMsg->info}; - int code = 0; + SMqSeekReq req = {0}; + int32_t vgId = TD_VID(pTq->pVnode); + SRpcMsg rsp = {.info = pMsg->info}; + int code = 0; if (tDeserializeSMqSeekReq(pMsg->pCont, pMsg->contLen, &req) < 0) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -407,8 +286,8 @@ int32_t tqProcessSeekReq(STQ* pTq, SRpcMsg* pMsg) { goto end; } - //if consumer register to push manager, push empty to consumer to change vg status from TMQ_VG_STATUS__WAIT to TMQ_VG_STATUS__IDLE, - //otherwise poll data failed after seek. + // if consumer register to push manager, push empty to consumer to change vg status from TMQ_VG_STATUS__WAIT to + // TMQ_VG_STATUS__IDLE, otherwise poll data failed after seek. 
tqUnregisterPushHandle(pTq, pHandle); taosRUnLockLatch(&pTq->lock); @@ -417,85 +296,85 @@ end: tmsgSendRsp(&rsp); return 0; -// SMqVgOffset vgOffset = {0}; -// int32_t vgId = TD_VID(pTq->pVnode); -// -// SDecoder decoder; -// tDecoderInit(&decoder, (uint8_t*)msg, msgLen); -// if (tDecodeMqVgOffset(&decoder, &vgOffset) < 0) { -// tqError("vgId:%d failed to decode seek msg", vgId); -// return -1; -// } -// -// tDecoderClear(&decoder); -// -// tqDebug("topic:%s, vgId:%d process offset seek by consumer:0x%" PRIx64 ", req offset:%" PRId64, -// vgOffset.offset.subKey, vgId, vgOffset.consumerId, vgOffset.offset.val.version); -// -// STqOffset* pOffset = &vgOffset.offset; -// if (pOffset->val.type != TMQ_OFFSET__LOG) { -// tqError("vgId:%d, subKey:%s invalid seek offset type:%d", vgId, pOffset->subKey, pOffset->val.type); -// return -1; -// } -// -// STqHandle* pHandle = taosHashGet(pTq->pHandle, pOffset->subKey, strlen(pOffset->subKey)); -// if (pHandle == NULL) { -// tqError("tmq seek: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", vgOffset.consumerId, vgId, pOffset->subKey); -// terrno = TSDB_CODE_INVALID_MSG; -// return -1; -// } -// -// // 2. check consumer-vg assignment status -// taosRLockLatch(&pTq->lock); -// if (pHandle->consumerId != vgOffset.consumerId) { -// tqDebug("ERROR tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, -// vgOffset.consumerId, vgId, pOffset->subKey, pHandle->consumerId); -// terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; -// taosRUnLockLatch(&pTq->lock); -// return -1; -// } -// taosRUnLockLatch(&pTq->lock); -// -// // 3. 
check the offset info -// STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey); -// if (pSavedOffset != NULL) { -// if (pSavedOffset->val.type != TMQ_OFFSET__LOG) { -// tqError("invalid saved offset type, vgId:%d sub:%s", vgId, pOffset->subKey); -// return 0; // no need to update the offset value -// } -// -// if (pSavedOffset->val.version == pOffset->val.version) { -// tqDebug("vgId:%d subKey:%s no need to seek to %" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, -// pOffset->val.version, pSavedOffset->val.version); -// return 0; -// } -// } -// -// int64_t sver = 0, ever = 0; -// walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever); -// if (pOffset->val.version < sver) { -// pOffset->val.version = sver; -// } else if (pOffset->val.version > ever) { -// pOffset->val.version = ever; -// } -// -// // save the new offset value -// if (pSavedOffset != NULL) { -// tqDebug("vgId:%d sub:%s seek to:%" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, pOffset->val.version, -// pSavedOffset->val.version); -// } else { -// tqDebug("vgId:%d sub:%s seek to:%" PRId64 " not saved yet", vgId, pOffset->subKey, pOffset->val.version); -// } -// -// if (tqOffsetWrite(pTq->pOffsetStore, pOffset) < 0) { -// tqError("failed to save offset, vgId:%d sub:%s seek to %" PRId64, vgId, pOffset->subKey, pOffset->val.version); -// return -1; -// } -// -// tqDebug("topic:%s, vgId:%d consumer:0x%" PRIx64 " offset is update to:%" PRId64, vgOffset.offset.subKey, vgId, -// vgOffset.consumerId, vgOffset.offset.val.version); -// -// return 0; + // SMqVgOffset vgOffset = {0}; + // int32_t vgId = TD_VID(pTq->pVnode); + // + // SDecoder decoder; + // tDecoderInit(&decoder, (uint8_t*)msg, msgLen); + // if (tDecodeMqVgOffset(&decoder, &vgOffset) < 0) { + // tqError("vgId:%d failed to decode seek msg", vgId); + // return -1; + // } + // + // tDecoderClear(&decoder); + // + // tqDebug("topic:%s, vgId:%d process offset seek by consumer:0x%" 
PRIx64 ", req offset:%" PRId64, + // vgOffset.offset.subKey, vgId, vgOffset.consumerId, vgOffset.offset.val.version); + // + // STqOffset* pOffset = &vgOffset.offset; + // if (pOffset->val.type != TMQ_OFFSET__LOG) { + // tqError("vgId:%d, subKey:%s invalid seek offset type:%d", vgId, pOffset->subKey, pOffset->val.type); + // return -1; + // } + // + // STqHandle* pHandle = taosHashGet(pTq->pHandle, pOffset->subKey, strlen(pOffset->subKey)); + // if (pHandle == NULL) { + // tqError("tmq seek: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", vgOffset.consumerId, vgId, + // pOffset->subKey); terrno = TSDB_CODE_INVALID_MSG; return -1; + // } + // + // // 2. check consumer-vg assignment status + // taosRLockLatch(&pTq->lock); + // if (pHandle->consumerId != vgOffset.consumerId) { + // tqDebug("ERROR tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" + // PRIx64, + // vgOffset.consumerId, vgId, pOffset->subKey, pHandle->consumerId); + // terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; + // taosRUnLockLatch(&pTq->lock); + // return -1; + // } + // taosRUnLockLatch(&pTq->lock); + // + // // 3. 
check the offset info + // STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey); + // if (pSavedOffset != NULL) { + // if (pSavedOffset->val.type != TMQ_OFFSET__LOG) { + // tqError("invalid saved offset type, vgId:%d sub:%s", vgId, pOffset->subKey); + // return 0; // no need to update the offset value + // } + // + // if (pSavedOffset->val.version == pOffset->val.version) { + // tqDebug("vgId:%d subKey:%s no need to seek to %" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, + // pOffset->val.version, pSavedOffset->val.version); + // return 0; + // } + // } + // + // int64_t sver = 0, ever = 0; + // walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever); + // if (pOffset->val.version < sver) { + // pOffset->val.version = sver; + // } else if (pOffset->val.version > ever) { + // pOffset->val.version = ever; + // } + // + // // save the new offset value + // if (pSavedOffset != NULL) { + // tqDebug("vgId:%d sub:%s seek to:%" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, pOffset->val.version, + // pSavedOffset->val.version); + // } else { + // tqDebug("vgId:%d sub:%s seek to:%" PRId64 " not saved yet", vgId, pOffset->subKey, pOffset->val.version); + // } + // + // if (tqOffsetWrite(pTq->pOffsetStore, pOffset) < 0) { + // tqError("failed to save offset, vgId:%d sub:%s seek to %" PRId64, vgId, pOffset->subKey, pOffset->val.version); + // return -1; + // } + // + // tqDebug("topic:%s, vgId:%d consumer:0x%" PRIx64 " offset is update to:%" PRId64, vgOffset.offset.subKey, vgId, + // vgOffset.consumerId, vgOffset.offset.val.version); + // + // return 0; } int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) { @@ -538,8 +417,11 @@ int32_t tqProcessPollPush(STQ* pTq, SRpcMsg* pMsg) { tqError("pHandle->msg should not be null"); taosHashCancelIterate(pTq->pPushMgr, pIter); break; - }else{ - SRpcMsg msg = {.msgType = TDMT_VND_TMQ_CONSUME, .pCont = pHandle->msg->pCont, .contLen = 
pHandle->msg->contLen, .info = pHandle->msg->info}; + } else { + SRpcMsg msg = {.msgType = TDMT_VND_TMQ_CONSUME, + .pCont = pHandle->msg->pCont, + .contLen = pHandle->msg->contLen, + .info = pHandle->msg->info}; tmsgPutToQueue(&pTq->pVnode->msgCb, QUERY_QUEUE, &msg); taosMemoryFree(pHandle->msg); pHandle->msg = NULL; @@ -574,10 +456,10 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { // 1. find handle pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); if (pHandle == NULL) { - do{ - if (tqMetaGetHandle(pTq, req.subKey) == 0){ + do { + if (tqMetaGetHandle(pTq, req.subKey) == 0) { pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); - if(pHandle != NULL){ + if (pHandle != NULL) { break; } } @@ -585,7 +467,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { terrno = TSDB_CODE_INVALID_MSG; taosWUnLockLatch(&pTq->lock); return -1; - }while(0); + } while (0); } // 2. check re-balance status @@ -636,7 +518,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessVgCommittedInfoReq(STQ* pTq, SRpcMsg* pMsg) { - void* data = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + void* data = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); SMqVgOffset vgOffset = {0}; @@ -678,7 +560,6 @@ int32_t tqProcessVgCommittedInfoReq(STQ* pTq, SRpcMsg* pMsg) { SRpcMsg rsp = {.info = pMsg->info, .pCont = buf, .contLen = len, .code = 0}; tmsgSendRsp(&rsp); - return 0; } @@ -730,7 +611,7 @@ int32_t tqProcessVgWalInfoReq(STQ* pTq, SRpcMsg* pMsg) { if (reqOffset.type == TMQ_OFFSET__LOG) { dataRsp.rspOffset.version = reqOffset.version; - } else if(reqOffset.type < 0){ + } else if (reqOffset.type < 0) { STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, req.subKey); if (pOffset != NULL) { if (pOffset->val.type != TMQ_OFFSET__LOG) { @@ -741,14 +622,16 @@ int32_t tqProcessVgWalInfoReq(STQ* pTq, SRpcMsg* pMsg) { } dataRsp.rspOffset.version = pOffset->val.version; - tqInfo("consumer:0x%" PRIx64 " 
vgId:%d subkey:%s get assignment from store:%"PRId64, consumerId, vgId, req.subKey, dataRsp.rspOffset.version); - }else{ + tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from store:%" PRId64, consumerId, vgId, + req.subKey, dataRsp.rspOffset.version); + } else { if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEST) { dataRsp.rspOffset.version = sver; // not consume yet, set the earliest position } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { dataRsp.rspOffset.version = ever; } - tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from init:%"PRId64, consumerId, vgId, req.subKey, dataRsp.rspOffset.version); + tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from init:%" PRId64, consumerId, vgId, req.subKey, + dataRsp.rspOffset.version); } } else { tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s invalid offset type:%d", consumerId, vgId, req.subKey, @@ -841,7 +724,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg SMqRebVgReq req = {0}; SDecoder dc = {0}; - tDecoderInit(&dc, msg, msgLen); + tDecoderInit(&dc, (uint8_t*)msg, msgLen); // decode req if (tDecodeSMqRebVgReq(&dc, &req) < 0) { @@ -851,12 +734,12 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } tqInfo("vgId:%d, tq process sub req:%s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pTq->pVnode->config.vgId, req.subKey, - req.oldConsumerId, req.newConsumerId); + req.oldConsumerId, req.newConsumerId); STqHandle* pHandle = NULL; - while(1){ + while (1) { pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); - if (pHandle || tqMetaGetHandle(pTq, req.subKey) < 0){ + if (pHandle || tqMetaGetHandle(pTq, req.subKey) < 0) { break; } } @@ -872,7 +755,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } STqHandle handle = {0}; ret = tqCreateHandle(pTq, &req, &handle); - if(ret < 0){ + if (ret < 0) { tqDestroyTqHandle(&handle); goto end; } @@ -883,7 +766,8 @@ 
int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg if (pHandle->consumerId == req.newConsumerId) { // do nothing tqInfo("vgId:%d no switch consumer:0x%" PRIx64 " remains, because redo wal log", req.vgId, req.newConsumerId); } else { - tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); + tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, + req.newConsumerId); atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_store_32(&pHandle->epoch, 0); tqUnregisterPushHandle(pTq, pHandle); @@ -901,52 +785,41 @@ void freePtr(void* ptr) { taosMemoryFree(*(void**)ptr); } int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { int32_t vgId = TD_VID(pTq->pVnode); + tqDebug("s-task:0x%x start to expand task", pTask->id.taskId); - pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId); - pTask->refCnt = 1; - pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; - pTask->inputQueue = streamQueueOpen(512 << 10); - pTask->outputInfo.queue = streamQueueOpen(512 << 10); - - if (pTask->inputQueue == NULL || pTask->outputInfo.queue == NULL) { - tqError("s-task:%s failed to prepare the input/output queue, initialize task failed", pTask->id.idStr); - return -1; + int32_t code = streamTaskInit(pTask, pTq->pStreamMeta, &pTq->pVnode->msgCb, ver); + if (code != TSDB_CODE_SUCCESS) { + return code; } - pTask->tsInfo.init = taosGetTimestampMs(); - pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; - pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL; - pTask->pMsgCb = &pTq->pVnode->msgCb; - pTask->pMeta = pTq->pStreamMeta; - streamTaskOpenAllUpstreamInput(pTask); - // backup the initial status, and set it to be TASK_STATUS__INIT - pTask->chkInfo.version = ver; - pTask->chkInfo.currentVer = ver; - - pTask->dataRange.range.maxVer = ver; - pTask->dataRange.range.minVer = ver; - if (pTask->info.taskLevel 
== TASK_LEVEL__SOURCE) { - SStreamTask* pSateTask = pTask; - SStreamTask task = {0}; + SStreamTask* pStateTask = pTask; + SStreamTask task = {0}; if (pTask->info.fillHistory) { task.id = pTask->streamTaskId; task.pMeta = pTask->pMeta; - pSateTask = &task; + pStateTask = &task; } - pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pSateTask, false, -1, -1); + pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pStateTask, false, -1, -1); if (pTask->pState == NULL) { + tqError("s-task:%s (vgId:%d) failed to open state for task", pTask->id.idStr, vgId); return -1; + } else { + tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); } - SReadHandle handle = {.vnode = pTq->pVnode, - .initTqReader = 1, - .pStateBackend = pTask->pState, - .fillHistory = pTask->info.fillHistory, - .winRange = pTask->dataRange.window}; + SReadHandle handle = { + .checkpointId = pTask->chkInfo.checkpointId, + .vnode = pTq->pVnode, + .initTqReader = 1, + .pStateBackend = pTask->pState, + .fillHistory = pTask->info.fillHistory, + .winRange = pTask->dataRange.window, + }; + initStorageAPI(&handle.api); pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId, pTask->id.taskId); @@ -957,23 +830,31 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId); } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { SStreamTask* pSateTask = pTask; - SStreamTask task = {0}; + SStreamTask task = {0}; if (pTask->info.fillHistory) { task.id = pTask->streamTaskId; task.pMeta = pTask->pMeta; pSateTask = &task; } + pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pSateTask, false, -1, -1); if (pTask->pState == NULL) { + tqError("s-task:%s (vgId:%d) failed to open state for task", pTask->id.idStr, vgId); return -1; + } else { + tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); } - int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->pUpstreamEpInfoList); - 
SReadHandle handle = {.vnode = NULL, - .numOfVgroups = numOfVgroups, - .pStateBackend = pTask->pState, - .fillHistory = pTask->info.fillHistory, - .winRange = pTask->dataRange.window}; + int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->pUpstreamInfoList); + SReadHandle handle = { + .checkpointId = pTask->chkInfo.checkpointId, + .vnode = NULL, + .numOfVgroups = numOfVgroups, + .pStateBackend = pTask->pState, + .fillHistory = pTask->info.fillHistory, + .winRange = pTask->dataRange.window, + }; + initStorageAPI(&handle.api); pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId, pTask->id.taskId); @@ -993,7 +874,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { int32_t ver1 = 1; SMetaInfo info = {0}; - int32_t code = metaGetInfo(pTq->pVnode->pMeta, pTask->tbSink.stbUid, &info, NULL); + code = metaGetInfo(pTq->pVnode->pMeta, pTask->tbSink.stbUid, &info, NULL); if (code == TSDB_CODE_SUCCESS) { ver1 = info.skmVer; } @@ -1003,6 +884,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { if (pTask->tbSink.pTSchema == NULL) { return -1; } + pTask->tbSink.pTblInfo = tSimpleHashInit(10240, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); tSimpleHashSetFreeFp(pTask->tbSink.pTblInfo, freePtr); } @@ -1018,16 +900,23 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->status.taskStatus = TASK_STATUS__NORMAL; } - taosThreadMutexInit(&pTask->lock, NULL); + streamTaskResetUpstreamStageInfo(pTask); streamSetupScheduleTrigger(pTask); + SCheckpointInfo* pChkInfo = &pTask->chkInfo; - tqInfo("vgId:%d expand stream task, s-task:%s, checkpoint ver:%" PRId64 - " child id:%d, level:%d, fill-history:%d, trigger:%" PRId64 " ms, disable pause", - vgId, pTask->id.idStr, pTask->chkInfo.version, pTask->info.selfChildId, pTask->info.taskLevel, + // checkpoint ver is the kept version, handled data should be the next version. 
+ if (pTask->chkInfo.checkpointId != 0) { + pTask->chkInfo.currentVer = pTask->chkInfo.checkpointVer + 1; + tqInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " currentVer:%" PRId64, pTask->id.idStr, + pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer); + } + + tqInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " currentVer:%" PRId64 + " child id:%d, level:%d, status:%s fill-history:%d, trigger:%" PRId64 " ms", + vgId, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer, + pTask->info.selfChildId, pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), pTask->info.fillHistory, pTask->triggerParam); - // next valid version will add one - pTask->chkInfo.version += 1; return 0; } @@ -1057,12 +946,12 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, taskId); if (pTask != NULL) { - rsp.status = streamTaskCheckStatus(pTask); + rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage); streamMetaReleaseTask(pTq->pStreamMeta, pTask); const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); - tqDebug("s-task:%s status:%s, recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), ready:%d", - pTask->id.idStr, pStatus, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + tqDebug("s-task:%s status:%s, stage:%d recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), ready:%d", + pTask->id.idStr, pStatus, rsp.oldStage, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } else { rsp.status = 0; tqDebug("tq recv task check(taskId:0x%" PRIx64 "-0x%x not built yet) req(reqId:0x%" PRIx64 @@ -1074,7 +963,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) { - char* pReq = 
POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); int32_t code; @@ -1083,7 +972,6 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) { SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)pReq, len); code = tDecodeStreamTaskCheckRsp(&decoder, &rsp); - if (code < 0) { tDecoderClear(&decoder); return -1; @@ -1095,8 +983,8 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) { SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, rsp.streamId, rsp.upstreamTaskId); if (pTask == NULL) { - tqError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId, - pTq->pStreamMeta->vgId); + tqError("tq failed to locate the stream task:0x%" PRIx64 "-0x%x (vgId:%d), it may have been destroyed", + rsp.streamId, rsp.upstreamTaskId, pTq->pStreamMeta->vgId); terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; return -1; } @@ -1111,9 +999,12 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms int32_t vgId = TD_VID(pTq->pVnode); if (tsDisableStream) { + tqInfo("vgId:%d stream disabled, not deploy stream tasks", vgId); return 0; } + tqDebug("vgId:%d receive new stream task deploy msg, start to build stream task", vgId); + // 1.deserialize msg and build task SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask)); if (pTask == NULL) { @@ -1126,18 +1017,18 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msg, msgLen); code = tDecodeStreamTask(&decoder, pTask); + tDecoderClear(&decoder); + if (code < 0) { - tDecoderClear(&decoder); taosMemoryFree(pTask); return -1; } - tDecoderClear(&decoder); - SStreamMeta* pStreamMeta = pTq->pStreamMeta; - // 2.save task, use the newest commit version as the initial start version of stream task. 
+ // 2.save task, use the latest commit version as the initial start version of stream task. int32_t taskId = pTask->id.taskId; + int64_t streamId = pTask->id.streamId; bool added = false; taosWLockLatch(&pStreamMeta->lock); @@ -1146,21 +1037,34 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms taosWUnLockLatch(&pStreamMeta->lock); if (code < 0) { - tqError("vgId:%d failed to add s-task:0x%x, total:%d", vgId, pTask->id.taskId, numOfTasks); + tqError("vgId:%d failed to add s-task:0x%x, total:%d, code:%s", vgId, taskId, numOfTasks, tstrerror(code)); tFreeStreamTask(pTask); return -1; } - // not added into meta store + // added into meta store, pTask cannot be reference since it may have been destroyed by other threads already now if + // it is added into the meta store if (added) { - tqDebug("vgId:%d s-task:0x%x is deployed and add into meta, numOfTasks:%d", vgId, taskId, numOfTasks); - SStreamTask* p = streamMetaAcquireTask(pStreamMeta, pTask->id.streamId, taskId); - if (p != NULL) { // reset the downstreamReady flag. 
- streamTaskCheckDownstreamTasks(p); + // only handled in the leader node + if (vnodeIsRoleLeader(pTq->pVnode)) { + tqDebug("vgId:%d s-task:0x%x is deployed and add into meta, numOfTasks:%d", vgId, taskId, numOfTasks); + SStreamTask* p = streamMetaAcquireTask(pStreamMeta, streamId, taskId); + + bool restored = pTq->pVnode->restored; + if (p != NULL && restored) { + streamTaskCheckDownstreamTasks(p); + } else if (!restored) { + tqWarn("s-task:%s not launched since vnode(vgId:%d) not ready", p->id.idStr, vgId); + } + + if (p != NULL) { + streamMetaReleaseTask(pStreamMeta, p); + } + } else { + tqDebug("vgId:%d not leader, not launch stream task s-task:0x%x", vgId, taskId); } - streamMetaReleaseTask(pStreamMeta, p); } else { - tqWarn("vgId:%d failed to add s-task:0x%x, already exists in meta store", vgId, taskId); + tqWarn("vgId:%d failed to add s-task:0x%x, since already exists in meta store", vgId, taskId); tFreeStreamTask(pTask); } @@ -1191,7 +1095,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { streamTaskEnablePause(pTask); } } else { - tqDebug("s-task:%s resume from paused, start ts:%"PRId64, pTask->id.idStr, pTask->tsInfo.step1Start); + tqDebug("s-task:%s resume from paused, start ts:%" PRId64, pTask->id.idStr, pTask->tsInfo.step1Start); } // we have to continue retrying to successfully execute the scan history task. @@ -1295,15 +1199,12 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { streamSetStatusNormal(pTask); } - tqStartStreamTasks(pTq); + tqStartStreamTasks(pTq, false); } streamMetaReleaseTask(pMeta, pTask); streamMetaReleaseTask(pMeta, pStreamTask); } else { - // todo update the chkInfo version for current task. - // this task has an associated history stream task, so we need to scan wal from the end version of - // history scan. 
The current version of chkInfo.current is not updated during the history scan STimeWindow* pWindow = &pTask->dataRange.window; if (pTask->historyTaskId.taskId == 0) { @@ -1322,7 +1223,6 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { id, pTask->chkInfo.currentVer, pWindow->skey, pWindow->ekey); } - // notify the downstream agg tasks that upstream tasks are ready to processing the WAL data, update the code = streamTaskScanHistoryDataComplete(pTask); streamMetaReleaseTask(pMeta, pTask); @@ -1334,6 +1234,45 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { return 0; } +// notify the downstream tasks to transfer executor state after handle all history blocks. +int32_t tqProcessTaskTransferStateReq(STQ* pTq, SRpcMsg* pMsg) { + char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + + SStreamTransferReq req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)pReq, len); + int32_t code = tDecodeStreamScanHistoryFinishReq(&decoder, &req); + tDecoderClear(&decoder); + + tqDebug("vgId:%d start to process transfer state msg, from s-task:0x%x", pTq->pStreamMeta->vgId, + req.downstreamTaskId); + + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.downstreamTaskId); + if (pTask == NULL) { + tqError("failed to find task:0x%x, it may have been dropped already. 
process transfer state failed", + req.downstreamTaskId); + return -1; + } + + int32_t remain = streamAlignTransferState(pTask); + if (remain > 0) { + tqDebug("s-task:%s receive upstream transfer state msg, remain:%d", pTask->id.idStr, remain); + streamMetaReleaseTask(pTq->pStreamMeta, pTask); + return 0; + } + + // transfer the ownership of executor state + tqDebug("s-task:%s all upstream tasks send transfer msg, open transfer state flag", pTask->id.idStr); + ASSERT(pTask->streamTaskId.taskId != 0 && pTask->info.fillHistory == 1); + + streamSchedExec(pTask); + streamMetaReleaseTask(pTq->pStreamMeta, pTask); + return 0; +} + +// only the agg tasks and the sink tasks will receive this message from upstream tasks int32_t tqProcessTaskScanHistoryFinishReq(STQ* pTq, SRpcMsg* pMsg) { char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); @@ -1386,7 +1325,8 @@ int32_t tqProcessTaskScanHistoryFinishRsp(STQ* pTq, SRpcMsg* pMsg) { } else { tqDebug( "s-task:%s scan-history finish rsp received from downstream task:0x%x, all downstream tasks rsp scan-history " - "completed msg", pTask->id.idStr, req.downstreamId); + "completed msg", + pTask->id.idStr, req.downstreamId); streamProcessScanHistoryFinishRsp(pTask); } @@ -1394,59 +1334,6 @@ int32_t tqProcessTaskScanHistoryFinishRsp(STQ* pTq, SRpcMsg* pMsg) { return 0; } -int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock) { - SDecoder* pCoder = &(SDecoder){0}; - SDeleteRes* pRes = &(SDeleteRes){0}; - - (*pRefBlock) = NULL; - - pRes->uidList = taosArrayInit(0, sizeof(tb_uid_t)); - if (pRes->uidList == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - tDecoderInit(pCoder, (uint8_t*)pData, len); - tDecodeDeleteRes(pCoder, pRes); - tDecoderClear(pCoder); - - int32_t numOfTables = taosArrayGetSize(pRes->uidList); - if (numOfTables == 0 || pRes->affectedRows == 0) { - taosArrayDestroy(pRes->uidList); - return TSDB_CODE_SUCCESS; - } 
- - SSDataBlock* pDelBlock = createSpecialDataBlock(STREAM_DELETE_DATA); - blockDataEnsureCapacity(pDelBlock, numOfTables); - pDelBlock->info.rows = numOfTables; - pDelBlock->info.version = ver; - - for (int32_t i = 0; i < numOfTables; i++) { - // start key column - SColumnInfoData* pStartCol = taosArrayGet(pDelBlock->pDataBlock, START_TS_COLUMN_INDEX); - colDataSetVal(pStartCol, i, (const char*)&pRes->skey, false); // end key column - SColumnInfoData* pEndCol = taosArrayGet(pDelBlock->pDataBlock, END_TS_COLUMN_INDEX); - colDataSetVal(pEndCol, i, (const char*)&pRes->ekey, false); - // uid column - SColumnInfoData* pUidCol = taosArrayGet(pDelBlock->pDataBlock, UID_COLUMN_INDEX); - int64_t* pUid = taosArrayGet(pRes->uidList, i); - colDataSetVal(pUidCol, i, (const char*)pUid, false); - - colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, GROUPID_COLUMN_INDEX), i); - colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX), i); - colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX), i); - } - - taosArrayDestroy(pRes->uidList); - *pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); - if ((*pRefBlock) == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - (*pRefBlock)->type = STREAM_INPUT__REF_DATA_BLOCK; - (*pRefBlock)->pBlock = pDelBlock; - return TSDB_CODE_SUCCESS; -} - int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { SStreamTaskRunReq* pReq = pMsg->pCont; @@ -1467,20 +1354,20 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { if (pTask != NULL) { // even in halt status, the data in inputQ must be processed int8_t st = pTask->status.taskStatus; - if (st == TASK_STATUS__NORMAL || st == TASK_STATUS__SCAN_HISTORY) { + if (st == TASK_STATUS__NORMAL || st == TASK_STATUS__SCAN_HISTORY || st == TASK_STATUS__CK) { tqDebug("vgId:%d s-task:%s start to process block from inputQ, last chk point:%" PRId64, vgId, pTask->id.idStr, - pTask->chkInfo.version); + pTask->chkInfo.currentVer); 
streamProcessRunReq(pTask); } else { atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); tqDebug("vgId:%d s-task:%s ignore run req since not in ready state, status:%s, sched-status:%d", vgId, - pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus); + pTask->id.idStr, streamGetTaskStatusStr(st), pTask->status.schedStatus); } streamMetaReleaseTask(pTq->pStreamMeta, pTask); - tqStartStreamTasks(pTq); + tqStartStreamTasks(pTq, false); return 0; - } else { // NOTE: pTask->status.schedStatus is not updated since it is not be handled by the run exec. + } else { // NOTE: pTask->status.schedStatus is not updated since it is not be handled by the run exec. // todo add one function to handle this tqError("vgId:%d failed to found s-task, taskId:0x%x may have been dropped", vgId, taskId); return -1; @@ -1497,6 +1384,7 @@ int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) { SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); tDecodeStreamDispatchReq(&decoder, &req); + tDecoderClear(&decoder); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.taskId); if (pTask) { @@ -1505,6 +1393,8 @@ int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) { streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; } else { + tqError("vgId:%d failed to find task:0x%x to handle the dispatch req, it may have been destroyed already", + pTq->pStreamMeta->vgId, req.taskId); tDeleteStreamDispatchReq(&req); return -1; } @@ -1513,18 +1403,21 @@ int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) { int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) { SStreamDispatchRsp* pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t vgId = pTq->pStreamMeta->vgId; - int32_t taskId = htonl(pRsp->upstreamTaskId); - int64_t streamId = htobe64(pRsp->streamId); - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, streamId, taskId); + int32_t 
vgId = pTq->pStreamMeta->vgId; + pRsp->upstreamTaskId = htonl(pRsp->upstreamTaskId); + pRsp->streamId = htobe64(pRsp->streamId); + pRsp->downstreamTaskId = htonl(pRsp->downstreamTaskId); + pRsp->downstreamNodeId = htonl(pRsp->downstreamNodeId); + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pRsp->streamId, pRsp->upstreamTaskId); if (pTask) { streamProcessDispatchRsp(pTask, pRsp, pMsg->code); streamMetaReleaseTask(pTq->pStreamMeta, pTask); return TSDB_CODE_SUCCESS; } else { - tqDebug("vgId:%d failed to handle the dispatch rsp, since find task:0x%x failed", vgId, taskId); - return TSDB_CODE_INVALID_MSG; + tqDebug("vgId:%d failed to handle the dispatch rsp, since find task:0x%x failed", vgId, pRsp->upstreamTaskId); + terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; + return terrno; } } @@ -1555,7 +1448,7 @@ int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } tqDebug("s-task:%s receive pause msg from mnode", pTask->id.idStr); - streamTaskPause(pTask); + streamTaskPause(pTask, pMeta); SStreamTask* pHistoryTask = NULL; if (pTask->historyTaskId.taskId != 0) { @@ -1571,7 +1464,7 @@ int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg tqDebug("s-task:%s fill-history task handle paused along with related stream task", pHistoryTask->id.idStr); - streamTaskPause(pHistoryTask); + streamTaskPause(pHistoryTask, pMeta); streamMetaReleaseTask(pMeta, pHistoryTask); } @@ -1586,9 +1479,14 @@ int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, } // todo: handle the case: resume from halt to pause/ from halt to normal/ from pause to normal - streamTaskResume(pTask); + streamTaskResume(pTask, pTq->pStreamMeta); int32_t level = pTask->info.taskLevel; + if (level == TASK_LEVEL__SINK) { + streamMetaReleaseTask(pTq->pStreamMeta, pTask); + return 0; + } + int8_t status = pTask->status.taskStatus; if (status == TASK_STATUS__NORMAL || status == TASK_STATUS__SCAN_HISTORY) { // no lock needs to secure 
the access of the version @@ -1603,10 +1501,11 @@ int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus); } - if (level == TASK_LEVEL__SOURCE && pTask->info.fillHistory && pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { + if (level == TASK_LEVEL__SOURCE && pTask->info.fillHistory && + pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { streamStartScanHistoryAsync(pTask, igUntreated); } else if (level == TASK_LEVEL__SOURCE && (taosQueueItemSize(pTask->inputQueue->queue) == 0)) { - tqStartStreamTasks(pTq); + tqStartStreamTasks(pTq, false); } else { streamSchedExec(pTask); } @@ -1618,13 +1517,14 @@ int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg; - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->streamId, pReq->taskId); - int32_t code = tqProcessTaskResumeImpl(pTq, pTask, sversion, pReq->igUntreated); + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->streamId, pReq->taskId); + int32_t code = tqProcessTaskResumeImpl(pTq, pTask, sversion, pReq->igUntreated); if (code != 0) { return code; } - SStreamTask* pHistoryTask = streamMetaAcquireTask(pTq->pStreamMeta, pTask->historyTaskId.streamId, pTask->historyTaskId.taskId); + SStreamTask* pHistoryTask = + streamMetaAcquireTask(pTq->pStreamMeta, pTask->historyTaskId.streamId, pTask->historyTaskId.taskId); if (pHistoryTask) { code = tqProcessTaskResumeImpl(pTq, pHistoryTask, sversion, pReq->igUntreated); } @@ -1644,18 +1544,17 @@ int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) { tDecoderClear(&decoder); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.dstTaskId); - - if (pTask) { - SRpcMsg rsp = {.info = pMsg->info, .code = 0}; - 
streamProcessRetrieveReq(pTask, &req, &rsp); - - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - tDeleteStreamRetrieveReq(&req); - return 0; - } else { - tDeleteStreamRetrieveReq(&req); + if (pTask == NULL) { + // tDeleteStreamDispatchReq(&req); return -1; } + + SRpcMsg rsp = {.info = pMsg->info, .code = 0}; + streamProcessRetrieveReq(pTask, &req, &rsp); + + streamMetaReleaseTask(pTq->pStreamMeta, pTask); + tDeleteStreamRetrieveReq(&req); + return 0; } int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) { @@ -1663,8 +1562,9 @@ int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) { return 0; } +// todo refactor. int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { - STQ* pTq = pVnode->pTq; + STQ* pTq = pVnode->pTq; int32_t vgId = pVnode->config.vgId; SMsgHead* msgStr = pMsg->pCont; @@ -1683,7 +1583,7 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { tDecoderClear(&decoder); int32_t taskId = req.taskId; - tqDebug("vgId:%d receive dispatch msg to s-task:0x%"PRIx64"-0x%x", vgId, req.streamId, taskId); + tqDebug("vgId:%d receive dispatch msg to s-task:0x%" PRIx64 "-0x%x", vgId, req.streamId, taskId); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, taskId); if (pTask != NULL) { @@ -1694,7 +1594,6 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { taosFreeQitem(pMsg); return 0; } else { - tDeleteStreamDispatchReq(&req); } @@ -1726,7 +1625,7 @@ FAIL: pRsp->inputStatus = TASK_OUTPUT_STATUS__NORMAL; int32_t len = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp); - SRpcMsg rsp = { .code = code, .info = pMsg->info, .contLen = len, .pCont = pRspHead}; + SRpcMsg rsp = {.code = code, .info = pMsg->info, .contLen = len, .pCont = pRspHead}; tqError("s-task:0x%x send dispatch error rsp, code:%s", taskId, tstrerror(code)); tmsgSendRsp(&rsp); @@ -1737,3 +1636,240 @@ FAIL: int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; } +// todo error code cannot be return, since 
this is invoked by an mnode-launched transaction. +int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg) { + int32_t vgId = TD_VID(pTq->pVnode); + SStreamMeta* pMeta = pTq->pStreamMeta; + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + int32_t code = 0; + + SStreamCheckpointSourceReq req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, len); + if (tDecodeStreamCheckpointSourceReq(&decoder, &req) < 0) { + code = TSDB_CODE_MSG_DECODE_ERROR; + tDecoderClear(&decoder); + tqError("vgId:%d failed to decode checkpoint-source msg, code:%s", vgId, tstrerror(code)); + return code; + } + tDecoderClear(&decoder); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.taskId); + if (pTask == NULL) { + tqError("vgId:%d failed to find s-task:0x%x, ignore checkpoint msg. it may have been destroyed already", vgId, + req.taskId); + return TSDB_CODE_SUCCESS; + } + + // downstream not ready, current the stream tasks are not all ready. Ignore this checkpoint req. 
+ if (pTask->status.downstreamReady != 1) { + qError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpoint:%" PRId64 + ", set it failure", pTask->id.idStr, req.checkpointId); + streamMetaReleaseTask(pMeta, pTask); + + SRpcMsg rsp = {0}; + buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); + tmsgSendRsp(&rsp); // error occurs + return TSDB_CODE_SUCCESS; + } + + int32_t total = 0; + taosWLockLatch(&pMeta->lock); + + // set the initial value for generating check point + // set the mgmt epset info according to the checkout source msg from mnode, todo update mgmt epset if needed + if (pMeta->chkptNotReadyTasks == 0) { + pMeta->chkptNotReadyTasks = streamMetaGetNumOfStreamTasks(pMeta); + pMeta->totalTasks = pMeta->chkptNotReadyTasks; + } + + total = taosArrayGetSize(pMeta->pTaskList); + taosWUnLockLatch(&pMeta->lock); + + qDebug("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg, chkpt:%" PRId64 ", total checkpoint req:%d", + pTask->id.idStr, vgId, pTask->info.taskLevel, req.checkpointId, total); + + code = streamAddCheckpointSourceRspMsg(&req, &pMsg->info, pTask, 1); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // todo: when generating checkpoint, no new tasks are allowed to add into current Vnode + // todo: when generating checkpoint, leader of mnode has transfer to other DNode? 
+ streamProcessCheckpointSourceReq(pTask, &req); + streamMetaReleaseTask(pMeta, pTask); + return code; +} + +// downstream task has complete the stream task checkpoint procedure, let's start the handle the rsp by execute task +int32_t tqProcessStreamTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg) { + int32_t vgId = TD_VID(pTq->pVnode); + SStreamMeta* pMeta = pTq->pStreamMeta; + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + int32_t code = 0; + + SStreamCheckpointReadyMsg req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, len); + if (tDecodeStreamCheckpointReadyMsg(&decoder, &req) < 0) { + code = TSDB_CODE_MSG_DECODE_ERROR; + tDecoderClear(&decoder); + return code; + } + tDecoderClear(&decoder); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.upstreamTaskId); + if (pTask == NULL) { + tqError("vgId:%d failed to find s-task:0x%x, it may have been destroyed already", vgId, req.downstreamTaskId); + return code; + } + + tqDebug("vgId:%d s-task:%s received the checkpoint ready msg from task:0x%x (vgId:%d), handle it", vgId, + pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId); + + streamProcessCheckpointReadyMsg(pTask); + streamMetaReleaseTask(pMeta, pTask); + return code; +} + +int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { + SStreamMeta* pMeta = pTq->pStreamMeta; + int32_t vgId = TD_VID(pTq->pVnode); + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + SRpcMsg rsp = {.info = pMsg->info, .code = TSDB_CODE_SUCCESS}; + + SStreamTaskNodeUpdateMsg req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, len); + if (tDecodeStreamTaskUpdateMsg(&decoder, &req) < 0) { + rsp.code = TSDB_CODE_MSG_DECODE_ERROR; + tqError("vgId:%d failed to decode task update msg, code:%s", vgId, tstrerror(rsp.code)); + goto _end; + } + + // update the nodeEpset when it exists + 
taosWLockLatch(&pMeta->lock); + + // when replay the WAL, we should update the task epset one again and again, the task may be in stop status. + int64_t keys[2] = {req.streamId, req.taskId}; + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + + if (ppTask == NULL || *ppTask == NULL) { + tqError("vgId:%d failed to acquire task:0x%x when handling update, it may have been dropped already", pMeta->vgId, + req.taskId); + rsp.code = TSDB_CODE_SUCCESS; + taosWUnLockLatch(&pMeta->lock); + goto _end; + } + + SStreamTask* pTask = *ppTask; + + tqDebug("s-task:%s receive task nodeEp update msg from mnode", pTask->id.idStr); + streamTaskUpdateEpsetInfo(pTask, req.pNodeList); + + { + streamSetStatusNormal(pTask); + streamMetaSaveTask(pMeta, pTask); + if (streamMetaCommit(pMeta) < 0) { + // persist to disk + } + } + + streamTaskStop(pTask); + tqDebug("s-task:%s task nodeEp update completed", pTask->id.idStr); + + pMeta->closedTask += 1; + + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + bool allStopped = (pMeta->closedTask == numOfTasks); + if (allStopped) { + pMeta->closedTask = 0; + } else { + tqDebug("vgId:%d closed tasks:%d, not closed:%d", vgId, pMeta->closedTask, (numOfTasks - pMeta->closedTask)); + } + + taosWUnLockLatch(&pMeta->lock); + +_end: + tDecoderClear(&decoder); + + if (allStopped) { + + if (!pTq->pVnode->restored) { + tqDebug("vgId:%d vnode restore not completed, not restart the tasks", vgId); + } else { + tqDebug("vgId:%d all tasks are stopped, restart them", vgId); + taosWLockLatch(&pMeta->lock); + + terrno = 0; + int32_t code = streamMetaReopen(pMeta, 0); + if (code != 0) { + tqError("vgId:%d failed to reopen stream meta", vgId); + taosWUnLockLatch(&pMeta->lock); + return -1; + } + + if (streamLoadTasks(pTq->pStreamMeta) < 0) { + tqError("vgId:%d failed to load stream tasks", vgId); + taosWUnLockLatch(&pMeta->lock); + return -1; + } + + taosWUnLockLatch(&pMeta->lock); + if (vnodeIsRoleLeader(pTq->pVnode) && 
!tsDisableStream) { + vInfo("vgId:%d, restart all stream tasks", vgId); + tqCheckStreamStatus(pTq); + } + } + } + + return rsp.code; +} + +int32_t tqProcessTaskStopReq(STQ* pTq, SRpcMsg* pMsg) { + int32_t vgId = TD_VID(pTq->pVnode); + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + SRpcMsg rsp = {.info = pMsg->info, .code = TSDB_CODE_SUCCESS}; + + SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)msg; + + SStreamMeta* pMeta = pTq->pStreamMeta; + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); + if (pTask == NULL) { + tqError("vgId:%d process stop req, failed to acquire task:0x%x, it may have been dropped already", vgId, + pReq->taskId); + // since task is in [STOP|DROPPING] state, it is safe to assume the pause is active + return TSDB_CODE_SUCCESS; + } + + tqDebug("s-task:%s receive stop msg from mnode", pTask->id.idStr); + streamTaskStop(pTask); + + SStreamTask* pHistoryTask = NULL; + if (pTask->historyTaskId.taskId != 0) { + pHistoryTask = streamMetaAcquireTask(pMeta, pTask->historyTaskId.streamId, pTask->historyTaskId.taskId); + if (pHistoryTask == NULL) { + tqError("vgId:%d process pause req, failed to acquire fill-history task:0x%x, it may have been dropped already", + pMeta->vgId, pTask->historyTaskId.taskId); + streamMetaReleaseTask(pMeta, pTask); + + // since task is in [STOP|DROPPING] state, it is safe to assume the pause is active + return TSDB_CODE_SUCCESS; + } + + tqDebug("s-task:%s fill-history task handle paused along with related stream task", pHistoryTask->id.idStr); + + streamTaskStop(pHistoryTask); + streamMetaReleaseTask(pMeta, pHistoryTask); + } + + streamMetaReleaseTask(pMeta, pTask); + tmsgSendRsp(&rsp); + return 0; +} diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 070a8ecf6f..8a9b95e045 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -30,7 +30,7 @@ int32_t tqProcessSubmitReqForSubscribe(STQ* pTq) { return 0; } 
-int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) { +int32_t tqPushMsg(STQ* pTq, tmsg_t msgType) { if (msgType == TDMT_VND_SUBMIT) { tqProcessSubmitReqForSubscribe(pTq); } @@ -39,20 +39,14 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v int32_t numOfTasks = streamMetaGetNumOfTasks(pTq->pStreamMeta); taosRUnLockLatch(&pTq->pStreamMeta->lock); - tqTrace("handle submit, restore:%d, size:%d", pTq->pVnode->restored, numOfTasks); + tqDebug("handle submit, restore:%d, numOfTasks:%d", pTq->pVnode->restored, numOfTasks); // push data for stream processing: // 1. the vnode has already been restored. // 2. the vnode should be the leader. // 3. the stream is not suspended yet. - if (!tsDisableStream && vnodeIsRoleLeader(pTq->pVnode) && pTq->pVnode->restored) { - if (numOfTasks == 0) { - return 0; - } - - if (msgType == TDMT_VND_SUBMIT || msgType == TDMT_VND_DELETE) { - tqStartStreamTasks(pTq); - } + if ((!tsDisableStream) && (numOfTasks > 0) && (msgType == TDMT_VND_SUBMIT || msgType == TDMT_VND_DELETE)) { + tqStartStreamTasks(pTq, true); } return 0; diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c index ed612587f5..b3fbbf5157 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -14,9 +14,10 @@ */ #include "tq.h" +#include "vnd.h" static int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle); -static int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId); +static int32_t doSetOffsetForWalReader(SStreamTask* pTask, int32_t vgId); // this function should be executed by stream threads. // extract submit block from WAL, and add them into the input queue for the sources tasks. 
@@ -29,7 +30,7 @@ int32_t tqStreamTasksScanWal(STQ* pTq) { int32_t scan = pMeta->walScanCounter; tqDebug("vgId:%d continue check if data in wal are available, walScanCounter:%d", vgId, scan); - // check all restore tasks + // check all tasks bool shouldIdle = true; createStreamTaskRunReq(pTq->pStreamMeta, &shouldIdle); @@ -61,7 +62,7 @@ int32_t tqStreamTasksStatusCheck(STQ* pTq) { SStreamMeta* pMeta = pTq->pStreamMeta; int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - tqDebug("vgId:%d start to check all (%d) stream tasks downstream status", vgId, numOfTasks); + tqDebug("vgId:%d start to check all %d stream task(s) downstream status", vgId, numOfTasks); if (numOfTasks == 0) { return TSDB_CODE_SUCCESS; } @@ -71,20 +72,15 @@ int32_t tqStreamTasksStatusCheck(STQ* pTq) { pTaskList = taosArrayDup(pMeta->pTaskList, NULL); taosWUnLockLatch(&pMeta->lock); + // broadcast the check downstream tasks msg for (int32_t i = 0; i < numOfTasks; ++i) { - SStreamId* pTaskId = taosArrayGet(pTaskList, i); + SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); if (pTask == NULL) { continue; } - if (pTask->info.fillHistory == 1) { - tqDebug("s-task:%s fill-history task, wait for related stream task:0x%x to launch it", pTask->id.idStr, - pTask->streamTaskId.taskId); - continue; - } - - streamTaskDoCheckDownstreamTasks(pTask); + streamTaskCheckDownstreamTasks(pTask); streamMetaReleaseTask(pMeta, pTask); } @@ -125,10 +121,15 @@ int32_t tqCheckStreamStatus(STQ* pTq) { return 0; } -int32_t tqStartStreamTasks(STQ* pTq) { +int32_t tqStartStreamTasks(STQ* pTq, bool ckPause) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; + // for follower or vnode does not restored, do not launch the stream tasks. 
+ if (!(vnodeIsRoleLeader(pTq->pVnode) && pTq->pVnode->restored)) { + return TSDB_CODE_SUCCESS; + } + taosWLockLatch(&pMeta->lock); int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); @@ -146,6 +147,16 @@ int32_t tqStartStreamTasks(STQ* pTq) { return 0; } + int32_t numOfPauseTasks = pTq->pStreamMeta->pauseTaskNum; + if (ckPause && numOfTasks == numOfPauseTasks) { + tqDebug("vgId:%d ignore all submit, all streams had been paused, reset the walScanCounter", vgId); + + // reset the counter value, since we do not launch the scan wal operation. + pMeta->walScanCounter = 0; + taosWUnLockLatch(&pMeta->lock); + return 0; + } + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); if (pRunReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -166,12 +177,12 @@ int32_t tqStartStreamTasks(STQ* pTq) { return 0; } -int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) { +int32_t doSetOffsetForWalReader(SStreamTask* pTask, int32_t vgId) { // seek the stored version and extract data from WAL int64_t firstVer = walReaderGetValidFirstVer(pTask->exec.pWalReader); if (pTask->chkInfo.currentVer < firstVer) { - tqWarn("vgId:%d s-task:%s ver:%"PRId64" earlier than the first ver of wal range %" PRId64 ", forward to %" PRId64, vgId, - pTask->id.idStr, pTask->chkInfo.currentVer, firstVer, firstVer); + tqWarn("vgId:%d s-task:%s ver:%" PRId64 " earlier than the first ver of wal range %" PRId64 ", forward to %" PRId64, + vgId, pTask->id.idStr, pTask->chkInfo.currentVer, firstVer, firstVer); pTask->chkInfo.currentVer = firstVer; @@ -192,7 +203,8 @@ int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) { } // append the data for the stream - tqDebug("vgId:%d s-task:%s wal reader initial seek to ver:%" PRId64, vgId, pTask->id.idStr, pTask->chkInfo.currentVer); + tqDebug("vgId:%d s-task:%s wal reader initial seek to ver:%" PRId64, vgId, pTask->id.idStr, + pTask->chkInfo.currentVer); } } @@ -222,7 +234,7 @@ static void 
checkForFillHistoryVerRange(SStreamTask* pTask, int64_t ver) { double el = (taosGetTimestampMs() - pTask->tsInfo.step2Start) / 1000.0; qDebug("s-task:%s scan-history from WAL stage(step 2) ended, elapsed time:%.2fs", id, el); appendTranstateIntoInputQ(pTask); - /*int32_t code = */streamSchedExec(pTask); + /*int32_t code = */ streamSchedExec(pTask); } else { qWarn("s-task:%s fill-history scan WAL, currentVer:%" PRId64 " reach the maximum ver:%" PRId64 ", not scan wal", id, ver, maxVer); @@ -252,7 +264,7 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { numOfTasks = taosArrayGetSize(pTaskList); for (int32_t i = 0; i < numOfTasks; ++i) { - SStreamId* pTaskId = taosArrayGet(pTaskList, i); + SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); SStreamTask* pTask = streamMetaAcquireTask(pStreamMeta, pTaskId->streamId, pTaskId->taskId); if (pTask == NULL) { continue; @@ -266,8 +278,9 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { continue; } + const char* pStatus = streamGetTaskStatusStr(status); if (status != TASK_STATUS__NORMAL) { - tqDebug("s-task:%s not ready for new submit block from wal, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status)); + tqDebug("s-task:%s not ready for new submit block from wal, status:%s", pTask->id.idStr, pStatus); streamMetaReleaseTask(pStreamMeta, pTask); continue; } @@ -276,7 +289,7 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { ASSERT(status == TASK_STATUS__NORMAL); // the maximum version of data in the WAL has reached already, the step2 is done tqDebug("s-task:%s fill-history reach the maximum ver:%" PRId64 ", not scan wal anymore", pTask->id.idStr, - pTask->dataRange.range.maxVer); + pTask->dataRange.range.maxVer); streamMetaReleaseTask(pStreamMeta, pTask); continue; } @@ -303,18 +316,28 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { continue; } - int32_t numOfItemsInQ = 
taosQueueItemSize(pTask->inputQueue->queue); - int64_t maxVer = (pTask->info.fillHistory == 1)? pTask->dataRange.range.maxVer:INT64_MAX; + int32_t numOfItems = streamTaskGetInputQItems(pTask); + int64_t maxVer = (pTask->info.fillHistory == 1) ? pTask->dataRange.range.maxVer : INT64_MAX; SStreamQueueItem* pItem = NULL; - code = extractMsgFromWal(pTask->exec.pWalReader, (void**) &pItem, maxVer, pTask->id.idStr); + code = extractMsgFromWal(pTask->exec.pWalReader, (void**)&pItem, maxVer, pTask->id.idStr); - if ((code != TSDB_CODE_SUCCESS || pItem == NULL) && (numOfItemsInQ == 0)) { // failed, continue + if ((code != TSDB_CODE_SUCCESS || pItem == NULL) && (numOfItems == 0)) { // failed, continue checkForFillHistoryVerRange(pTask, walReaderGetCurrentVer(pTask->exec.pWalReader)); streamMetaReleaseTask(pStreamMeta, pTask); continue; } + taosThreadMutexLock(&pTask->lock); + pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); + + if (pTask->status.taskStatus != TASK_STATUS__NORMAL) { + tqDebug("s-task:%s not ready for submit block from wal, status:%s", pTask->id.idStr, pStatus); + taosThreadMutexUnlock(&pTask->lock); + streamMetaReleaseTask(pStreamMeta, pTask); + continue; + } + if (pItem != NULL) { noDataInWal = false; code = tAppendDataToInputQueue(pTask, pItem); @@ -329,7 +352,9 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { } } - if ((code == TSDB_CODE_SUCCESS) || (numOfItemsInQ > 0)) { + taosThreadMutexUnlock(&pTask->lock); + + if ((code == TSDB_CODE_SUCCESS) || (numOfItems > 0)) { code = streamSchedExec(pTask); if (code != TSDB_CODE_SUCCESS) { streamMetaReleaseTask(pStreamMeta, pTask); @@ -348,4 +373,3 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { taosArrayDestroy(pTaskList); return 0; } - diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index cce31688bc..f7132ff6c4 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -21,9 
+21,14 @@ typedef struct STableSinkInfo { uint64_t uid; - char tbName[TSDB_TABLE_NAME_LEN]; + tstr name; } STableSinkInfo; +static int32_t doSinkResultBlock(SVnode* pVnode, int32_t blockIndex, char* stbFullName, int64_t suid, + SSDataBlock* pDataBlock, SStreamTask* pTask); +static int32_t doSinkDeleteBlock(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, + int64_t suid); + int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, const char* pIdStr) { int32_t totalRows = pDataBlock->info.rows; @@ -97,17 +102,17 @@ end: return ret; } -static int32_t tqGetTableInfo(SSHashObj* pTableInfoMap,uint64_t groupId, STableSinkInfo** pInfo) { +static bool tqGetTableInfo(SSHashObj* pTableInfoMap,uint64_t groupId, STableSinkInfo** pInfo) { void* pVal = tSimpleHashGet(pTableInfoMap, &groupId, sizeof(uint64_t)); if (pVal) { *pInfo = *(STableSinkInfo**)pVal; - return TSDB_CODE_SUCCESS; + return true; } - return TSDB_CODE_FAILED; + return false; } -int32_t tqPutTableInfo(SSHashObj* tblInfo ,uint64_t groupId, STableSinkInfo* pTbl) { +static int32_t tqPutTableInfo(SSHashObj* tblInfo ,uint64_t groupId, STableSinkInfo* pTbl) { if (tSimpleHashGetSize(tblInfo) > MAX_CACHE_TABLE_INFO_NUM) { return TSDB_CODE_FAILED; } @@ -115,7 +120,7 @@ int32_t tqPutTableInfo(SSHashObj* tblInfo ,uint64_t groupId, STableSinkInfo* pTb return tSimpleHashPut(tblInfo, &groupId, sizeof(uint64_t), &pTbl, POINTER_BYTES); } -int32_t tqPutReqToQueue(SVnode* pVnode, SVCreateTbBatchReq* pReqs) { +static int32_t tqPutReqToQueue(SVnode* pVnode, SVCreateTbBatchReq* pReqs) { void* buf = NULL; int32_t tlen = 0; encodeCreateChildTableForRPC(pReqs, TD_VID(pVnode), &buf, &tlen); @@ -128,66 +133,40 @@ int32_t tqPutReqToQueue(SVnode* pVnode, SVCreateTbBatchReq* pReqs) { return TSDB_CODE_SUCCESS; } -void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* data) { + +void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, 
void* data) { const SArray* pBlocks = (const SArray*)data; SVnode* pVnode = (SVnode*)vnode; int64_t suid = pTask->tbSink.stbUid; char* stbFullName = pTask->tbSink.stbFullName; STSchema* pTSchema = pTask->tbSink.pTSchema; + int32_t vgId = TD_VID(pVnode); + int32_t numOfBlocks = taosArrayGetSize(pBlocks); + int32_t code = TSDB_CODE_SUCCESS; - int32_t blockSz = taosArrayGetSize(pBlocks); + tqDebug("vgId:%d, s-task:%s write %d stream resBlock(s) into table", vgId, pTask->id.idStr, numOfBlocks); - tqDebug("vgId:%d, s-task:%s write results %d blocks into table", TD_VID(pVnode), pTask->id.idStr, blockSz); - - void* pBuf = NULL; SArray* tagArray = NULL; SArray* pVals = NULL; SArray* crTblArray = NULL; - for (int32_t i = 0; i < blockSz; i++) { + for (int32_t i = 0; i < numOfBlocks; i++) { SSDataBlock* pDataBlock = taosArrayGet(pBlocks, i); int32_t rows = pDataBlock->info.rows; if (pDataBlock->info.type == STREAM_DELETE_RESULT) { - SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))}; - - tqBuildDeleteReq(stbFullName, pDataBlock, &deleteReq, pTask->id.idStr); - if (taosArrayGetSize(deleteReq.deleteReqs) == 0) { - taosArrayDestroy(deleteReq.deleteReqs); - continue; - } - - int32_t len; - int32_t code; - tEncodeSize(tEncodeSBatchDeleteReq, &deleteReq, len, code); - if (code != TSDB_CODE_SUCCESS) { - qError("s-task:%s failed to encode delete request", pTask->id.idStr); - } - - SEncoder encoder; - void* serializedDeleteReq = rpcMallocCont(len + sizeof(SMsgHead)); - void* abuf = POINTER_SHIFT(serializedDeleteReq, sizeof(SMsgHead)); - tEncoderInit(&encoder, abuf, len); - tEncodeSBatchDeleteReq(&encoder, &deleteReq); - tEncoderClear(&encoder); - taosArrayDestroy(deleteReq.deleteReqs); - - ((SMsgHead*)serializedDeleteReq)->vgId = pVnode->config.vgId; - - SRpcMsg msg = { .msgType = TDMT_VND_BATCH_DEL, .pCont = serializedDeleteReq, .contLen = len + sizeof(SMsgHead) }; - if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) { - 
tqDebug("failed to put delete req into write-queue since %s", terrstr()); - } + code = doSinkDeleteBlock(pVnode, stbFullName, pDataBlock, pTask, suid); } else if (pDataBlock->info.type == STREAM_CREATE_CHILD_TABLE) { + tqDebug("s-task:%s build create table msg", pTask->id.idStr); + SVCreateTbBatchReq reqs = {0}; - crTblArray = reqs.pArray = taosArrayInit(1, sizeof(struct SVCreateTbReq)); + crTblArray = reqs.pArray = taosArrayInit(1, sizeof(SVCreateTbReq)); if (NULL == reqs.pArray) { goto _end; } for (int32_t rowId = 0; rowId < rows; rowId++) { - SVCreateTbReq createTbReq = {0}; - SVCreateTbReq* pCreateTbReq = &createTbReq; + SVCreateTbReq* pCreateTbReq = &((SVCreateTbReq){0}); // set const pCreateTbReq->flags = 0; @@ -203,16 +182,14 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d int32_t size = taosArrayGetSize(pDataBlock->pDataBlock); if (size == 2) { tagArray = taosArrayInit(1, sizeof(STagVal)); + if (!tagArray) { tdDestroySVCreateTbReq(pCreateTbReq); goto _end; } STagVal tagVal = { - .cid = pTSchema->numOfCols + 1, - .type = TSDB_DATA_TYPE_UBIGINT, - .i64 = (int64_t)pDataBlock->info.id.groupId, - }; + .cid = pTSchema->numOfCols + 1, .type = TSDB_DATA_TYPE_UBIGINT, .i64 = pDataBlock->info.id.groupId}; taosArrayPush(tagArray, &tagVal); @@ -227,6 +204,7 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d tdDestroySVCreateTbReq(pCreateTbReq); goto _end; } + for (int32_t tagId = UD_TAG_COLUMN_INDEX, step = 1; tagId < size; tagId++, step++) { SColumnInfoData* pTagData = taosArrayGet(pDataBlock->pDataBlock, tagId); @@ -236,12 +214,13 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d continue; } else if (IS_VAR_DATA_TYPE(pTagData->info.type)) { tagVal.nData = varDataLen(pData); - tagVal.pData = varDataVal(pData); + tagVal.pData = (uint8_t*) varDataVal(pData); } else { memcpy(&tagVal.i64, pData, pTagData->info.bytes); } taosArrayPush(tagArray, &tagVal); } + } 
pCreateTbReq->ctb.tagNum = TMAX(size - UD_TAG_COLUMN_INDEX, 1); @@ -254,7 +233,6 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d goto _end; } - pCreateTbReq->ctb.pTag = (uint8_t*)pTag; // set table name @@ -265,232 +243,27 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d } else { pCreateTbReq->name = taosStrdup(pDataBlock->info.parTbName); } + taosArrayPush(reqs.pArray, pCreateTbReq); + tqDebug("s-task:%s build create table:%s msg complete", pTask->id.idStr, pCreateTbReq->name); } + reqs.nReqs = taosArrayGetSize(reqs.pArray); if (tqPutReqToQueue(pVnode, &reqs) != TSDB_CODE_SUCCESS) { goto _end; } + tagArray = taosArrayDestroy(tagArray); taosArrayDestroyEx(crTblArray, (FDelete)tdDestroySVCreateTbReq); crTblArray = NULL; + } else if (pDataBlock->info.type == STREAM_CHECKPOINT) { + continue; } else { - SSubmitTbData tbData = {0}; - tqDebug("tq sink pipe, convert block:%d, rows:%d", i, rows); - - if (!(tbData.aRowP = taosArrayInit(rows, sizeof(SRow*)))) { - goto _end; - } - - tbData.suid = suid; - tbData.uid = 0; // uid is assigned by vnode - tbData.sver = pTSchema->version; - - STableSinkInfo* pTableSinkInfo = NULL; - int32_t res = tqGetTableInfo(pTask->tbSink.pTblInfo, pDataBlock->info.id.groupId, &pTableSinkInfo); - if (res != TSDB_CODE_SUCCESS) { - pTableSinkInfo = taosMemoryCalloc(1, sizeof(STableSinkInfo)); - } - - char* ctbName = pDataBlock->info.parTbName; - if (!ctbName[0]) { - memset(ctbName, 0, TSDB_TABLE_NAME_LEN); - if (res == TSDB_CODE_SUCCESS) { - memcpy(ctbName, pTableSinkInfo->tbName, strlen(pTableSinkInfo->tbName)); - } else { - buildCtbNameByGroupIdImpl(stbFullName, pDataBlock->info.id.groupId, ctbName); - memcpy(pTableSinkInfo->tbName, ctbName, strlen(ctbName)); - tqDebug("vgId:%d, gropuId:%" PRIu64 " datablock table name is null", TD_VID(pVnode), - pDataBlock->info.id.groupId); - } - } - - if (res == TSDB_CODE_SUCCESS) { - tbData.uid = pTableSinkInfo->uid; - } else { - 
SMetaReader mr = {0}; - metaReaderDoInit(&mr, pVnode->pMeta, 0); - if (metaGetTableEntryByName(&mr, ctbName) < 0) { - metaReaderClear(&mr); - taosMemoryFree(pTableSinkInfo); - tqDebug("vgId:%d, stream write into %s, table auto created", TD_VID(pVnode), ctbName); - - SVCreateTbReq* pCreateTbReq = NULL; - - if (!(pCreateTbReq = taosMemoryCalloc(1, sizeof(SVCreateStbReq)))) { - goto _end; - }; - - // set const - pCreateTbReq->flags = 0; - pCreateTbReq->type = TSDB_CHILD_TABLE; - pCreateTbReq->ctb.suid = suid; - - // set super table name - SName name = {0}; - tNameFromString(&name, stbFullName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - pCreateTbReq->ctb.stbName = taosStrdup((char*)tNameGetTableName(&name)); // taosStrdup(stbFullName); - - // set tag content - tagArray = taosArrayInit(1, sizeof(STagVal)); - if (!tagArray) { - tdDestroySVCreateTbReq(pCreateTbReq); - taosMemoryFreeClear(pCreateTbReq); - goto _end; - } - STagVal tagVal = { - .cid = pTSchema->numOfCols + 1, - .type = TSDB_DATA_TYPE_UBIGINT, - .i64 = (int64_t)pDataBlock->info.id.groupId, - }; - taosArrayPush(tagArray, &tagVal); - pCreateTbReq->ctb.tagNum = taosArrayGetSize(tagArray); - - STag* pTag = NULL; - tTagNew(tagArray, 1, false, &pTag); - tagArray = taosArrayDestroy(tagArray); - if (pTag == NULL) { - tdDestroySVCreateTbReq(pCreateTbReq); - taosMemoryFreeClear(pCreateTbReq); - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _end; - } - pCreateTbReq->ctb.pTag = (uint8_t*)pTag; - - // set tag name - SArray* tagName = taosArrayInit(1, TSDB_COL_NAME_LEN); - char tagNameStr[TSDB_COL_NAME_LEN] = {0}; - strcpy(tagNameStr, "group_id"); - taosArrayPush(tagName, tagNameStr); - pCreateTbReq->ctb.tagName = tagName; - - // set table name - pCreateTbReq->name = taosStrdup(ctbName); - - tbData.pCreateTbReq = pCreateTbReq; - tbData.flags = SUBMIT_REQ_AUTO_CREATE_TABLE; - } else { - if (mr.me.type != TSDB_CHILD_TABLE) { - tqError("vgId:%d, failed to write into %s, since table type incorrect, type %d", TD_VID(pVnode), 
ctbName, - mr.me.type); - metaReaderClear(&mr); - taosMemoryFree(pTableSinkInfo); - continue; - } - - if (mr.me.ctbEntry.suid != suid) { - tqError("vgId:%d, failed to write into %s, since suid mismatch, expect suid: %" PRId64 - ", actual suid %" PRId64 "", - TD_VID(pVnode), ctbName, suid, mr.me.ctbEntry.suid); - metaReaderClear(&mr); - taosMemoryFree(pTableSinkInfo); - continue; - } - - tbData.uid = mr.me.uid; - pTableSinkInfo->uid = mr.me.uid; - int32_t code = tqPutTableInfo(pTask->tbSink.pTblInfo, pDataBlock->info.id.groupId, pTableSinkInfo); - if (code != TSDB_CODE_SUCCESS) { - taosMemoryFreeClear(pTableSinkInfo); - } - metaReaderClear(&mr); - } - } - - // rows - if (!pVals && !(pVals = taosArrayInit(pTSchema->numOfCols, sizeof(SColVal)))) { - taosArrayDestroy(tbData.aRowP); - tdDestroySVCreateTbReq(tbData.pCreateTbReq); - goto _end; - } - - for (int32_t j = 0; j < rows; j++) { - taosArrayClear(pVals); - int32_t dataIndex = 0; - for (int32_t k = 0; k < pTSchema->numOfCols; k++) { - const STColumn* pCol = &pTSchema->columns[k]; - if (k == 0) { - SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); - void* colData = colDataGetData(pColData, j); - tqDebug("tq sink pipe2, row %d, col %d ts %" PRId64, j, k, *(int64_t*)colData); - } - if (IS_SET_NULL(pCol)) { - SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type); - taosArrayPush(pVals, &cv); - } else { - SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); - if (colDataIsNull_s(pColData, j)) { - SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type); - taosArrayPush(pVals, &cv); - dataIndex++; - } else { - void* colData = colDataGetData(pColData, j); - if (IS_STR_DATA_TYPE(pCol->type)) { - // address copy, no value - SValue sv = (SValue){.nData = varDataLen(colData), .pData = varDataVal(colData)}; - SColVal cv = COL_VAL_VALUE(pCol->colId, pCol->type, sv); - taosArrayPush(pVals, &cv); - } else { - SValue sv; - memcpy(&sv.val, colData, tDataTypes[pCol->type].bytes); - SColVal 
cv = COL_VAL_VALUE(pCol->colId, pCol->type, sv); - taosArrayPush(pVals, &cv); - } - dataIndex++; - } - } - } - SRow* pRow = NULL; - if ((terrno = tRowBuild(pVals, (STSchema*)pTSchema, &pRow)) < 0) { - tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); - goto _end; - } - ASSERT(pRow); - taosArrayPush(tbData.aRowP, &pRow); - } - - SSubmitReq2 submitReq = {0}; - if (!(submitReq.aSubmitTbData = taosArrayInit(1, sizeof(SSubmitTbData)))) { - tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); - goto _end; - } - - taosArrayPush(submitReq.aSubmitTbData, &tbData); - - // encode - int32_t len; - int32_t code; - tEncodeSize(tEncodeSubmitReq, &submitReq, len, code); - SEncoder encoder; - len += sizeof(SSubmitReq2Msg); - pBuf = rpcMallocCont(len); - if (NULL == pBuf) { - tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); - goto _end; - } - ((SSubmitReq2Msg*)pBuf)->header.vgId = TD_VID(pVnode); - ((SSubmitReq2Msg*)pBuf)->header.contLen = htonl(len); - ((SSubmitReq2Msg*)pBuf)->version = htobe64(1); - tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SSubmitReq2Msg)), len - sizeof(SSubmitReq2Msg)); - if (tEncodeSubmitReq(&encoder, &submitReq) < 0) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - tqError("failed to encode submit req since %s", terrstr()); - tEncoderClear(&encoder); - rpcFreeCont(pBuf); - tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); - continue; - } - tEncoderClear(&encoder); - tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); - - SRpcMsg msg = { .msgType = TDMT_VND_SUBMIT, .pCont = pBuf, .contLen = len }; - if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) { - tqDebug("failed to put into write-queue since %s", terrstr()); - } + code = doSinkResultBlock(pVnode, i, stbFullName, suid, pDataBlock, pTask); } } - tqDebug("vgId:%d, s-task:%s write results completed", TD_VID(pVnode), pTask->id.idStr); + tqDebug("vgId:%d, s-task:%s write results completed", vgId, pTask->id.idStr); _end: taosArrayDestroy(tagArray); @@ -498,3 +271,380 @@ _end: 
taosArrayDestroyEx(crTblArray, (FDelete)tdDestroySVCreateTbReq); // TODO: change } + +int32_t doSinkDeleteBlock(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, + int64_t suid) { + SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))}; + + int32_t code = tqBuildDeleteReq(stbFullName, pDataBlock, &deleteReq, pTask->id.idStr); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (taosArrayGetSize(deleteReq.deleteReqs) == 0) { + taosArrayDestroy(deleteReq.deleteReqs); + return TSDB_CODE_SUCCESS; + } + + int32_t len; + tEncodeSize(tEncodeSBatchDeleteReq, &deleteReq, len, code); + if (code != TSDB_CODE_SUCCESS) { + qError("s-task:%s failed to encode delete request", pTask->id.idStr); + return code; + } + + SEncoder encoder; + void* serializedDeleteReq = rpcMallocCont(len + sizeof(SMsgHead)); + void* abuf = POINTER_SHIFT(serializedDeleteReq, sizeof(SMsgHead)); + tEncoderInit(&encoder, abuf, len); + tEncodeSBatchDeleteReq(&encoder, &deleteReq); + tEncoderClear(&encoder); + taosArrayDestroy(deleteReq.deleteReqs); + + ((SMsgHead*)serializedDeleteReq)->vgId = TD_VID(pVnode); + + SRpcMsg msg = {.msgType = TDMT_VND_BATCH_DEL, .pCont = serializedDeleteReq, .contLen = len + sizeof(SMsgHead)}; + if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) { + tqDebug("failed to put delete req into write-queue since %s", terrstr()); + } + + return TSDB_CODE_SUCCESS; +} + +static bool isValidDestChildTable(SMetaReader* pReader, int32_t vgId, char* ctbName, int64_t suid) { + if (pReader->me.type != TSDB_CHILD_TABLE) { + tqError("vgId:%d, failed to write into %s, since table type:%d incorrect", vgId, ctbName, pReader->me.type); + return false; + } + + if (pReader->me.ctbEntry.suid != suid) { + tqError("vgId:%d, failed to write into %s, since suid mismatch, expect suid:%" PRId64 ", actual:%" PRId64, + vgId, ctbName, suid, pReader->me.ctbEntry.suid); + return false; + } + + return true; +} + +static 
SVCreateTbReq* buildAutoCreateTableReq(char* stbFullName, int64_t suid, int32_t numOfCols, SSDataBlock* pDataBlock) { + char* ctbName = pDataBlock->info.parTbName; + + SVCreateTbReq* pCreateTbReq = taosMemoryCalloc(1, sizeof(SVCreateStbReq)); + if (pCreateTbReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + // set tag content + SArray* tagArray = taosArrayInit(1, sizeof(STagVal)); + if (tagArray == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tdDestroySVCreateTbReq(pCreateTbReq); + taosMemoryFreeClear(pCreateTbReq); + return NULL; + } + + // set const + pCreateTbReq->flags = 0; + pCreateTbReq->type = TSDB_CHILD_TABLE; + pCreateTbReq->ctb.suid = suid; + + // set super table name + SName name = {0}; + tNameFromString(&name, stbFullName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); + pCreateTbReq->ctb.stbName = taosStrdup((char*)tNameGetTableName(&name)); + + STagVal tagVal = { .cid = numOfCols, .type = TSDB_DATA_TYPE_UBIGINT, .i64 = pDataBlock->info.id.groupId}; + taosArrayPush(tagArray, &tagVal); + pCreateTbReq->ctb.tagNum = taosArrayGetSize(tagArray); + + STag* pTag = NULL; + tTagNew(tagArray, 1, false, &pTag); + taosArrayDestroy(tagArray); + + if (pTag == NULL) { + tdDestroySVCreateTbReq(pCreateTbReq); + taosMemoryFreeClear(pCreateTbReq); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + pCreateTbReq->ctb.pTag = (uint8_t*)pTag; + + // set tag name + SArray* tagName = taosArrayInit(1, TSDB_COL_NAME_LEN); + char k[TSDB_COL_NAME_LEN] = "group_id"; + taosArrayPush(tagName, k); + + pCreateTbReq->ctb.tagName = tagName; + + // set table name + pCreateTbReq->name = taosStrdup(ctbName); + return pCreateTbReq; +} + +static int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, uint64_t groupId, uint64_t uid, + const char* id) { + pTableSinkInfo->uid = uid; + + int32_t code = tqPutTableInfo(pSinkTableMap, groupId, pTableSinkInfo); + if (code != TSDB_CODE_SUCCESS) { + taosMemoryFreeClear(pTableSinkInfo); + 
tqError("s-task:%s failed to put tableSinkInfo in to cache, code:%s", id, tstrerror(code)); + } else { + tqDebug("s-task:%s new dst table:%s(uid:%" PRIu64 ") added into cache, total:%d", id, pTableSinkInfo->name.data, + pTableSinkInfo->uid, tSimpleHashGetSize(pSinkTableMap)); + } + + return code; +} + +int32_t doSinkResultBlock(SVnode* pVnode, int32_t blockIndex, char* stbFullName, int64_t suid, SSDataBlock* pDataBlock, + SStreamTask* pTask) { + int32_t numOfRows = pDataBlock->info.rows; + int32_t vgId = TD_VID(pVnode); + uint64_t groupId = pDataBlock->info.id.groupId; + STSchema* pTSchema = pTask->tbSink.pTSchema; + int32_t code = TSDB_CODE_SUCCESS; + void* pBuf = NULL; + SArray* pVals = NULL; + const char* id = pTask->id.idStr; + + SSubmitTbData tbData = {.suid = suid, .uid = 0, .sver = pTSchema->version}; + tqDebug("s-task:%s sink data pipeline, build submit msg from %d-th resBlock, including %d rows, dst suid:%" PRId64, + id, blockIndex + 1, numOfRows, suid); + + tbData.aRowP = taosArrayInit(numOfRows, sizeof(SRow*)); + pVals = taosArrayInit(pTSchema->numOfCols, sizeof(SColVal)); + + if (tbData.aRowP == NULL || pVals == NULL) { + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + + code = TSDB_CODE_OUT_OF_MEMORY; + tqError("s-task:%s vgId:%d failed to prepare write stream res blocks, code:%s", id, vgId, tstrerror(code)); + return code; + } + + STableSinkInfo* pTableSinkInfo = NULL; + bool exist = tqGetTableInfo(pTask->tbSink.pTblInfo, groupId, &pTableSinkInfo); + + char* dstTableName = pDataBlock->info.parTbName; + if (exist) { + if (dstTableName[0] == 0) { + tstrncpy(dstTableName, pTableSinkInfo->name.data, pTableSinkInfo->name.len + 1); + tqDebug("s-task:%s vgId:%d, gropuId:%" PRIu64 " datablock table name is null, set name:%s", id, vgId, groupId, + dstTableName); + } else { + if (pTableSinkInfo->uid != 0) { + tqDebug("s-task:%s write %d rows into groupId:%" PRIu64 " dstTable:%s(uid:%" PRIu64 ")", id, numOfRows, groupId, + dstTableName, 
pTableSinkInfo->uid); + } else { + tqDebug("s-task:%s write %d rows into groupId:%" PRIu64 " dstTable:%s(not set uid yet for the secondary block)", + id, numOfRows, groupId, dstTableName); + } + } + } else { // not exist + if (dstTableName[0] == 0) { + memset(dstTableName, 0, TSDB_TABLE_NAME_LEN); + buildCtbNameByGroupIdImpl(stbFullName, groupId, dstTableName); + } + + int32_t nameLen = strlen(dstTableName); + pTableSinkInfo = taosMemoryCalloc(1, sizeof(STableSinkInfo) + nameLen); + + pTableSinkInfo->name.len = nameLen; + memcpy(pTableSinkInfo->name.data, dstTableName, nameLen); + tqDebug("s-task:%s build new sinkTableInfo to add cache, dstTable:%s", id, dstTableName); + } + + if (exist) { + tbData.uid = pTableSinkInfo->uid; + + if (tbData.uid == 0) { + tqDebug("s-task:%s cached tableInfo uid is invalid, acquire it from meta", id); + } + + while (pTableSinkInfo->uid == 0) { + // wait for the table to be created + SMetaReader mr = {0}; + metaReaderDoInit(&mr, pVnode->pMeta, 0); + + code = metaGetTableEntryByName(&mr, dstTableName); + if (code == 0) { // table alreay exists, check its type and uid + bool isValid = isValidDestChildTable(&mr, vgId, dstTableName, suid); + if (!isValid) { // not valid table, ignore it + metaReaderClear(&mr); + + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + + return TSDB_CODE_SUCCESS; + } else { + tqDebug("s-task:%s set uid:%"PRIu64" for dstTable:%s from meta", id, mr.me.uid, pTableSinkInfo->name.data); + + tbData.uid = mr.me.uid; + pTableSinkInfo->uid = mr.me.uid; + metaReaderClear(&mr); + } + } else { // not exist, wait and retry + metaReaderClear(&mr); + taosMsleep(100); + tqDebug("s-task:%s wait for the table:%s ready before insert data", id, dstTableName); + } + } + + } else { + // todo: this check is not safe, and results in losing of submit message from WAL. 
+ // The auto-create option will always set to be open for those submit messages, which arrive during the period + // the creating of the destination table, due to the absence of the user-specified table in TSDB. When scanning + // data from WAL, those submit messages, with auto-created table option, will be discarded expect the first, for + // those mismatched table uids. Only the FIRST table has the correct table uid, and those remain all have + // randomly generated false table uid in the WAL. + SMetaReader mr = {0}; + metaReaderDoInit(&mr, pVnode->pMeta, 0); + + // table not in cache, let's try the extract it from tsdb meta + if (metaGetTableEntryByName(&mr, dstTableName) < 0) { + metaReaderClear(&mr); + + tqDebug("s-task:%s stream write into table:%s, table auto created", id, dstTableName); + + tbData.flags = SUBMIT_REQ_AUTO_CREATE_TABLE; + tbData.pCreateTbReq = buildAutoCreateTableReq(stbFullName, suid, pTSchema->numOfCols + 1, pDataBlock); + if (tbData.pCreateTbReq == NULL) { + tqError("s-task:%s failed to build auto create table req, code:%s", id, tstrerror(terrno)); + + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + + return terrno; + } + + doPutIntoCache(pTask->tbSink.pTblInfo, pTableSinkInfo, groupId, 0, id); + } else { + bool isValid = isValidDestChildTable(&mr, vgId, dstTableName, suid); + if (!isValid) { + metaReaderClear(&mr); + taosMemoryFree(pTableSinkInfo); + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + return TSDB_CODE_SUCCESS; + } else { + tbData.uid = mr.me.uid; + metaReaderClear(&mr); + + doPutIntoCache(pTask->tbSink.pTblInfo, pTableSinkInfo, groupId, tbData.uid, id); + } + } + } + + // rows + for (int32_t j = 0; j < numOfRows; j++) { + taosArrayClear(pVals); + + int32_t dataIndex = 0; + for (int32_t k = 0; k < pTSchema->numOfCols; k++) { + const STColumn* pCol = &pTSchema->columns[k]; + if (k == 0) { + SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); + void* colData = 
colDataGetData(pColData, j); + tqDebug("s-task:%s tq sink pipe2, row %d, col %d ts %" PRId64, id, j, k, *(int64_t*)colData); + } + + if (IS_SET_NULL(pCol)) { + SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type); + taosArrayPush(pVals, &cv); + } else { + SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); + if (colDataIsNull_s(pColData, j)) { + SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type); + taosArrayPush(pVals, &cv); + dataIndex++; + } else { + void* colData = colDataGetData(pColData, j); + if (IS_STR_DATA_TYPE(pCol->type)) { + // address copy, no value + SValue sv = (SValue){.nData = varDataLen(colData), .pData = varDataVal(colData)}; + SColVal cv = COL_VAL_VALUE(pCol->colId, pCol->type, sv); + taosArrayPush(pVals, &cv); + } else { + SValue sv; + memcpy(&sv.val, colData, tDataTypes[pCol->type].bytes); + SColVal cv = COL_VAL_VALUE(pCol->colId, pCol->type, sv); + taosArrayPush(pVals, &cv); + } + dataIndex++; + } + } + } + + SRow* pRow = NULL; + code = tRowBuild(pVals, (STSchema*)pTSchema, &pRow); + if (code != TSDB_CODE_SUCCESS) { + tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); + + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + return code; + } + + ASSERT(pRow); + taosArrayPush(tbData.aRowP, &pRow); + } + + SSubmitReq2 submitReq = {0}; + if (!(submitReq.aSubmitTbData = taosArrayInit(1, sizeof(SSubmitTbData)))) { + tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); + + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + return TSDB_CODE_OUT_OF_MEMORY; + } + + taosArrayPush(submitReq.aSubmitTbData, &tbData); + + // encode + int32_t len = 0; + tEncodeSize(tEncodeSubmitReq, &submitReq, len, code); + + SEncoder encoder; + len += sizeof(SSubmitReq2Msg); + + pBuf = rpcMallocCont(len); + if (NULL == pBuf) { + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + } + + ((SSubmitReq2Msg*)pBuf)->header.vgId = vgId; + ((SSubmitReq2Msg*)pBuf)->header.contLen 
= htonl(len); + ((SSubmitReq2Msg*)pBuf)->version = htobe64(1); + + tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SSubmitReq2Msg)), len - sizeof(SSubmitReq2Msg)); + if (tEncodeSubmitReq(&encoder, &submitReq) < 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("failed to encode submit req, code:%s, ignore and continue", terrstr()); + tEncoderClear(&encoder); + rpcFreeCont(pBuf); + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); + + return code; + } + + tEncoderClear(&encoder); + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); + + SRpcMsg msg = { .msgType = TDMT_VND_SUBMIT, .pCont = pBuf, .contLen = len }; + code = tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg); + + if(code == TSDB_CODE_SUCCESS) { + tqDebug("s-task:%s send submit msg to dstTable:%s, numOfRows:%d", id, pTableSinkInfo->name.data, numOfRows); + } else { + tqError("s-task:%s failed to put into write-queue since %s", id, terrstr()); + } + + taosArrayDestroy(pVals); + return code; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index ab7093a701..6469045621 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -14,128 +14,122 @@ */ #include "meta.h" +#include "streamSnapshot.h" #include "tdbInt.h" #include "tq.h" // STqSnapReader ======================================== -struct STqSnapReader { +struct SStreamStateReader { STQ* pTq; int64_t sver; int64_t ever; TBC* pCur; + + SStreamSnapReader* pReaderImpl; + int32_t complete; // open reader or not }; -int32_t tqSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapReader** ppReader) { - int32_t code = 0; - STqSnapReader* pReader = NULL; +int32_t streamStateSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamStateReader** ppReader) { + int32_t code = 0; + SStreamStateReader* pReader = NULL; + + char tdir[TSDB_FILENAME_LEN * 2] = {0}; // alloc - pReader = 
(STqSnapReader*)taosMemoryCalloc(1, sizeof(STqSnapReader)); + pReader = (SStreamStateReader*)taosMemoryCalloc(1, sizeof(SStreamStateReader)); if (pReader == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } + + SStreamMeta* meta = pTq->pStreamMeta; pReader->pTq = pTq; pReader->sver = sver; pReader->ever = ever; - // impl - code = tdbTbcOpen(pTq->pExecStore, &pReader->pCur, NULL); - if (code) { + int64_t chkpId = meta ? meta->chkpId : 0; + + SStreamSnapReader* pSnapReader = NULL; + + if (streamSnapReaderOpen(pTq, sver, chkpId, pTq->path, &pSnapReader) == 0) { + pReader->complete = 1; + } else { + code = -1; taosMemoryFree(pReader); goto _err; } + pReader->pReaderImpl = pSnapReader; - code = tdbTbcMoveToFirst(pReader->pCur); - if (code) { - taosMemoryFree(pReader); - goto _err; - } - - tqInfo("vgId:%d, vnode snapshot tq reader opened", TD_VID(pTq->pVnode)); + tqDebug("vgId:%d, vnode %s snapshot reader opened", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER); *ppReader = pReader; return code; _err: - tqError("vgId:%d, vnode snapshot tq reader open failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); + tqError("vgId:%d, vnode %s snapshot reader failed to open since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, + tstrerror(code)); *ppReader = NULL; return code; } -int32_t tqSnapReaderClose(STqSnapReader** ppReader) { +int32_t streamStateSnapReaderClose(SStreamStateReader* pReader) { int32_t code = 0; - - tdbTbcClose((*ppReader)->pCur); - taosMemoryFree(*ppReader); - *ppReader = NULL; - + tqDebug("vgId:%d, vnode %s snapshot reader closed", TD_VID(pReader->pTq->pVnode), STREAM_STATE_TRANSFER); + streamSnapReaderClose(pReader->pReaderImpl); + taosMemoryFree(pReader); return code; } -int32_t tqSnapRead(STqSnapReader* pReader, uint8_t** ppData) { - int32_t code = 0; - const void* pKey = NULL; - const void* pVal = NULL; - int32_t kLen = 0; - int32_t vLen = 0; - SDecoder decoder; - STqHandle handle; +int32_t streamStateSnapRead(SStreamStateReader* pReader, uint8_t** 
ppData) { + tqDebug("vgId:%d, vnode %s snapshot read data", TD_VID(pReader->pTq->pVnode), STREAM_STATE_TRANSFER); - *ppData = NULL; - for (;;) { - if (tdbTbcGet(pReader->pCur, &pKey, &kLen, &pVal, &vLen)) { - goto _exit; - } - - tDecoderInit(&decoder, (uint8_t*)pVal, vLen); - tDecodeSTqHandle(&decoder, &handle); - tDecoderClear(&decoder); - - if (handle.snapshotVer <= pReader->sver && handle.snapshotVer >= pReader->ever) { - tdbTbcMoveToNext(pReader->pCur); - break; - } else { - tdbTbcMoveToNext(pReader->pCur); - } + int32_t code = 0; + if (pReader->complete == 0) { + return 0; } - *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + vLen); + uint8_t* rowData = NULL; + int64_t len; + code = streamSnapRead(pReader->pReaderImpl, &rowData, &len); + if (rowData == NULL || len == 0) { + return code; + } + *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + len); if (*ppData == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - + // refactor later, avoid mem/free freq SSnapDataHdr* pHdr = (SSnapDataHdr*)(*ppData); - pHdr->type = SNAP_DATA_TQ_HANDLE; - pHdr->size = vLen; - memcpy(pHdr->data, pVal, vLen); - - tqInfo("vgId:%d, vnode snapshot tq read data, version:%" PRId64 " subKey: %s vLen:%d", TD_VID(pReader->pTq->pVnode), - handle.snapshotVer, handle.subKey, vLen); - -_exit: + pHdr->type = SNAP_DATA_STREAM_STATE_BACKEND; + pHdr->size = len; + memcpy(pHdr->data, rowData, len); + tqDebug("vgId:%d, vnode stream-state snapshot read data success", TD_VID(pReader->pTq->pVnode)); return code; _err: - tqError("vgId:%d, vnode snapshot tq read data failed since %s", TD_VID(pReader->pTq->pVnode), tstrerror(code)); + tqError("vgId:%d, vnode stream-state snapshot failed to read since %s", TD_VID(pReader->pTq->pVnode), + tstrerror(code)); return code; } // STqSnapWriter ======================================== -struct STqSnapWriter { +struct SStreamStateWriter { STQ* pTq; int64_t sver; int64_t ever; TXN* txn; + + SStreamSnapWriter* pWriterImpl; }; -int32_t tqSnapWriterOpen(STQ* pTq, 
int64_t sver, int64_t ever, STqSnapWriter** ppWriter) { - int32_t code = 0; - STqSnapWriter* pWriter; +int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamStateWriter** ppWriter) { + int32_t code = 0; + SStreamStateWriter* pWriter; + char tdir[TSDB_FILENAME_LEN * 2] = {0}; // alloc - pWriter = (STqSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); + pWriter = (SStreamStateWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); if (pWriter == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -144,68 +138,48 @@ int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** p pWriter->sver = sver; pWriter->ever = ever; - if (tdbBegin(pTq->pMetaDB, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { - code = -1; - taosMemoryFree(pWriter); + sprintf(tdir, "%s%s%s%s%s", pTq->path, TD_DIRSEP, VNODE_TQ_STREAM, TD_DIRSEP, "received"); + taosMkDir(tdir); + + SStreamSnapWriter* pSnapWriter = NULL; + if (streamSnapWriterOpen(pTq, sver, ever, tdir, &pSnapWriter) < 0) { goto _err; } + tqDebug("vgId:%d, vnode %s snapshot writer opened, path:%s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, tdir); + pWriter->pWriterImpl = pSnapWriter; + *ppWriter = pWriter; return code; - _err: - tqError("vgId:%d, tq snapshot writer open failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); - *ppWriter = NULL; - return code; -} - -int32_t tqSnapWriterClose(STqSnapWriter** ppWriter, int8_t rollback) { - int32_t code = 0; - STqSnapWriter* pWriter = *ppWriter; - STQ* pTq = pWriter->pTq; - - if (rollback) { - tdbAbort(pWriter->pTq->pMetaDB, pWriter->txn); - } else { - code = tdbCommit(pWriter->pTq->pMetaDB, pWriter->txn); - if (code) goto _err; - code = tdbPostCommit(pWriter->pTq->pMetaDB, pWriter->txn); - if (code) goto _err; - } - + tqError("vgId:%d, vnode %s snapshot writer failed to open since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, + tstrerror(code)); taosMemoryFree(pWriter); *ppWriter = NULL; + return -1; +} - // restore from 
metastore - if (tqMetaRestoreHandle(pTq) < 0) { - goto _err; +int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback) { + int32_t code = 0; + tqDebug("vgId:%d, vnode %s snapshot writer closed", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); + code = streamSnapWriterClose(pWriter->pWriterImpl, rollback); + + return code; +} +int32_t streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId) { + tqDebug("vgId:%d, vnode %s start to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); + int32_t code = streamMetaReopen(pWriter->pTq->pStreamMeta, chkpId); + if (code == 0) { + code = streamStateLoadTasks(pWriter); } - - return code; - -_err: - tqError("vgId:%d, tq snapshot writer close failed since %s", TD_VID(pWriter->pTq->pVnode), tstrerror(code)); + tqDebug("vgId:%d, vnode %s succ to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); + taosMemoryFree(pWriter); return code; } -int32_t tqSnapWrite(STqSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { - int32_t code = 0; - STQ* pTq = pWriter->pTq; - SDecoder decoder = {0}; - SDecoder* pDecoder = &decoder; - STqHandle handle; +int32_t streamStateLoadTasks(SStreamStateWriter* pWriter) { return streamLoadTasks(pWriter->pTq->pStreamMeta); } - tDecoderInit(pDecoder, pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr)); - code = tDecodeSTqHandle(pDecoder, &handle); - if (code) goto _err; - code = tqMetaSaveHandle(pTq, handle.subKey, &handle); - if (code < 0) goto _err; - tDecoderClear(pDecoder); - - return code; - -_err: - tDecoderClear(pDecoder); - tqError("vgId:%d, vnode snapshot tq write failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); - return code; +int32_t streamStateSnapWrite(SStreamStateWriter* pWriter, uint8_t* pData, uint32_t nData) { + tqDebug("vgId:%d, vnode %s snapshot write data", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); + return streamSnapWrite(pWriter->pWriterImpl, pData + 
sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr)); } diff --git a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c index ab7093a701..20d25dbceb 100644 --- a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c @@ -18,19 +18,26 @@ #include "tq.h" // STqSnapReader ======================================== -struct STqSnapReader { + +typedef struct { + int8_t type; + TTB* tbl; +} STablePair; +struct SStreamTaskReader { STQ* pTq; int64_t sver; int64_t ever; TBC* pCur; + SArray* tdbTbList; + int8_t pos; }; -int32_t tqSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapReader** ppReader) { - int32_t code = 0; - STqSnapReader* pReader = NULL; +int32_t streamTaskSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTaskReader** ppReader) { + int32_t code = 0; + SStreamTaskReader* pReader = NULL; // alloc - pReader = (STqSnapReader*)taosMemoryCalloc(1, sizeof(STqSnapReader)); + pReader = (SStreamTaskReader*)taosMemoryCalloc(1, sizeof(SStreamTaskReader)); if (pReader == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -38,68 +45,101 @@ int32_t tqSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapReader** p pReader->pTq = pTq; pReader->sver = sver; pReader->ever = ever; + pReader->tdbTbList = taosArrayInit(4, sizeof(STablePair)); - // impl - code = tdbTbcOpen(pTq->pExecStore, &pReader->pCur, NULL); + STablePair pair1 = {.tbl = pTq->pStreamMeta->pTaskDb, .type = SNAP_DATA_STREAM_TASK}; + taosArrayPush(pReader->tdbTbList, &pair1); + + STablePair pair2 = {.tbl = pTq->pStreamMeta->pCheckpointDb, .type = SNAP_DATA_STREAM_TASK_CHECKPOINT}; + taosArrayPush(pReader->tdbTbList, &pair2); + + pReader->pos = 0; + + STablePair* pPair = taosArrayGet(pReader->tdbTbList, pReader->pos); + code = tdbTbcOpen(pPair->tbl, &pReader->pCur, NULL); if (code) { + tqInfo("vgId:%d, vnode stream-task snapshot reader failed to open, reason: %s", TD_VID(pTq->pVnode), + tstrerror(code)); 
taosMemoryFree(pReader); goto _err; } code = tdbTbcMoveToFirst(pReader->pCur); if (code) { + tqInfo("vgId:%d, vnode stream-task snapshot reader failed to iterate, reason: %s", TD_VID(pTq->pVnode), + tstrerror(code)); taosMemoryFree(pReader); goto _err; } - tqInfo("vgId:%d, vnode snapshot tq reader opened", TD_VID(pTq->pVnode)); + tqDebug("vgId:%d, vnode stream-task snapshot reader opened", TD_VID(pTq->pVnode)); *ppReader = pReader; return code; _err: - tqError("vgId:%d, vnode snapshot tq reader open failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); + tqError("vgId:%d, vnode stream-task snapshot reader open failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); *ppReader = NULL; return code; } -int32_t tqSnapReaderClose(STqSnapReader** ppReader) { +int32_t streamTaskSnapReaderClose(SStreamTaskReader* pReader) { int32_t code = 0; - - tdbTbcClose((*ppReader)->pCur); - taosMemoryFree(*ppReader); - *ppReader = NULL; + tqInfo("vgId:%d, vnode stream-task snapshot reader closed", TD_VID(pReader->pTq->pVnode)); + taosArrayDestroy(pReader->tdbTbList); + tdbTbcClose(pReader->pCur); + taosMemoryFree(pReader); return code; } -int32_t tqSnapRead(STqSnapReader* pReader, uint8_t** ppData) { +int32_t streamTaskSnapRead(SStreamTaskReader* pReader, uint8_t** ppData) { int32_t code = 0; const void* pKey = NULL; - const void* pVal = NULL; + void* pVal = NULL; int32_t kLen = 0; int32_t vLen = 0; SDecoder decoder; STqHandle handle; *ppData = NULL; + int8_t except = 0; + tqDebug("vgId:%d, vnode stream-task snapshot start read data", TD_VID(pReader->pTq->pVnode)); + + STablePair* pPair = taosArrayGet(pReader->tdbTbList, pReader->pos); +NextTbl: + except = 0; for (;;) { - if (tdbTbcGet(pReader->pCur, &pKey, &kLen, &pVal, &vLen)) { - goto _exit; - } - - tDecoderInit(&decoder, (uint8_t*)pVal, vLen); - tDecodeSTqHandle(&decoder, &handle); - tDecoderClear(&decoder); - - if (handle.snapshotVer <= pReader->sver && handle.snapshotVer >= pReader->ever) { - tdbTbcMoveToNext(pReader->pCur); 
+ const void* tVal = NULL; + int32_t tLen = 0; + if (tdbTbcGet(pReader->pCur, &pKey, &kLen, &tVal, &tLen)) { + except = 1; break; } else { - tdbTbcMoveToNext(pReader->pCur); + pVal = taosMemoryCalloc(1, tLen); + memcpy(pVal, tVal, tLen); + vLen = tLen; + } + tdbTbcMoveToNext(pReader->pCur); + break; + } + if (except == 1) { + if (pReader->pos + 1 < taosArrayGetSize(pReader->tdbTbList)) { + tdbTbcClose(pReader->pCur); + + pReader->pos += 1; + pPair = taosArrayGet(pReader->tdbTbList, pReader->pos); + code = tdbTbcOpen(pPair->tbl, &pReader->pCur, NULL); + tdbTbcMoveToFirst(pReader->pCur); + + goto NextTbl; } } - + if (pVal == NULL || vLen == 0) { + *ppData = NULL; + tqDebug("vgId:%d, vnode stream-task snapshot finished read data", TD_VID(pReader->pTq->pVnode)); + return code; + } *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + vLen); if (*ppData == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -107,35 +147,34 @@ int32_t tqSnapRead(STqSnapReader* pReader, uint8_t** ppData) { } SSnapDataHdr* pHdr = (SSnapDataHdr*)(*ppData); - pHdr->type = SNAP_DATA_TQ_HANDLE; + pHdr->type = pPair->type; pHdr->size = vLen; memcpy(pHdr->data, pVal, vLen); + taosMemoryFree(pVal); - tqInfo("vgId:%d, vnode snapshot tq read data, version:%" PRId64 " subKey: %s vLen:%d", TD_VID(pReader->pTq->pVnode), - handle.snapshotVer, handle.subKey, vLen); + tqDebug("vgId:%d, vnode stream-task snapshot read data vLen:%d", TD_VID(pReader->pTq->pVnode), vLen); -_exit: return code; - _err: - tqError("vgId:%d, vnode snapshot tq read data failed since %s", TD_VID(pReader->pTq->pVnode), tstrerror(code)); + tqError("vgId:%d, vnode stream-task snapshot read data failed since %s", TD_VID(pReader->pTq->pVnode), + tstrerror(code)); return code; } // STqSnapWriter ======================================== -struct STqSnapWriter { +struct SStreamTaskWriter { STQ* pTq; int64_t sver; int64_t ever; TXN* txn; }; -int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** ppWriter) { - int32_t code = 0; - 
STqSnapWriter* pWriter; +int32_t streamTaskSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTaskWriter** ppWriter) { + int32_t code = 0; + SStreamTaskWriter* pWriter; // alloc - pWriter = (STqSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); + pWriter = (SStreamTaskWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); if (pWriter == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -144,68 +183,88 @@ int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** p pWriter->sver = sver; pWriter->ever = ever; - if (tdbBegin(pTq->pMetaDB, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { + if (tdbBegin(pTq->pStreamMeta->db, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { code = -1; taosMemoryFree(pWriter); goto _err; } *ppWriter = pWriter; + tqDebug("vgId:%d, vnode stream-task snapshot writer opened", TD_VID(pTq->pVnode)); return code; _err: - tqError("vgId:%d, tq snapshot writer open failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); + tqError("vgId:%d, vnode stream-task snapshot writer failed to write since %s", TD_VID(pTq->pVnode), tstrerror(code)); *ppWriter = NULL; return code; + return 0; } -int32_t tqSnapWriterClose(STqSnapWriter** ppWriter, int8_t rollback) { - int32_t code = 0; - STqSnapWriter* pWriter = *ppWriter; - STQ* pTq = pWriter->pTq; +int32_t streamTaskSnapWriterClose(SStreamTaskWriter* pWriter, int8_t rollback) { + int32_t code = 0; + STQ* pTq = pWriter->pTq; + tqDebug("vgId:%d, vnode stream-task snapshot writer closed", TD_VID(pTq->pVnode)); if (rollback) { - tdbAbort(pWriter->pTq->pMetaDB, pWriter->txn); + tdbAbort(pWriter->pTq->pStreamMeta->db, pWriter->txn); } else { - code = tdbCommit(pWriter->pTq->pMetaDB, pWriter->txn); + code = tdbCommit(pWriter->pTq->pStreamMeta->db, pWriter->txn); if (code) goto _err; - code = tdbPostCommit(pWriter->pTq->pMetaDB, pWriter->txn); + code = tdbPostCommit(pWriter->pTq->pStreamMeta->db, pWriter->txn); if (code) goto _err; } taosMemoryFree(pWriter); - 
*ppWriter = NULL; // restore from metastore - if (tqMetaRestoreHandle(pTq) < 0) { - goto _err; + // if (tqMetaRestoreHandle(pTq) < 0) { + // goto _err; + // } + + return code; + +_err: + tqError("vgId:%d, vnode stream-task snapshot writer failed to close since %s", TD_VID(pWriter->pTq->pVnode), + tstrerror(code)); + return code; + return 0; +} + +int32_t streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t nData) { + int32_t code = 0; + STQ* pTq = pWriter->pTq; + STqHandle handle; + SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; + if (pHdr->type == SNAP_DATA_STREAM_TASK) { + SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask)); + if (pTask == NULL) { + return -1; + } + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr)); + code = tDecodeStreamTask(&decoder, pTask); + if (code < 0) { + tDecoderClear(&decoder); + taosMemoryFree(pTask); + goto _err; + } + tDecoderClear(&decoder); + // tdbTbInsert(TTB *pTb, const void *pKey, int keyLen, const void *pVal, int valLen, TXN *pTxn) + if (tdbTbUpsert(pTq->pStreamMeta->pTaskDb, &pTask->id.taskId, sizeof(int32_t), + (uint8_t*)pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr), pWriter->txn) < 0) { + taosMemoryFree(pTask); + return -1; + } + taosMemoryFree(pTask); + } else if (pHdr->type == SNAP_DATA_STREAM_TASK_CHECKPOINT) { + // do nothing } + tqDebug("vgId:%d, vnode stream-task snapshot write", TD_VID(pTq->pVnode)); return code; _err: - tqError("vgId:%d, tq snapshot writer close failed since %s", TD_VID(pWriter->pTq->pVnode), tstrerror(code)); - return code; -} - -int32_t tqSnapWrite(STqSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { - int32_t code = 0; - STQ* pTq = pWriter->pTq; - SDecoder decoder = {0}; - SDecoder* pDecoder = &decoder; - STqHandle handle; - - tDecoderInit(pDecoder, pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr)); - code = tDecodeSTqHandle(pDecoder, &handle); - if (code) goto _err; - code = 
tqMetaSaveHandle(pTq, handle.subKey, &handle); - if (code < 0) goto _err; - tDecoderClear(pDecoder); - - return code; - -_err: - tDecoderClear(pDecoder); - tqError("vgId:%d, vnode snapshot tq write failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); + tqError("vgId:%d, vnode stream-task snapshot failed to write since %s", TD_VID(pTq->pVnode), tstrerror(code)); return code; } diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index b7fd505784..60d23663d0 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -36,6 +36,12 @@ int32_t tqInitDataRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset) { return 0; } +void tqUpdateNodeStage(STQ* pTq) { + SSyncState state = syncGetState(pTq->pVnode->sync); + pTq->pStreamMeta->stage = state.term; + tqDebug("vgId:%d update the meta stage to be:%"PRId64, pTq->pStreamMeta->vgId, pTq->pStreamMeta->stage); +} + static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, STqOffsetVal pOffset) { pRsp->reqOffset = pOffset; pRsp->rspOffset = pOffset; @@ -400,3 +406,56 @@ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* tmsgSendRsp(&rsp); return 0; } + +int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock) { + SDecoder* pCoder = &(SDecoder){0}; + SDeleteRes* pRes = &(SDeleteRes){0}; + + *pRefBlock = NULL; + + pRes->uidList = taosArrayInit(0, sizeof(tb_uid_t)); + if (pRes->uidList == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + tDecoderInit(pCoder, (uint8_t*)pData, len); + tDecodeDeleteRes(pCoder, pRes); + tDecoderClear(pCoder); + + int32_t numOfTables = taosArrayGetSize(pRes->uidList); + if (numOfTables == 0 || pRes->affectedRows == 0) { + taosArrayDestroy(pRes->uidList); + return TSDB_CODE_SUCCESS; + } + + SSDataBlock* pDelBlock = createSpecialDataBlock(STREAM_DELETE_DATA); + blockDataEnsureCapacity(pDelBlock, numOfTables); + pDelBlock->info.rows = numOfTables; + pDelBlock->info.version = ver; + + 
for (int32_t i = 0; i < numOfTables; i++) { + // start key column + SColumnInfoData* pStartCol = taosArrayGet(pDelBlock->pDataBlock, START_TS_COLUMN_INDEX); + colDataSetVal(pStartCol, i, (const char*)&pRes->skey, false); // end key column + SColumnInfoData* pEndCol = taosArrayGet(pDelBlock->pDataBlock, END_TS_COLUMN_INDEX); + colDataSetVal(pEndCol, i, (const char*)&pRes->ekey, false); + // uid column + SColumnInfoData* pUidCol = taosArrayGet(pDelBlock->pDataBlock, UID_COLUMN_INDEX); + int64_t* pUid = taosArrayGet(pRes->uidList, i); + colDataSetVal(pUidCol, i, (const char*)pUid, false); + + colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, GROUPID_COLUMN_INDEX), i); + colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX), i); + colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX), i); + } + + taosArrayDestroy(pRes->uidList); + *pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); + if (*pRefBlock == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + (*pRefBlock)->type = STREAM_INPUT__REF_DATA_BLOCK; + (*pRefBlock)->pBlock = pDelBlock; + return TSDB_CODE_SUCCESS; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbOpen.c b/source/dnode/vnode/src/tsdb/tsdbOpen.c index c684ad5184..14aa2a84a9 100644 --- a/source/dnode/vnode/src/tsdb/tsdbOpen.c +++ b/source/dnode/vnode/src/tsdb/tsdbOpen.c @@ -88,6 +88,9 @@ _err: int tsdbClose(STsdb **pTsdb) { if (*pTsdb) { + STsdb *pdb = *pTsdb; + tsdbDebug("vgId:%d, tsdb is close at %s, days:%d, keep:%d,%d,%d", TD_VID(pdb->pVnode), pdb->path, pdb->keepCfg.days, + pdb->keepCfg.keep0, pdb->keepCfg.keep1, pdb->keepCfg.keep2); taosThreadRwlockWrlock(&(*pTsdb)->rwLock); tsdbMemTableDestroy((*pTsdb)->mem, true); (*pTsdb)->mem = NULL; diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 0b45ff5c4d..db94f32459 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -422,6 +422,15 
@@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC // open tq sprintf(tdir, "%s%s%s", dir, TD_DIRSEP, VNODE_TQ_DIR); taosRealPath(tdir, NULL, sizeof(tdir)); + + // open query + if (vnodeQueryOpen(pVnode)) { + vError("vgId:%d, failed to open vnode query since %s", TD_VID(pVnode), tstrerror(terrno)); + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + // sma required the tq is initialized before the vnode open pVnode->pTq = tqOpen(tdir, pVnode); if (pVnode->pTq == NULL) { vError("vgId:%d, failed to open vnode tq since %s", TD_VID(pVnode), tstrerror(terrno)); @@ -434,13 +443,6 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC goto _err; } - // open query - if (vnodeQueryOpen(pVnode)) { - vError("vgId:%d, failed to open vnode query since %s", TD_VID(pVnode), tstrerror(terrno)); - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - // vnode begin if (vnodeBegin(pVnode) < 0) { vError("vgId:%d, failed to begin since %s", TD_VID(pVnode), tstrerror(terrno)); diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index d559783c2f..bfddeedd78 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -91,7 +91,7 @@ int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData) // CONFIG ============== // FIXME: if commit multiple times and the config changed? 
if (!pReader->cfgDone) { - char fName[TSDB_FILENAME_LEN]; + char fName[TSDB_FILENAME_LEN]; int32_t offset = 0; vnodeGetPrimaryDir(pVnode->path, pVnode->diskPrimary, pVnode->pTfs, fName, TSDB_FILENAME_LEN); @@ -220,9 +220,57 @@ int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData) } // STREAM ============ + vInfo("stream task start"); if (!pReader->streamTaskDone) { + if (pReader->pStreamTaskReader == NULL) { + vInfo("stream task start 1"); + code = streamTaskSnapReaderOpen(pReader->pVnode->pTq, pReader->sver, pReader->sver, &pReader->pStreamTaskReader); + if (code) { + vInfo("stream task start err"); + goto _err; + } + } + code = streamTaskSnapRead(pReader->pStreamTaskReader, ppData); + vInfo("stream task start 2"); + if (code) { + vInfo("stream task start 3"); + goto _err; + } else { + if (*ppData) { + goto _exit; + vInfo("stream task start 4"); + } else { + pReader->streamTaskDone = 1; + code = streamTaskSnapReaderClose(pReader->pStreamTaskReader); + vInfo("stream task start 5"); + if (code) goto _err; + pReader->pStreamTaskReader = NULL; + } + } } if (!pReader->streamStateDone) { + if (pReader->pStreamStateReader == NULL) { + code = + streamStateSnapReaderOpen(pReader->pVnode->pTq, pReader->sver, pReader->sver, &pReader->pStreamStateReader); + if (code) { + pReader->streamStateDone = 1; + pReader->pStreamStateReader = NULL; + goto _err; + } + } + code = streamStateSnapRead(pReader->pStreamStateReader, ppData); + if (code) { + goto _err; + } else { + if (*ppData) { + goto _exit; + } else { + pReader->streamStateDone = 1; + code = streamStateSnapReaderClose(pReader->pStreamStateReader); + if (code) goto _err; + pReader->pStreamStateReader = NULL; + } + } } // RSMA ============== @@ -362,6 +410,20 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot * if (code) goto _exit; } + if (pWriter->pStreamTaskWriter) { + code = streamTaskSnapWriterClose(pWriter->pStreamTaskWriter, rollback); + if (code) goto _exit; + } 
+ + if (pWriter->pStreamStateWriter) { + code = streamStateSnapWriterClose(pWriter->pStreamStateWriter, rollback); + if (code) goto _exit; + + code = streamStateRebuildFromSnap(pWriter->pStreamStateWriter, 0); + pWriter->pStreamStateWriter = NULL; + if (code) goto _exit; + } + if (pWriter->pRsmaSnapWriter) { code = rsmaSnapWriterClose(&pWriter->pRsmaSnapWriter, rollback); if (code) goto _exit; @@ -381,7 +443,7 @@ _exit: } static int32_t vnodeSnapWriteInfo(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { - int32_t code = 0; + int32_t code = 0; SVnode *pVnode = pWriter->pVnode; SSnapDataHdr *pHdr = (SSnapDataHdr *)pData; @@ -459,9 +521,23 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { } break; case SNAP_DATA_TQ_OFFSET: { } break; - case SNAP_DATA_STREAM_TASK: { + case SNAP_DATA_STREAM_TASK: + case SNAP_DATA_STREAM_TASK_CHECKPOINT: { + if (pWriter->pStreamTaskWriter == NULL) { + code = streamTaskSnapWriterOpen(pVnode->pTq, pWriter->sver, pWriter->ever, &pWriter->pStreamTaskWriter); + if (code) goto _err; + } + code = streamTaskSnapWrite(pWriter->pStreamTaskWriter, pData, nData); + if (code) goto _err; } break; - case SNAP_DATA_STREAM_STATE: { + case SNAP_DATA_STREAM_STATE_BACKEND: { + if (pWriter->pStreamStateWriter == NULL) { + code = streamStateSnapWriterOpen(pVnode->pTq, pWriter->sver, pWriter->ever, &pWriter->pStreamStateWriter); + if (code) goto _err; + } + code = streamStateSnapWrite(pWriter->pStreamStateWriter, pData, nData); + if (code) goto _err; + } break; case SNAP_DATA_RSMA1: case SNAP_DATA_RSMA2: diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index ccdde8ade4..1ec301f7de 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -377,7 +377,7 @@ static int32_t vnodePreProcessDeleteMsg(SVnode *pVnode, SRpcMsg *pMsg) { SEncoder *pCoder = &(SEncoder){0}; SDeleteRes res = {0}; - SReadHandle handle = {.config = &pVnode->config, .vnode 
= pVnode, .pMsgCb = &pVnode->msgCb}; + SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb}; initStorageAPI(&handle.api); code = qWorkerProcessDeleteMsg(&handle, pVnode->pQuery, pMsg, &res); @@ -561,7 +561,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg } break; case TDMT_STREAM_TASK_DEPLOY: { - if (pVnode->restored && tqProcessTaskDeployReq(pVnode->pTq, ver, pReq, len) < 0) { + if (tqProcessTaskDeployReq(pVnode->pTq, ver, pReq, len) < 0) { goto _err; } } break; @@ -571,12 +571,14 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg } } break; case TDMT_STREAM_TASK_PAUSE: { - if (pVnode->restored && tqProcessTaskPauseReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) { + if (pVnode->restored && vnodeIsLeader(pVnode) && + tqProcessTaskPauseReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) { goto _err; } } break; case TDMT_STREAM_TASK_RESUME: { - if (pVnode->restored && tqProcessTaskResumeReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) { + if (pVnode->restored && vnodeIsLeader(pVnode) && + tqProcessTaskResumeReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) { goto _err; } } break; @@ -586,6 +588,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg goto _err; } break; + case TDMT_VND_ALTER_CONFIG: vnodeProcessAlterConfigReq(pVnode, ver, pReq, len, pRsp); break; @@ -598,6 +601,12 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg case TDMT_VND_DROP_INDEX: vnodeProcessDropIndexReq(pVnode, ver, pReq, len, pRsp); break; + case TDMT_VND_STREAM_CHECK_POINT_SOURCE: + tqProcessStreamCheckPointSourceReq(pVnode->pTq, pMsg); + break; + case TDMT_VND_STREAM_TASK_UPDATE: + tqProcessTaskUpdateReq(pVnode->pTq, pMsg); + break; case TDMT_VND_COMPACT: vnodeProcessCompactVnodeReq(pVnode, ver, pReq, len, pRsp); goto _exit; @@ -614,7 +623,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg 
walApplyVer(pVnode->pWal, ver); - if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, ver) < 0) { + if (tqPushMsg(pVnode->pTq, pMsg->msgType) < 0) { vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); return -1; } @@ -665,7 +674,7 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { return 0; } - SReadHandle handle = {.config = &pVnode->config, .vnode = pVnode, .pMsgCb = &pVnode->msgCb}; + SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb}; initStorageAPI(&handle.api); switch (pMsg->msgType) { @@ -744,9 +753,9 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) return tqProcessTaskDispatchReq(pVnode->pTq, pMsg, true); case TDMT_STREAM_TASK_DISPATCH_RSP: return tqProcessTaskDispatchRsp(pVnode->pTq, pMsg); - case TDMT_STREAM_TASK_CHECK: + case TDMT_VND_STREAM_TASK_CHECK: return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg); - case TDMT_STREAM_TASK_CHECK_RSP: + case TDMT_VND_STREAM_TASK_CHECK_RSP: return tqProcessStreamTaskCheckRsp(pVnode->pTq, 0, pMsg); case TDMT_STREAM_RETRIEVE: return tqProcessTaskRetrieveReq(pVnode->pTq, pMsg); @@ -754,10 +763,12 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) return tqProcessTaskRetrieveRsp(pVnode->pTq, pMsg); case TDMT_VND_STREAM_SCAN_HISTORY: return tqProcessTaskScanHistory(pVnode->pTq, pMsg); - case TDMT_STREAM_SCAN_HISTORY_FINISH: + case TDMT_VND_STREAM_SCAN_HISTORY_FINISH: return tqProcessTaskScanHistoryFinishReq(pVnode->pTq, pMsg); - case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP: + case TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP: return tqProcessTaskScanHistoryFinishRsp(pVnode->pTq, pMsg); + case TDMT_STREAM_TASK_CHECKPOINT_READY: + return tqProcessStreamTaskCheckpointReadyMsg(pVnode->pTq, pMsg); default: vError("unknown msg type:%d in stream queue", pMsg->msgType); return TSDB_CODE_APP_ERROR; @@ -765,7 +776,6 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo 
*pInfo) } void smaHandleRes(void *pVnode, int64_t smaId, const SArray *data) { - // blockDebugShowDataBlocks(data, __func__); tdProcessTSmaInsert(((SVnode *)pVnode)->pSma, smaId, (const char *)data); } @@ -1668,7 +1678,7 @@ static int32_t vnodeConsolidateAlterHashRange(SVnode *pVnode, int64_t ver) { } static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { - vInfo("vgId:%d, vnode handle msgType:alter-confirm, alter confim msg is processed", TD_VID(pVnode)); + vInfo("vgId:%d, vnode handle msgType:alter-confirm, alter confirm msg is processed", TD_VID(pVnode)); int32_t code = TSDB_CODE_SUCCESS; if (!pVnode->config.hashChange) { goto _exit; diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index d140c4a122..a71257eddf 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -14,6 +14,7 @@ */ #define _DEFAULT_SOURCE +#include "tq.h" #include "vnd.h" #define BATCH_ENABLE 0 @@ -216,7 +217,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) isWeak, isBlock, msg, numOfMsgs, arrayPos, pMsg->info.handle); if (!pVnode->restored) { - vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, TMSG_INFO(pMsg->msgType)); + vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, + TMSG_INFO(pMsg->msgType)); terrno = TSDB_CODE_SYN_RESTORING; vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING); rpcFreeCont(pMsg->pCont); @@ -279,7 +281,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) vnodeIsMsgBlock(pMsg->msgType), msg, numOfMsgs, pMsg->info.handle); if (!pVnode->restored) { - vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, TMSG_INFO(pMsg->msgType)); + vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, + 
TMSG_INFO(pMsg->msgType)); vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING); rpcFreeCont(pMsg->pCont); taosFreeQitem(pMsg); @@ -526,7 +529,8 @@ static int32_t vnodeSnapshotDoWrite(const SSyncFSM *pFsm, void *pWriter, void *p } static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) { - SVnode *pVnode = pFsm->data; + SVnode *pVnode = pFsm->data; + int32_t vgId = pVnode->config.vgId; SyncIndex appliedIdx = -1; do { @@ -538,7 +542,7 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) } else { vInfo("vgId:%d, restore not finish since %" PRId64 " items to be applied. commit-index:%" PRId64 ", applied-index:%" PRId64, - pVnode->config.vgId, commitIdx - appliedIdx, commitIdx, appliedIdx); + vgId, commitIdx - appliedIdx, commitIdx, appliedIdx); taosMsleep(10); } } while (true); @@ -547,14 +551,19 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) walApplyVer(pVnode->pWal, commitIdx); pVnode->restored = true; - vInfo("vgId:%d, sync restore finished, start to restore stream tasks by replay wal", pVnode->config.vgId); - // start to restore all stream tasks - if (tsDisableStream) { - vInfo("vgId:%d, not launch stream tasks, since stream tasks are disabled", pVnode->config.vgId); + if (vnodeIsRoleLeader(pVnode)) { + vInfo("vgId:%d, sync restore finished, start to launch stream tasks", vgId); + + // start to restore all stream tasks + if (tsDisableStream) { + vInfo("vgId:%d, not launch stream tasks, since stream tasks are disabled", vgId); + } else { + vInfo("vgId:%d start to launch stream tasks", pVnode->config.vgId); + tqCheckStreamStatus(pVnode->pTq); + } } else { - vInfo("vgId:%d start to launch stream tasks", pVnode->config.vgId); - tqCheckStreamStatus(pVnode->pTq); + vInfo("vgId:%d, sync restore finished, no launch stream tasks since not leader", vgId); } } @@ -586,6 +595,9 @@ static void vnodeBecomeLearner(const SSyncFSM *pFsm) { static void vnodeBecomeLeader(const 
SSyncFSM *pFsm) { SVnode *pVnode = pFsm->data; + if (pVnode->pTq) { + tqUpdateNodeStage(pVnode->pTq); + } vDebug("vgId:%d, become leader", pVnode->config.vgId); } @@ -660,8 +672,8 @@ int32_t vnodeSyncOpen(SVnode *pVnode, char *path, int32_t vnodeVersion) { vInfo("vgId:%d, start to open sync, replica:%d selfIndex:%d", pVnode->config.vgId, pCfg->replicaNum, pCfg->myIndex); for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) { SNodeInfo *pNode = &pCfg->nodeInfo[i]; - vInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pVnode->config.vgId, i, pNode->nodeFqdn, pNode->nodePort, - pNode->nodeId, pNode->clusterId); + vInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pVnode->config.vgId, i, pNode->nodeFqdn, + pNode->nodePort, pNode->nodeId, pNode->clusterId); } pVnode->sync = syncOpen(&syncInfo, vnodeVersion); diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index 0bd35353e0..8726f57977 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -21,11 +21,11 @@ extern "C" { #include "os.h" #include "tcommon.h" +#include "theap.h" #include "tlosertree.h" #include "tsort.h" #include "ttszip.h" #include "tvariant.h" -#include "theap.h" #include "dataSinkMgt.h" #include "executil.h" @@ -39,22 +39,16 @@ extern "C" { #include "tlockfree.h" #include "tmsg.h" #include "tpagedbuf.h" -//#include "tstream.h" -//#include "tstreamUpdate.h" +// #include "tstream.h" +// #include "tstreamUpdate.h" #include "tlrucache.h" typedef int32_t (*__block_search_fn_t)(char* data, int32_t num, int64_t key, int32_t order); typedef struct STsdbReader STsdbReader; -typedef struct STqReader STqReader; - - -typedef enum SOperatorParamType{ - OP_GET_PARAM = 1, - OP_NOTIFY_PARAM -} SOperatorParamType; - +typedef struct STqReader STqReader; +typedef enum SOperatorParamType { OP_GET_PARAM = 1, OP_NOTIFY_PARAM } SOperatorParamType; #define IS_VALID_SESSION_WIN(winInfo) ((winInfo).sessionWin.win.skey > 0) 
#define SET_SESSION_WIN_INVALID(winInfo) ((winInfo).sessionWin.win.skey = INT64_MIN) @@ -114,17 +108,17 @@ typedef struct SExchangeOpStopInfo { } SExchangeOpStopInfo; typedef struct SGcOperatorParam { - int64_t sessionId; - int32_t downstreamIdx; - int32_t vgId; - int64_t tbUid; - bool needCache; + int64_t sessionId; + int32_t downstreamIdx; + int32_t vgId; + int64_t tbUid; + bool needCache; } SGcOperatorParam; typedef struct SGcNotifyOperatorParam { - int32_t downstreamIdx; - int32_t vgId; - int64_t tbUid; + int32_t downstreamIdx; + int32_t vgId; + int64_t tbUid; } SGcNotifyOperatorParam; typedef struct SExprSupp { @@ -166,15 +160,15 @@ typedef struct SSortMergeJoinOperatorParam { } SSortMergeJoinOperatorParam; typedef struct SExchangeOperatorBasicParam { - int32_t vgId; - int32_t srcOpType; - bool tableSeq; - SArray* uidList; + int32_t vgId; + int32_t srcOpType; + bool tableSeq; + SArray* uidList; } SExchangeOperatorBasicParam; typedef struct SExchangeOperatorBatchParam { - bool multiParams; - SSHashObj* pBatchs; // SExchangeOperatorBasicParam + bool multiParams; + SSHashObj* pBatchs; // SExchangeOperatorBasicParam } SExchangeOperatorBatchParam; typedef struct SExchangeOperatorParam { @@ -259,7 +253,7 @@ typedef struct STableScanBase { SLimitInfo limitInfo; // there are more than one table list exists in one task, if only one vnode exists. 
STableListInfo* pTableListInfo; - TsdReader readerAPI; + TsdReader readerAPI; } STableScanBase; typedef struct STableScanInfo { @@ -275,7 +269,7 @@ typedef struct STableScanInfo { int8_t assignBlockUid; bool hasGroupByTag; bool countOnly; -// TsdReader readerAPI; + // TsdReader readerAPI; } STableScanInfo; typedef struct STableMergeScanInfo { @@ -309,21 +303,21 @@ typedef struct STagScanFilterContext { } STagScanFilterContext; typedef struct STagScanInfo { - SColumnInfo* pCols; - SSDataBlock* pRes; - SColMatchInfo matchInfo; - int32_t curPos; - SLimitNode* pSlimit; - SReadHandle readHandle; - STableListInfo* pTableListInfo; - uint64_t suid; - void* pCtbCursor; - SNode* pTagCond; - SNode* pTagIndexCond; + SColumnInfo* pCols; + SSDataBlock* pRes; + SColMatchInfo matchInfo; + int32_t curPos; + SLimitNode* pSlimit; + SReadHandle readHandle; + STableListInfo* pTableListInfo; + uint64_t suid; + void* pCtbCursor; + SNode* pTagCond; + SNode* pTagIndexCond; STagScanFilterContext filterCtx; - SArray* aUidTags; // SArray - SArray* aFilterIdxs; // SArray - SStorageAPI* pStorageAPI; + SArray* aUidTags; // SArray + SArray* aFilterIdxs; // SArray + SStorageAPI* pStorageAPI; } STagScanInfo; typedef enum EStreamScanMode { @@ -383,8 +377,6 @@ typedef struct STimeWindowAggSupp { int64_t waterMark; TSKEY maxTs; TSKEY minTs; - TSKEY checkPointTs; - TSKEY checkPointInterval; SColumnInfoData timeWindowData; // query time window info for scalar function execution. 
} STimeWindowAggSupp; @@ -407,20 +399,18 @@ typedef struct SStreamScanInfo { uint64_t numOfExec; // execution times STqReader* tqReader; - uint64_t groupId; + uint64_t groupId; struct SUpdateInfo* pUpdateInfo; EStreamScanMode scanMode; - struct SOperatorInfo* pStreamScanOp; - struct SOperatorInfo* pTableScanOp; + struct SOperatorInfo* pStreamScanOp; + struct SOperatorInfo* pTableScanOp; SArray* childIds; SWindowSupporter windowSup; SPartitionBySupporter partitionSup; SExprSupp* pPartScalarSup; bool assignBlockUid; // assign block uid to groupId, temporarily used for generating rollup SMA. int32_t scanWinIndex; // for state operator - int32_t pullDataResIndex; - SSDataBlock* pPullDataRes; // pull data SSDataBlock SSDataBlock* pDeleteDataRes; // delete data SSDataBlock int32_t deleteDataIndex; STimeWindow updateWin; @@ -435,12 +425,13 @@ typedef struct SStreamScanInfo { int32_t blockRecoverTotCnt; SSDataBlock* pRecoverRes; - SSDataBlock* pCreateTbRes; - int8_t igCheckUpdate; - int8_t igExpired; - void* pState; //void + SSDataBlock* pCreateTbRes; + int8_t igCheckUpdate; + int8_t igExpired; + void* pState; // void SStoreTqReader readerFn; - SStateStore stateStore; + SStateStore stateStore; + SSDataBlock* pCheckpointRes; } SStreamScanInfo; typedef struct { @@ -488,7 +479,7 @@ typedef struct SIntervalAggOperatorInfo { int64_t limit; bool slimited; int64_t slimit; - uint64_t curGroupId; // initialize to UINT64_MAX + uint64_t curGroupId; // initialize to UINT64_MAX uint64_t handledGroupNum; BoundedQueue* pBQ; } SIntervalAggOperatorInfo; @@ -502,6 +493,11 @@ typedef struct SMergeAlignedIntervalAggOperatorInfo { SResultRow* pResultRow; } SMergeAlignedIntervalAggOperatorInfo; +typedef struct SOpCheckPointInfo { + uint16_t checkPointId; + SHashObj* children; // key:child id +} SOpCheckPointInfo; + typedef struct SStreamIntervalOperatorInfo { SOptrBasicInfo binfo; // basic info SAggSupporter aggSup; // aggregate supporter @@ -523,15 +519,18 @@ typedef struct 
SStreamIntervalOperatorInfo { SSDataBlock* pPullDataRes; SArray* pChildren; int32_t numOfChild; - SStreamState* pState; // void + SStreamState* pState; // void SWinKey delKey; uint64_t numOfDatapack; SArray* pUpdated; SSHashObj* pUpdatedMap; int64_t dataVersion; - SStateStore statestore; + SStateStore stateStore; bool recvGetAll; SHashObj* pFinalPullDataMap; + SOpCheckPointInfo checkPointInfo; + bool reCkBlock; + SSDataBlock* pCheckpointRes; } SStreamIntervalOperatorInfo; typedef struct SDataGroupInfo { @@ -578,6 +577,8 @@ typedef struct SStreamSessionAggOperatorInfo { int64_t dataVersion; SArray* historyWins; bool isHistoryOp; + bool reCkBlock; + SSDataBlock* pCheckpointRes; } SStreamSessionAggOperatorInfo; typedef struct SStreamStateAggOperatorInfo { @@ -599,6 +600,8 @@ typedef struct SStreamStateAggOperatorInfo { int64_t dataVersion; bool isHistoryOp; SArray* historyWins; + bool reCkBlock; + SSDataBlock* pCheckpointRes; } SStreamStateAggOperatorInfo; typedef struct SStreamPartitionOperatorInfo { @@ -652,7 +655,9 @@ typedef struct SStreamFillOperatorInfo { #define OPTR_SET_OPENED(_optr) ((_optr)->status |= OP_OPENED) SSchemaWrapper* extractQueriedColumnSchema(SScanPhysiNode* pScanNode); -int32_t initQueriedTableSchemaInfo(SReadHandle* pHandle, SScanPhysiNode* pScanNode, const char* dbName, SExecTaskInfo* pTaskInfo); + +int32_t initQueriedTableSchemaInfo(SReadHandle* pHandle, SScanPhysiNode* pScanNode, const char* dbName, + SExecTaskInfo* pTaskInfo); void cleanupQueriedTableScanInfo(void* p); void initBasicInfo(SOptrBasicInfo* pInfo, SSDataBlock* pBlock); @@ -724,7 +729,8 @@ bool isInTimeWindow(STimeWindow* pWin, TSKEY ts, int64_t gap); bool functionNeedToExecute(SqlFunctionCtx* pCtx); bool isOverdue(TSKEY ts, STimeWindowAggSupp* pSup); bool isCloseWindow(STimeWindow* pWin, STimeWindowAggSupp* pSup); -bool isDeletedStreamWindow(STimeWindow* pWin, uint64_t groupId, void* pState, STimeWindowAggSupp* pTwSup, SStateStore* pStore); +bool 
isDeletedStreamWindow(STimeWindow* pWin, uint64_t groupId, void* pState, STimeWindowAggSupp* pTwSup, + SStateStore* pStore); void appendOneRowToStreamSpecialBlock(SSDataBlock* pBlock, TSKEY* pStartTs, TSKEY* pEndTs, uint64_t* pUid, uint64_t* pGp, void* pTbName); uint64_t calGroupIdByData(SPartitionBySupporter* pParSup, SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t rowId); @@ -736,8 +742,8 @@ bool groupbyTbname(SNodeList* pGroupList); int32_t buildDataBlockFromGroupRes(struct SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup, SGroupResInfo* pGroupResInfo); int32_t saveSessionDiscBuf(void* pState, SSessionKey* key, void* buf, int32_t size, SStateStore* pAPI); -int32_t buildSessionResultDataBlock(struct SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, - SExprSupp* pSup, SGroupResInfo* pGroupResInfo); +int32_t buildSessionResultDataBlock(struct SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup, + SGroupResInfo* pGroupResInfo); int32_t releaseOutputBuf(void* pState, SWinKey* pKey, SResultRow* pResult, SStateStore* pAPI); void getNextIntervalWindow(SInterval* pInterval, STimeWindow* tw, int32_t order); int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int32_t pos, int32_t order, @@ -755,15 +761,17 @@ void doUpdateNumOfRows(SqlFunctionCtx* pCtx, SResultRow* pRow, int32_t numOfExpr void doClearBufferedBlocks(SStreamScanInfo* pInfo); uint64_t calcGroupId(char* pData, int32_t len); -void streamOpReleaseState(struct SOperatorInfo* pOperator); -void streamOpReloadState(struct SOperatorInfo* pOperator); +void streamOpReleaseState(struct SOperatorInfo* pOperator); +void streamOpReloadState(struct SOperatorInfo* pOperator); -void destroyOperatorParamValue(void* pValues); -int32_t mergeOperatorParams(SOperatorParam* pDst, SOperatorParam* pSrc); -int32_t buildTableScanOperatorParam(SOperatorParam** ppRes, SArray* pUidList, int32_t srcOpType, bool tableSeq); -void 
freeExchangeGetBasicOperatorParam(void* pParam); -void freeOperatorParam(SOperatorParam* pParam, SOperatorParamType type); -void freeResetOperatorParams(struct SOperatorInfo* pOperator, SOperatorParamType type, bool allFree); +int32_t encodeSTimeWindowAggSupp(void** buf, STimeWindowAggSupp* pTwAggSup); +void* decodeSTimeWindowAggSupp(void* buf, STimeWindowAggSupp* pTwAggSup); +void destroyOperatorParamValue(void* pValues); +int32_t mergeOperatorParams(SOperatorParam* pDst, SOperatorParam* pSrc); +int32_t buildTableScanOperatorParam(SOperatorParam** ppRes, SArray* pUidList, int32_t srcOpType, bool tableSeq); +void freeExchangeGetBasicOperatorParam(void* pParam); +void freeOperatorParam(SOperatorParam* pParam, SOperatorParamType type); +void freeResetOperatorParams(struct SOperatorInfo* pOperator, SOperatorParamType type, bool allFree); SSDataBlock* getNextBlockFromDownstreamImpl(struct SOperatorInfo* pOperator, int32_t idx, bool clearParam); bool inSlidingWindow(SInterval* pInterval, STimeWindow* pWin, SDataBlockInfo* pBlockInfo); @@ -771,7 +779,7 @@ bool inCalSlidingWindow(SInterval* pInterval, STimeWindow* pWin, TSKEY calStart, bool compareVal(const char* v, const SStateKeys* pKey); int32_t getNextQualifiedWindow(SInterval* pInterval, STimeWindow* pNext, SDataBlockInfo* pDataBlockInfo, - TSKEY* primaryKeys, int32_t prevPosition, int32_t order); + TSKEY* primaryKeys, int32_t prevPosition, int32_t order); #ifdef __cplusplus } diff --git a/source/libs/executor/inc/operator.h b/source/libs/executor/inc/operator.h index 6335ac8181..13da9f7238 100644 --- a/source/libs/executor/inc/operator.h +++ b/source/libs/executor/inc/operator.h @@ -116,7 +116,7 @@ SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SMerge SOperatorInfo* createMergeAlignedIntervalOperatorInfo(SOperatorInfo* downstream, SMergeAlignedIntervalPhysiNode* pNode, SExecTaskInfo* pTaskInfo); -SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* 
pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild); +SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild, SReadHandle* pHandle); SOperatorInfo* createSessionAggOperatorInfo(SOperatorInfo* downstream, SSessionWinodwPhysiNode* pSessionNode, SExecTaskInfo* pTaskInfo); @@ -146,7 +146,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild, SReadHandle* pHandle); -SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); +SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle); SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle); diff --git a/source/libs/executor/inc/querytask.h b/source/libs/executor/inc/querytask.h index 3690e8d234..fcafd5a4e3 100644 --- a/source/libs/executor/inc/querytask.h +++ b/source/libs/executor/inc/querytask.h @@ -70,8 +70,6 @@ typedef struct { SVersionRange fillHistoryVer; STimeWindow fillHistoryWindow; SStreamState* pState; - int64_t dataVersion; - int64_t checkPointId; } SStreamTaskInfo; struct SExecTaskInfo { diff --git a/source/libs/executor/src/dataDispatcher.c b/source/libs/executor/src/dataDispatcher.c index 53c7c073ed..abe566473f 100644 --- a/source/libs/executor/src/dataDispatcher.c +++ b/source/libs/executor/src/dataDispatcher.c @@ -54,8 +54,8 @@ typedef struct SDataDispatchHandle { // clang-format off // data format: // 
+----------------+------------------+--------------+--------------+------------------+--------------------------------------------+------------------------------------+-------------+-----------+-------------+-----------+ -// |SDataCacheEntry | version | total length | numOfRows | group id | col1_schema | col2_schema | col3_schema... | column#1 length, column#2 length...| col1 bitmap | col1 data | col2 bitmap | col2 data | .... | | (4 bytes) |(8 bytes) -// | | sizeof(int32_t) |sizeof(int32) | sizeof(int32)| sizeof(uint64_t) | (sizeof(int8_t)+sizeof(int32_t))*numOfCols | sizeof(int32_t) * numOfCols | actual size | | +// |SDataCacheEntry | version | total length | numOfRows | group id | col1_schema | col2_schema | col3_schema... | column#1 length, column#2 length...| col1 bitmap | col1 data | col2 bitmap | col2 data | +// | | sizeof(int32_t) |sizeof(int32) | sizeof(int32)| sizeof(uint64_t) | (sizeof(int8_t)+sizeof(int32_t))*numOfCols | sizeof(int32_t) * numOfCols | actual size | | | // +----------------+------------------+--------------+--------------+------------------+--------------------------------------------+------------------------------------+-------------+-----------+-------------+-----------+ // The length of bitmap is decided by number of rows of this data block, and the length of each column data is // recorded in the first segment, next to the struct header diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 8d4b1c0ea0..60dc6f0185 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -14,9 +14,9 @@ */ #include "executor.h" -#include -#include #include "executorInt.h" +#include "trpc.h" +#include "wal.h" #include "operator.h" #include "planner.h" #include "querytask.h" @@ -149,11 +149,15 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu } else if (type == STREAM_INPUT__DATA_BLOCK) { for (int32_t i = 0; i < numOfBlocks; ++i) { SSDataBlock* 
pDataBlock = &((SSDataBlock*)input)[i]; - SPackedData tmp = { .pDataBlock = pDataBlock }; + SPackedData tmp = {.pDataBlock = pDataBlock}; taosArrayPush(pInfo->pBlockLists, &tmp); } pInfo->blockType = STREAM_INPUT__DATA_BLOCK; + } else if (type == STREAM_INPUT__CHECKPOINT) { + SPackedData tmp = {.pDataBlock = input}; + taosArrayPush(pInfo->pBlockLists, &tmp); + pInfo->blockType = STREAM_INPUT__CHECKPOINT; } else { ASSERT(0); } @@ -162,7 +166,7 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu } } -void doSetTaskId(SOperatorInfo* pOperator, SStorageAPI *pAPI) { +void doSetTaskId(SOperatorInfo* pOperator, SStorageAPI* pAPI) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { SStreamScanInfo* pStreamScanInfo = pOperator->info; @@ -203,13 +207,6 @@ int32_t qSetStreamOpOpen(qTaskInfo_t tinfo) { return code; } -void qGetCheckpointVersion(qTaskInfo_t tinfo, int64_t* dataVer, int64_t* ckId) { - SExecTaskInfo* pTaskInfo = tinfo; - *dataVer = pTaskInfo->streamInfo.dataVersion; - *ckId = pTaskInfo->streamInfo.checkPointId; -} - - int32_t qSetMultiStreamInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, int32_t type) { if (tinfo == NULL) { return TSDB_CODE_APP_ERROR; @@ -330,7 +327,7 @@ qTaskInfo_t qCreateStreamExecTaskInfo(void* msg, SReadHandle* readers, int32_t v } static SArray* filterUnqualifiedTables(const SStreamScanInfo* pScanInfo, const SArray* tableIdList, const char* idstr, - SStorageAPI* pAPI) { + SStorageAPI* pAPI) { SArray* qa = taosArrayInit(4, sizeof(tb_uid_t)); int32_t numOfUids = taosArrayGetSize(tableIdList); if (numOfUids == 0) { @@ -341,7 +338,7 @@ static SArray* filterUnqualifiedTables(const SStreamScanInfo* pScanInfo, const S uint64_t suid = 0; uint64_t uid = 0; - int32_t type = 0; + int32_t type = 0; tableListGetSourceTableInfo(pTableScanInfo->base.pTableListInfo, &suid, &uid, &type); // let's discard the tables those are not created 
according to the queried super table. @@ -1156,7 +1153,7 @@ void qStreamSetOpen(qTaskInfo_t tinfo) { int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subType) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; - SStorageAPI* pAPI = &pTaskInfo->storageAPI; + SStorageAPI* pAPI = &pTaskInfo->storageAPI; SOperatorInfo* pOperator = pTaskInfo->pRoot; const char* id = GET_TASKID(pTaskInfo); @@ -1193,7 +1190,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT pScanBaseInfo->dataReader = NULL; SStoreTqReader* pReaderAPI = &pTaskInfo->storageAPI.tqReaderFn; - SWalReader* pWalReader = pReaderAPI->tqReaderGetWalReader(pInfo->tqReader); + SWalReader* pWalReader = pReaderAPI->tqReaderGetWalReader(pInfo->tqReader); walReaderVerifyOffset(pWalReader, pOffset); if (pReaderAPI->tqReaderSeek(pInfo->tqReader, pOffset->version, id) < 0) { qError("tqReaderSeek failed ver:%" PRId64 ", %s", pOffset->version, id); @@ -1251,8 +1248,9 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT pScanInfo->scanTimes = 0; if (pScanBaseInfo->dataReader == NULL) { - int32_t code = pTaskInfo->storageAPI.tsdReader.tsdReaderOpen(pScanBaseInfo->readHandle.vnode, &pScanBaseInfo->cond, &keyInfo, 1, - pScanInfo->pResBlock, (void**) &pScanBaseInfo->dataReader, id, false, NULL); + int32_t code = pTaskInfo->storageAPI.tsdReader.tsdReaderOpen( + pScanBaseInfo->readHandle.vnode, &pScanBaseInfo->cond, &keyInfo, 1, pScanInfo->pResBlock, + (void**)&pScanBaseInfo->dataReader, id, false, NULL); if (code != TSDB_CODE_SUCCESS) { qError("prepare read tsdb snapshot failed, uid:%" PRId64 ", code:%s %s", pOffset->uid, tstrerror(code), id); terrno = code; @@ -1310,8 +1308,8 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT STableKeyInfo* pList = tableListGetInfo(pTableListInfo, 0); int32_t size = tableListGetSize(pTableListInfo); - pTaskInfo->storageAPI.tsdReader.tsdReaderOpen(pInfo->vnode, 
&pTaskInfo->streamInfo.tableCond, pList, size, NULL, (void**) &pInfo->dataReader, NULL, - false, NULL); + pTaskInfo->storageAPI.tsdReader.tsdReaderOpen(pInfo->vnode, &pTaskInfo->streamInfo.tableCond, pList, size, NULL, + (void**)&pInfo->dataReader, NULL, false, NULL); cleanupQueryTableDataCond(&pTaskInfo->streamInfo.tableCond); strcpy(pTaskInfo->streamInfo.tbName, mtInfo.tbName); @@ -1369,7 +1367,7 @@ void qProcessRspMsg(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo) { SExecTaskInfo* pTaskInfo = tinfo; - SArray* plist = getTableListInfo(pTaskInfo); + SArray* plist = getTableListInfo(pTaskInfo); // only extract table in the first elements STableListInfo* pTableListInfo = taosArrayGetP(plist, 0); @@ -1377,7 +1375,7 @@ SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo) { SArray* pUidList = taosArrayInit(10, sizeof(uint64_t)); int32_t numOfTables = tableListGetSize(pTableListInfo); - for(int32_t i = 0; i < numOfTables; ++i) { + for (int32_t i = 0; i < numOfTables; ++i) { STableKeyInfo* pKeyInfo = tableListGetInfo(pTableListInfo, i); taosArrayPush(pUidList, &pKeyInfo->uid); } diff --git a/source/libs/executor/src/executorInt.c b/source/libs/executor/src/executorInt.c index dda8b133ca..3a60a7bf83 100644 --- a/source/libs/executor/src/executorInt.c +++ b/source/libs/executor/src/executorInt.c @@ -30,10 +30,10 @@ #include "operator.h" #include "query.h" #include "querytask.h" +#include "storageapi.h" #include "tcompare.h" #include "thash.h" #include "ttypes.h" -#include "storageapi.h" #define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN) #define GET_FORWARD_DIRECTION_FACTOR(ord) (((ord) == TSDB_ORDER_ASC) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP) @@ -697,8 +697,8 @@ int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprS if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) { uint32_t newSize = pBlock->info.rows + pRow->numOfRows + ((numOfRows - i) > 1 ? 
1 : 0); blockDataEnsureCapacity(pBlock, newSize); - qDebug("datablock capacity not sufficient, expand to required:%d, current capacity:%d, %s", - newSize, pBlock->info.capacity, GET_TASKID(pTaskInfo)); + qDebug("datablock capacity not sufficient, expand to required:%d, current capacity:%d, %s", newSize, + pBlock->info.capacity, GET_TASKID(pTaskInfo)); // todo set the pOperator->resultInfo size } @@ -722,9 +722,9 @@ int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprS void doBuildStreamResBlock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo, SDiskbasedBuf* pBuf) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pTaskInfo->storageAPI; + SStorageAPI* pAPI = &pTaskInfo->storageAPI; - SSDataBlock* pBlock = pbInfo->pRes; + SSDataBlock* pBlock = pbInfo->pRes; // set output datablock version pBlock->info.version = pTaskInfo->version; @@ -737,10 +737,12 @@ void doBuildStreamResBlock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGr // clear the existed group id pBlock->info.id.groupId = 0; ASSERT(!pbInfo->mergeResultBlock); - doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, false); + doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, + false); void* tbname = NULL; - if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, &tbname) < 0) { + if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, &tbname) < + 0) { pBlock->info.parTbName[0] = 0; } else { memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN); @@ -765,10 +767,12 @@ void doBuildResultDatablock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SG // clear the existed group id pBlock->info.id.groupId = 0; if (!pbInfo->mergeResultBlock) { - doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, 
pBuf, pGroupResInfo, pOperator->resultInfo.threshold, false); + doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, + false); } else { while (hasRemainResults(pGroupResInfo)) { - doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, true); + doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, + true); if (pBlock->info.rows >= pOperator->resultInfo.threshold) { break; } @@ -966,10 +970,10 @@ int32_t saveSessionDiscBuf(void* pState, SSessionKey* key, void* buf, int32_t si return TSDB_CODE_SUCCESS; } -int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, - SExprSupp* pSup, SGroupResInfo* pGroupResInfo) { - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pTaskInfo->storageAPI; +int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup, + SGroupResInfo* pGroupResInfo) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pTaskInfo->storageAPI; SExprInfo* pExprInfo = pSup->pExprInfo; int32_t numOfExprs = pSup->numOfExprs; @@ -986,8 +990,8 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa // ASSERT(code == 0); if (code == -1) { // for history - qWarn("===stream===not found session result key:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, pKey->win.skey, - pKey->win.ekey, pKey->groupId); + qWarn("===stream===not found session result key:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64 "", + pKey->win.skey, pKey->win.ekey, pKey->groupId); pGroupResInfo->index += 1; continue; } @@ -1004,7 +1008,8 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa pBlock->info.id.groupId = pKey->groupId; void* tbname = NULL; - if 
(pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, &tbname) < 0) { + if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, + &tbname) < 0) { pBlock->info.parTbName[0] = 0; } else { memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN); diff --git a/source/libs/executor/src/filloperator.c b/source/libs/executor/src/filloperator.c index f836e71bc9..9fce058c4c 100644 --- a/source/libs/executor/src/filloperator.c +++ b/source/libs/executor/src/filloperator.c @@ -1367,6 +1367,7 @@ static SSDataBlock* doStreamFill(SOperatorInfo* pOperator) { memcpy(pInfo->pSrcBlock->info.parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN); pInfo->srcRowIndex = -1; } break; + case STREAM_CHECKPOINT: case STREAM_CREATE_CHILD_TABLE: { return pBlock; } break; diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index 9ec95faa38..fb2204eae8 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -1130,9 +1130,13 @@ static SSDataBlock* doStreamHashPartition(SOperatorInfo* pOperator) { printDataBlock(pInfo->pDelRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); return pInfo->pDelRes; } break; - default: - ASSERTS(pBlock->info.type == STREAM_CREATE_CHILD_TABLE || pBlock->info.type == STREAM_RETRIEVE, "invalid SSDataBlock type"); + case STREAM_CREATE_CHILD_TABLE: + case STREAM_RETRIEVE: + case STREAM_CHECKPOINT: { return pBlock; + } + default: + ASSERTS(0, "invalid SSDataBlock type"); } // there is an scalar expression that needs to be calculated right before apply the group aggregation. 
@@ -1185,8 +1189,8 @@ void initParDownStream(SOperatorInfo* downstream, SPartitionBySupporter* pParSup SStreamScanInfo* pScanInfo = downstream->info; pScanInfo->partitionSup = *pParSup; pScanInfo->pPartScalarSup = pExpr; - if (!pScanInfo->igCheckUpdate && !pScanInfo->pUpdateInfo) { - pScanInfo->pUpdateInfo = pAPI->stateStore.updateInfoInit(60000, TSDB_TIME_PRECISION_MILLI, 0); + if (!pScanInfo->pUpdateInfo) { + pScanInfo->pUpdateInfo = pAPI->stateStore.updateInfoInit(60000, TSDB_TIME_PRECISION_MILLI, 0, pScanInfo->igCheckUpdate); } } diff --git a/source/libs/executor/src/operator.c b/source/libs/executor/src/operator.c index d80cf812f8..6f9aac7595 100644 --- a/source/libs/executor/src/operator.c +++ b/source/libs/executor/src/operator.c @@ -479,7 +479,7 @@ SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SR SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; pOptr = createIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type) { - pOptr = createStreamIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo); + pOptr = createStreamIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, pHandle); } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_ALIGNED_INTERVAL == type) { SMergeAlignedIntervalPhysiNode* pIntervalPhyNode = (SMergeAlignedIntervalPhysiNode*)pPhyNode; pOptr = createMergeAlignedIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo); @@ -488,10 +488,10 @@ SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SR pOptr = createMergeIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL == type) { int32_t children = 0; - pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); + pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children, pHandle); } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL == type) { int32_t children 
= pHandle->numOfVgroups; - pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); + pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children, pHandle); } else if (QUERY_NODE_PHYSICAL_PLAN_SORT == type) { pOptr = createSortOperatorInfo(ops[0], (SSortPhysiNode*)pPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_GROUP_SORT == type) { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 8d35a02c57..d0b892e0f1 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -38,11 +38,12 @@ int32_t scanDebug = 0; -#define MULTI_READER_MAX_TABLE_NUM 5000 -#define SET_REVERSE_SCAN_FLAG(_info) ((_info)->scanFlag = REVERSE_SCAN) -#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC)) -#define STREAM_SCAN_OP_NAME "StreamScanOperator" -#define STREAM_SCAN_OP_STATE_NAME "StreamScanFillHistoryState" +#define MULTI_READER_MAX_TABLE_NUM 5000 +#define SET_REVERSE_SCAN_FLAG(_info) ((_info)->scanFlag = REVERSE_SCAN) +#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? 
TSDB_ORDER_DESC : TSDB_ORDER_ASC)) +#define STREAM_SCAN_OP_NAME "StreamScanOperator" +#define STREAM_SCAN_OP_STATE_NAME "StreamScanFillHistoryState" +#define STREAM_SCAN_OP_CHECKPOINT_NAME "StreamScanOperator_Checkpoint" typedef struct STableMergeScanExecInfo { SFileBlockLoadRecorder blockRecorder; @@ -1958,23 +1959,46 @@ static void doCheckUpdate(SStreamScanInfo* pInfo, TSKEY endKey, SSDataBlock* pBl } } -//int32_t streamScanOperatorEncode(SStreamScanInfo* pInfo, void** pBuff) { -// int32_t len = updateInfoSerialize(NULL, 0, pInfo->pUpdateInfo); -// *pBuff = taosMemoryCalloc(1, len); -// updateInfoSerialize(*pBuff, len, pInfo->pUpdateInfo); -// return len; -//} +int32_t streamScanOperatorEncode(SStreamScanInfo* pInfo, void** pBuff) { + int32_t len = pInfo->stateStore.updateInfoSerialize(NULL, 0, pInfo->pUpdateInfo); + len += encodeSTimeWindowAggSupp(NULL, &pInfo->twAggSup); + *pBuff = taosMemoryCalloc(1, len); + void* buf = *pBuff; + encodeSTimeWindowAggSupp(&buf, &pInfo->twAggSup); + pInfo->stateStore.updateInfoSerialize(buf, len, pInfo->pUpdateInfo); + return len; +} + +void streamScanOperatorSaveCheckpoint(SStreamScanInfo* pInfo) { + if (!pInfo->pState) { + return; + } + void* pBuf = NULL; + int32_t len = streamScanOperatorEncode(pInfo, &pBuf); + pInfo->stateStore.streamStateSaveInfo(pInfo->pState, STREAM_SCAN_OP_CHECKPOINT_NAME, strlen(STREAM_SCAN_OP_CHECKPOINT_NAME), pBuf, len); + taosMemoryFree(pBuf); + pInfo->stateStore.streamStateCommit(pInfo->pState); +} // other properties are recovered from the execution plan void streamScanOperatorDecode(void* pBuff, int32_t len, SStreamScanInfo* pInfo) { if (!pBuff || len == 0) { return; } + void* buf = pBuff; + buf = decodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + int32_t tlen = len - encodeSTimeWindowAggSupp(NULL, &pInfo->twAggSup); + if (tlen == 0) { + return; + } void* pUpInfo = taosMemoryCalloc(1, sizeof(SUpdateInfo)); - int32_t code = pInfo->stateStore.updateInfoDeserialize(pBuff, len, pUpInfo); + int32_t 
code = pInfo->stateStore.updateInfoDeserialize(buf, tlen, pUpInfo); if (code == TSDB_CODE_SUCCESS) { + pInfo->stateStore.updateInfoDestroy(pInfo->pUpdateInfo); pInfo->pUpdateInfo = pUpInfo; + } else { + taosMemoryFree(pUpInfo); } } @@ -2155,6 +2179,9 @@ FETCH_NEXT_BLOCK: } } } break; + case STREAM_CHECKPOINT: { + qError("stream check point error. msg type: STREAM_INPUT__DATA_BLOCK"); + } break; default: break; } @@ -2295,6 +2322,23 @@ FETCH_NEXT_BLOCK: } goto NEXT_SUBMIT_BLK; + } else if (pInfo->blockType == STREAM_INPUT__CHECKPOINT) { + if (pInfo->validBlockIndex >= total) { + doClearBufferedBlocks(pInfo); + return NULL; + } + + int32_t current = pInfo->validBlockIndex++; + qDebug("process %d/%d input data blocks, %s", current, (int32_t) total, id); + + SPackedData* pData = taosArrayGet(pInfo->pBlockLists, current); + SSDataBlock* pBlock = taosArrayGet(pData->pDataBlock, 0); + + if (pBlock->info.type == STREAM_CHECKPOINT) { + streamScanOperatorSaveCheckpoint(pInfo); + } + // printDataBlock(pBlock, "stream scan ck"); + return pInfo->pCheckpointRes; } return NULL; @@ -2458,11 +2502,12 @@ static void destroyStreamScanOperatorInfo(void* param) { pStreamScan->stateStore.updateInfoDestroy(pStreamScan->pUpdateInfo); blockDataDestroy(pStreamScan->pRes); blockDataDestroy(pStreamScan->pUpdateRes); - blockDataDestroy(pStreamScan->pPullDataRes); blockDataDestroy(pStreamScan->pDeleteDataRes); blockDataDestroy(pStreamScan->pUpdateDataRes); blockDataDestroy(pStreamScan->pCreateTbRes); taosArrayDestroy(pStreamScan->pBlockLists); + blockDataDestroy(pStreamScan->pCheckpointRes); + taosMemoryFree(pStreamScan); } @@ -2669,7 +2714,6 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys pInfo->scanMode = STREAM_SCAN_FROM_READERHANDLE; pInfo->windowSup = (SWindowSupporter){.pStreamAggSup = NULL, .gap = -1, .parentType = QUERY_NODE_PHYSICAL_PLAN}; pInfo->groupId = 0; - pInfo->pPullDataRes = createSpecialDataBlock(STREAM_RETRIEVE); pInfo->pStreamScanOp = 
pOperator; pInfo->deleteDataIndex = 0; pInfo->pDeleteDataRes = createSpecialDataBlock(STREAM_DELETE_DATA); @@ -2683,14 +2727,17 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys pInfo->pState = pTaskInfo->streamInfo.pState; pInfo->stateStore = pTaskInfo->storageAPI.stateStore; pInfo->readerFn = pTaskInfo->storageAPI.tqReaderFn; + pInfo->pCheckpointRes = createSpecialDataBlock(STREAM_CHECKPOINT); // for stream if (pTaskInfo->streamInfo.pState) { void* buff = NULL; int32_t len = 0; - pAPI->stateStore.streamStateGetInfo(pTaskInfo->streamInfo.pState, STREAM_SCAN_OP_NAME, strlen(STREAM_SCAN_OP_NAME), &buff, &len); - streamScanOperatorDecode(buff, len, pInfo); - taosMemoryFree(buff); + int32_t res = pAPI->stateStore.streamStateGetInfo(pTaskInfo->streamInfo.pState, STREAM_SCAN_OP_CHECKPOINT_NAME, strlen(STREAM_SCAN_OP_CHECKPOINT_NAME), &buff, &len); + if (res == TSDB_CODE_SUCCESS) { + streamScanOperatorDecode(buff, len, pInfo); + taosMemoryFree(buff); + } } setOperatorInfo(pOperator, STREAM_SCAN_OP_NAME, QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN, false, OP_NOT_OPENED, pInfo, diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 01514ea88a..c0e2a44153 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -18,6 +18,7 @@ #include "functionMgt.h" #include "operator.h" #include "querytask.h" +#include "tchecksum.h" #include "tcommon.h" #include "tcompare.h" #include "tdatablock.h" @@ -26,12 +27,15 @@ #include "tlog.h" #include "ttime.h" -#define IS_FINAL_INTERVAL_OP(op) ((op)->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL) -#define IS_FINAL_SESSION_OP(op) ((op)->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) -#define DEAULT_DELETE_MARK (1000LL * 60LL * 60LL * 24LL * 365LL * 10LL); -#define STREAM_INTERVAL_OP_STATE_NAME "StreamIntervalHistoryState" -#define 
STREAM_SESSION_OP_STATE_NAME "StreamSessionHistoryState" -#define STREAM_STATE_OP_STATE_NAME "StreamStateHistoryState" +#define IS_FINAL_INTERVAL_OP(op) ((op)->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL) +#define IS_FINAL_SESSION_OP(op) ((op)->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) +#define DEAULT_DELETE_MARK (1000LL * 60LL * 60LL * 24LL * 365LL * 10LL); +#define STREAM_INTERVAL_OP_STATE_NAME "StreamIntervalHistoryState" +#define STREAM_SESSION_OP_STATE_NAME "StreamSessionHistoryState" +#define STREAM_STATE_OP_STATE_NAME "StreamStateHistoryState" +#define STREAM_INTERVAL_OP_CHECKPOINT_NAME "StreamIntervalOperator_Checkpoint" +#define STREAM_SESSION_OP_CHECKPOINT_NAME "StreamSessionOperator_Checkpoint" +#define STREAM_STATE_OP_CHECKPOINT_NAME "StreamStateOperator_Checkpoint" typedef struct SStateWindowInfo { SResultWindowInfo winInfo; @@ -353,7 +357,7 @@ static void doBuildDeleteResult(SStreamIntervalOperatorInfo* pInfo, SArray* pWin for (int32_t i = *index; i < size; i++) { SWinKey* pWin = taosArrayGet(pWins, i); void* tbname = NULL; - pInfo->statestore.streamStateGetParName(pInfo->pState, pWin->groupId, &tbname); + pInfo->stateStore.streamStateGetParName(pInfo->pState, pWin->groupId, &tbname); if (tbname == NULL) { appendOneRowToStreamSpecialBlock(pBlock, &pWin->ts, &pWin->ts, &uid, &pWin->groupId, NULL); } else { @@ -361,7 +365,7 @@ static void doBuildDeleteResult(SStreamIntervalOperatorInfo* pInfo, SArray* pWin STR_WITH_MAXSIZE_TO_VARSTR(parTbName, tbname, sizeof(parTbName)); appendOneRowToStreamSpecialBlock(pBlock, &pWin->ts, &pWin->ts, &uid, &pWin->groupId, parTbName); } - pInfo->statestore.streamStateFreeVal(tbname); + pInfo->stateStore.streamStateFreeVal(tbname); (*index)++; } } @@ -381,7 +385,7 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { blockDataDestroy(pInfo->pPullDataRes); taosArrayDestroy(pInfo->pDelWins); blockDataDestroy(pInfo->pDelRes); - 
pInfo->statestore.streamFileStateDestroy(pInfo->pState->pFileState); + pInfo->stateStore.streamFileStateDestroy(pInfo->pState->pFileState); taosMemoryFreeClear(pInfo->pState); nodesDestroyNode((SNode*)pInfo->pPhyNode); @@ -392,6 +396,8 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { pInfo->pUpdatedMap = NULL; pInfo->pUpdated = taosArrayDestroy(pInfo->pUpdated); + blockDataDestroy(pInfo->pCheckpointRes); + taosMemoryFreeClear(param); } @@ -416,7 +422,8 @@ void initIntervalDownStream(SOperatorInfo* downstream, uint16_t type, SStreamInt pScanInfo->windowSup.parentType = type; pScanInfo->windowSup.pIntervalAggSup = &pInfo->aggSup; if (!pScanInfo->pUpdateInfo) { - pScanInfo->pUpdateInfo = pAPI->updateInfoInitP(&pInfo->interval, pInfo->twAggSup.waterMark); + pScanInfo->pUpdateInfo = + pAPI->updateInfoInitP(&pInfo->interval, pInfo->twAggSup.waterMark, pScanInfo->igCheckUpdate); } pScanInfo->interval = pInfo->interval; @@ -513,7 +520,7 @@ static void clearStreamIntervalOperator(SStreamIntervalOperatorInfo* pInfo) { clearDiskbasedBuf(pInfo->aggSup.pResultBuf); initResultRowInfo(&pInfo->binfo.resultRowInfo); pInfo->aggSup.currentPageId = -1; - pInfo->statestore.streamStateClear(pInfo->pState); + pInfo->stateStore.streamStateClear(pInfo->pState); } static void clearSpecialDataBlock(SSDataBlock* pBlock) { @@ -745,11 +752,6 @@ static int32_t getNextQualifiedFinalWindow(SInterval* pInterval, STimeWindow* pN return startPos; } -static void setStreamDataVersion(SExecTaskInfo* pTaskInfo, int64_t version, int64_t ckId) { - pTaskInfo->streamInfo.dataVersion = version; - pTaskInfo->streamInfo.checkPointId = ckId; -} - static void doStreamIntervalAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBlock, uint64_t groupId, SSHashObj* pUpdatedMap) { SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)pOperator->info; @@ -794,7 +796,7 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDat .groupId = groupId, }; void* chIds = 
taosHashGet(pInfo->pPullDataMap, &winRes, sizeof(SWinKey)); - if (isDeletedStreamWindow(&nextWin, groupId, pInfo->pState, &pInfo->twAggSup, &pInfo->statestore) && isClosed && + if (isDeletedStreamWindow(&nextWin, groupId, pInfo->pState, &pInfo->twAggSup, &pInfo->stateStore) && isClosed && !chIds) { SPullWindowInfo pull = { .window = nextWin, .groupId = groupId, .calWin.skey = nextWin.skey, .calWin.ekey = nextWin.skey}; @@ -826,7 +828,7 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDat } int32_t code = setIntervalOutputBuf(pInfo->pState, &nextWin, &pResPos, groupId, pSup->pCtx, numOfOutput, - pSup->rowEntryInfoOffset, &pInfo->aggSup, &pInfo->statestore); + pSup->rowEntryInfoOffset, &pInfo->aggSup, &pInfo->stateStore); pResult = (SResultRow*)pResPos->pRowBuff; if (code != TSDB_CODE_SUCCESS || pResult == NULL) { T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); @@ -914,6 +916,214 @@ static void resetUnCloseWinInfo(SSHashObj* winMap) { } } +int32_t encodeSWinKey(void** buf, SWinKey* key) { + int32_t tlen = 0; + tlen += taosEncodeFixedI64(buf, key->ts); + tlen += taosEncodeFixedU64(buf, key->groupId); + return tlen; +} + +void* decodeSWinKey(void* buf, SWinKey* key) { + buf = taosDecodeFixedI64(buf, &key->ts); + buf = taosDecodeFixedU64(buf, &key->groupId); + return buf; +} + +int32_t encodeSRowBuffPos(void** buf, SRowBuffPos* pos) { + int32_t tlen = 0; + tlen += encodeSWinKey(buf, pos->pKey); + return tlen; +} + +void* decodeSRowBuffPos(void* buf, SRowBuffPos* pos) { + buf = decodeSWinKey(buf, pos->pKey); + return buf; +} + +int32_t encodeSTimeWindowAggSupp(void** buf, STimeWindowAggSupp* pTwAggSup) { + int32_t tlen = 0; + tlen += taosEncodeFixedI64(buf, pTwAggSup->minTs); + tlen += taosEncodeFixedI64(buf, pTwAggSup->maxTs); + return tlen; +} + +void* decodeSTimeWindowAggSupp(void* buf, STimeWindowAggSupp* pTwAggSup) { + buf = taosDecodeFixedI64(buf, &pTwAggSup->minTs); + buf = taosDecodeFixedI64(buf, &pTwAggSup->maxTs); + return 
buf; +} + +int32_t encodeSTimeWindow(void** buf, STimeWindow* pWin) { + int32_t tlen = 0; + tlen += taosEncodeFixedI64(buf, pWin->skey); + tlen += taosEncodeFixedI64(buf, pWin->ekey); + return tlen; +} + +void* decodeSTimeWindow(void* buf, STimeWindow* pWin) { + buf = taosDecodeFixedI64(buf, &pWin->skey); + buf = taosDecodeFixedI64(buf, &pWin->ekey); + return buf; +} + +int32_t encodeSPullWindowInfo(void** buf, SPullWindowInfo* pPullInfo) { + int32_t tlen = 0; + tlen += encodeSTimeWindow(buf, &pPullInfo->calWin); + tlen += taosEncodeFixedU64(buf, pPullInfo->groupId); + tlen += encodeSTimeWindow(buf, &pPullInfo->window); + return tlen; +} + +void* decodeSPullWindowInfo(void* buf, SPullWindowInfo* pPullInfo) { + buf = decodeSTimeWindow(buf, &pPullInfo->calWin); + buf = taosDecodeFixedU64(buf, &pPullInfo->groupId); + buf = decodeSTimeWindow(buf, &pPullInfo->window); + return buf; +} + +int32_t encodeSPullWindowInfoArray(void** buf, SArray* pPullInfos) { + int32_t tlen = 0; + int32_t size = taosArrayGetSize(pPullInfos); + tlen += taosEncodeFixedI32(buf, size); + for (int32_t i = 0; i < size; i++) { + void* pItem = taosArrayGet(pPullInfos, i); + tlen += encodeSPullWindowInfo(buf, pItem); + } + return tlen; +} + +void* decodeSPullWindowInfoArray(void* buf, SArray* pPullInfos) { + int32_t size = 0; + buf = taosDecodeFixedI32(buf, &size); + for (int32_t i = 0; i < size; i++) { + SPullWindowInfo item = {0}; + buf = decodeSPullWindowInfo(buf, &item); + taosArrayPush(pPullInfos, &item); + } + return buf; +} + +int32_t doStreamIntervalEncodeOpState(void** buf, int32_t len, SOperatorInfo* pOperator) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + if (!pInfo) { + return 0; + } + + void* pData = (buf == NULL) ? 
NULL : *buf; + + // 1.pResultRowHashTable + int32_t tlen = 0; + int32_t mapSize = tSimpleHashGetSize(pInfo->aggSup.pResultRowHashTable); + tlen += taosEncodeFixedI32(buf, mapSize); + void* pIte = NULL; + size_t keyLen = 0; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pInfo->aggSup.pResultRowHashTable, pIte, &iter)) != NULL) { + void* key = tSimpleHashGetKey(pIte, &keyLen); + tlen += encodeSWinKey(buf, key); + } + + // 2.twAggSup + tlen += encodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + + // 3.pPullDataMap + int32_t size = taosHashGetSize(pInfo->pPullDataMap); + tlen += taosEncodeFixedI32(buf, size); + pIte = NULL; + keyLen = 0; + while ((pIte = taosHashIterate(pInfo->pPullDataMap, pIte)) != NULL) { + void* key = taosHashGetKey(pIte, &keyLen); + tlen += encodeSWinKey(buf, key); + SArray* pArray = (SArray*)pIte; + int32_t chSize = taosArrayGetSize(pArray); + tlen += taosEncodeFixedI32(buf, chSize); + for (int32_t i = 0; i < chSize; i++) { + void* pChItem = taosArrayGet(pArray, i); + tlen += taosEncodeFixedI32(buf, *(int32_t*)pChItem); + } + } + + // 4.pPullWins + tlen += encodeSPullWindowInfoArray(buf, pInfo->pPullWins); + + // 5.dataVersion + tlen += taosEncodeFixedI64(buf, pInfo->dataVersion); + + // 6.checksum + if (buf) { + uint32_t cksum = taosCalcChecksum(0, pData, len - sizeof(uint32_t)); + tlen += taosEncodeFixedU32(buf, cksum); + } else { + tlen += sizeof(uint32_t); + } + + return tlen; +} + +void doStreamIntervalDecodeOpState(void* buf, int32_t len, SOperatorInfo* pOperator) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + if (!pInfo) { + return; + } + + // 6.checksum + int32_t dataLen = len - sizeof(uint32_t); + void* pCksum = POINTER_SHIFT(buf, dataLen); + if (taosCheckChecksum(buf, dataLen, *(uint32_t*)pCksum) != TSDB_CODE_SUCCESS) { + ASSERT(0); // debug + qError("stream interval state is invalid"); + return; + } + + // 1.pResultRowHashTable + int32_t mapSize = 0; + buf = taosDecodeFixedI32(buf, &mapSize); + for (int32_t i = 0; 
i < mapSize; i++) { + SWinKey key = {0}; + buf = decodeSWinKey(buf, &key); + SRowBuffPos* pPos = NULL; + int32_t resSize = pInfo->aggSup.resultRowSize; + pInfo->stateStore.streamStateAddIfNotExist(pInfo->pState, &key, (void**)&pPos, &resSize); + tSimpleHashPut(pInfo->aggSup.pResultRowHashTable, &key, sizeof(SWinKey), &pPos, POINTER_BYTES); + } + + // 2.twAggSup + buf = decodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + + // 3.pPullDataMap + int32_t size = 0; + buf = taosDecodeFixedI32(buf, &size); + for (int32_t i = 0; i < size; i++) { + SWinKey key = {0}; + SArray* pArray = taosArrayInit(0, sizeof(int32_t)); + buf = decodeSWinKey(buf, &key); + int32_t chSize = 0; + buf = taosDecodeFixedI32(buf, &chSize); + for (int32_t i = 0; i < chSize; i++) { + int32_t chId = 0; + buf = taosDecodeFixedI32(buf, &chId); + taosArrayPush(pArray, &chId); + } + taosHashPut(pInfo->pPullDataMap, &key, sizeof(SWinKey), &pArray, POINTER_BYTES); + } + + // 4.pPullWins + buf = decodeSPullWindowInfoArray(buf, pInfo->pPullWins); + + // 5.dataVersion + buf = taosDecodeFixedI64(buf, &pInfo->dataVersion); +} + +void doStreamIntervalSaveCheckpoint(SOperatorInfo* pOperator) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + int32_t len = doStreamIntervalEncodeOpState(NULL, 0, pOperator); + void* buf = taosMemoryCalloc(1, len); + void* pBuf = buf; + len = doStreamIntervalEncodeOpState(&pBuf, len, pOperator); + pInfo->stateStore.streamStateSaveInfo(pInfo->pState, STREAM_INTERVAL_OP_CHECKPOINT_NAME, + strlen(STREAM_INTERVAL_OP_CHECKPOINT_NAME), buf, len); + taosMemoryFree(buf); +} static SSDataBlock* buildIntervalResult(SOperatorInfo* pOperator) { SStreamIntervalOperatorInfo* pInfo = pOperator->info; @@ -966,21 +1176,18 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { resetUnCloseWinInfo(pInfo->aggSup.pResultRowHashTable); } + if (pInfo->reCkBlock) { + pInfo->reCkBlock = false; + printDataBlock(pInfo->pCheckpointRes, getStreamOpName(pOperator->operatorType), 
GET_TASKID(pTaskInfo)); + return pInfo->pCheckpointRes; + } + setOperatorCompleted(pOperator); if (!IS_FINAL_INTERVAL_OP(pOperator)) { clearFunctionContext(&pOperator->exprSupp); // semi interval operator clear disk buffer clearStreamIntervalOperator(pInfo); - setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId); - qDebug("stask:%s ===stream===%s clear", GET_TASKID(pTaskInfo), getStreamOpName(pOperator->operatorType)); - } else { - if (pInfo->twAggSup.maxTs > 0 && - pInfo->twAggSup.maxTs - pInfo->twAggSup.checkPointInterval > pInfo->twAggSup.checkPointTs) { - pAPI->stateStore.streamStateCommit(pInfo->pState); - pAPI->stateStore.streamStateDeleteCheckPoint(pInfo->pState, pInfo->twAggSup.maxTs - pInfo->twAggSup.deleteMark); - pInfo->twAggSup.checkPointTs = pInfo->twAggSup.maxTs; - } - qDebug("stask:%s ===stream===%s close", GET_TASKID(pTaskInfo), getStreamOpName(pOperator->operatorType)); + qDebug("===stream===clear semi operator"); } return NULL; } else { @@ -1075,6 +1282,11 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { continue; } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { return pBlock; + } else if (pBlock->info.type == STREAM_CHECKPOINT) { + pAPI->stateStore.streamStateCommit(pInfo->pState); + doStreamIntervalSaveCheckpoint(pOperator); + copyDataBlock(pInfo->pCheckpointRes, pBlock); + continue; } else { ASSERTS(pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); } @@ -1155,7 +1367,7 @@ static void streamIntervalReleaseState(SOperatorInfo* pOperator) { if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) { SStreamIntervalOperatorInfo* pInfo = pOperator->info; int32_t resSize = sizeof(TSKEY); - pInfo->statestore.streamStateSaveInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, + pInfo->stateStore.streamStateSaveInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, strlen(STREAM_INTERVAL_OP_STATE_NAME), &pInfo->twAggSup.maxTs, resSize); } 
SStreamIntervalOperatorInfo* pInfo = pOperator->info; @@ -1172,12 +1384,12 @@ void streamIntervalReloadState(SOperatorInfo* pOperator) { SStreamIntervalOperatorInfo* pInfo = pOperator->info; int32_t size = 0; void* pBuf = NULL; - int32_t code = pInfo->statestore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, + int32_t code = pInfo->stateStore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, strlen(STREAM_INTERVAL_OP_STATE_NAME), &pBuf, &size); TSKEY ts = *(TSKEY*)pBuf; taosMemoryFree(pBuf); pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts); - pInfo->statestore.streamStateReloadInfo(pInfo->pState, ts); + pInfo->stateStore.streamStateReloadInfo(pInfo->pState, ts); } SOperatorInfo* downstream = pOperator->pDownstream[0]; if (downstream->fpSet.reloadStreamStateFn) { @@ -1186,7 +1398,8 @@ void streamIntervalReloadState(SOperatorInfo* pOperator) { } SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, - SExecTaskInfo* pTaskInfo, int32_t numOfChild) { + SExecTaskInfo* pTaskInfo, int32_t numOfChild, + SReadHandle* pHandle) { SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; SStreamIntervalOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamIntervalOperatorInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); @@ -1211,9 +1424,6 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, .deleteMark = getDeleteMark(pIntervalPhyNode), .deleteMarkSaved = 0, .calTriggerSaved = 0, - .checkPointTs = 0, - .checkPointInterval = - convertTimePrecision(tsCheckpointInterval, TSDB_TIME_PRECISION_MILLI, pInfo->interval.precision), }; ASSERTS(pInfo->twAggSup.calTrigger != STREAM_TRIGGER_MAX_DELAY, "trigger type should not be max delay"); pInfo->primaryTsIndex = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId; @@ -1266,12 +1476,13 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, 
pInfo->pUpdated = NULL; pInfo->pUpdatedMap = NULL; int32_t funResSize = getMaxFunResSize(&pOperator->exprSupp, numOfCols); - pInfo->pState->pFileState = - pAPI->stateStore.streamFileStateInit(tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, - compareTs, pInfo->pState, pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo)); + pInfo->pState->pFileState = pAPI->stateStore.streamFileStateInit( + tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, compareTs, pInfo->pState, + pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo), pHandle->checkpointId); pInfo->dataVersion = 0; - pInfo->statestore = pTaskInfo->storageAPI.stateStore; + pInfo->stateStore = pTaskInfo->storageAPI.stateStore; pInfo->recvGetAll = false; + pInfo->pCheckpointRes = createSpecialDataBlock(STREAM_CHECKPOINT); pOperator->operatorType = pPhyNode->type; if (!IS_FINAL_INTERVAL_OP(pOperator) || numOfChild == 0) { @@ -1293,6 +1504,16 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, goto _error; } + // for stream + void* buff = NULL; + int32_t len = 0; + int32_t res = pAPI->stateStore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_CHECKPOINT_NAME, + strlen(STREAM_INTERVAL_OP_CHECKPOINT_NAME), &buff, &len); + if (res == TSDB_CODE_SUCCESS) { + doStreamIntervalDecodeOpState(buff, len, pOperator); + taosMemoryFree(buff); + } + return pOperator; _error: @@ -1314,6 +1535,7 @@ void destroyStreamSessionAggOperatorInfo(void* param) { SStreamSessionAggOperatorInfo* pInfo = (SStreamSessionAggOperatorInfo*)param; cleanupBasicInfo(&pInfo->binfo); destroyStreamAggSupporter(&pInfo->streamAggSup); + cleanupExprSupp(&pInfo->scalarSupp); if (pInfo->pChildren != NULL) { int32_t size = taosArrayGetSize(pInfo->pChildren); @@ -1327,11 +1549,13 @@ void destroyStreamSessionAggOperatorInfo(void* param) { colDataDestroy(&pInfo->twAggSup.timeWindowData); blockDataDestroy(pInfo->pDelRes); blockDataDestroy(pInfo->pWinBlock); - 
blockDataDestroy(pInfo->pUpdateRes); tSimpleHashCleanup(pInfo->pStUpdated); tSimpleHashCleanup(pInfo->pStDeleted); + pInfo->pUpdated = taosArrayDestroy(pInfo->pUpdated); taosArrayDestroy(pInfo->historyWins); + blockDataDestroy(pInfo->pCheckpointRes); + taosMemoryFreeClear(param); } @@ -1374,7 +1598,8 @@ void initDownStream(SOperatorInfo* downstream, SStreamAggSupporter* pAggSup, uin pScanInfo->windowSup = (SWindowSupporter){.pStreamAggSup = pAggSup, .gap = pAggSup->gap, .parentType = type}; pScanInfo->pState = pAggSup->pState; if (!pScanInfo->pUpdateInfo) { - pScanInfo->pUpdateInfo = pAggSup->stateStore.updateInfoInit(60000, TSDB_TIME_PRECISION_MILLI, pTwSup->waterMark); + pScanInfo->pUpdateInfo = pAggSup->stateStore.updateInfoInit(60000, TSDB_TIME_PRECISION_MILLI, pTwSup->waterMark, + pScanInfo->igCheckUpdate); } pScanInfo->twAggSup = *pTwSup; } @@ -1651,6 +1876,31 @@ static int32_t compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* return winNum; } +static void compactSessionSemiWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin) { + SExprSupp* pSup = &pOperator->exprSupp; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SResultRow* pCurResult = NULL; + int32_t numOfOutput = pOperator->exprSupp.numOfExprs; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + // Just look for the window behind StartIndex + while (1) { + SResultWindowInfo winInfo = {0}; + SStreamStateCur* pCur = getNextSessionWinInfo(pAggSup, NULL, pCurWin, &winInfo); + if (!IS_VALID_SESSION_WIN(winInfo) || !isInWindow(pCurWin, winInfo.sessionWin.win.skey, pAggSup->gap) || + !inWinRange(&pAggSup->winRange, &winInfo.sessionWin.win)) { + taosMemoryFree(winInfo.pOutputBuf); + pAPI->stateStore.streamStateFreeCur(pCur); + break; + } + pCurWin->sessionWin.win.ekey = TMAX(pCurWin->sessionWin.win.ekey, winInfo.sessionWin.win.ekey); + 
doDeleteSessionWindow(pAggSup, &winInfo.sessionWin); + pAPI->stateStore.streamStateFreeCur(pCur); + taosMemoryFree(winInfo.pOutputBuf); + } +} + int32_t saveSessionOutputBuf(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo) { saveSessionDiscBuf(pAggSup->pState, &pWinInfo->sessionWin, pWinInfo->pOutputBuf, pAggSup->resultRowSize, &pAggSup->stateStore); @@ -1858,6 +2108,7 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS int32_t code = getSessionWinBuf(pChAggSup, pCur, &childWin); if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &childWin.sessionWin.win)) { + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)childWin.pOutputBuf, &pAggSup->stateStore); continue; } @@ -1866,6 +2117,7 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS setSessionOutputBuf(pAggSup, pWinKey->win.skey, pWinKey->win.ekey, pWinKey->groupId, &parentWin); code = initSessionOutputBuf(&parentWin, &pResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset); if (code != TSDB_CODE_SUCCESS || pResult == NULL) { + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)childWin.pOutputBuf, &pAggSup->stateStore); break; } } @@ -1876,7 +2128,9 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS compactFunctions(pSup->pCtx, pChild->exprSupp.pCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); compactSessionWindow(pOperator, &parentWin, pStUpdated, NULL, true); saveResult(parentWin, pStUpdated); + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)childWin.pOutputBuf, &pAggSup->stateStore); } else { + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)childWin.pOutputBuf, &pAggSup->stateStore); break; } } @@ -2003,6 +2257,137 @@ void getMaxTsWins(const SArray* pAllWins, SArray* pMaxWins) { } } +int32_t encodeSSessionKey(void** buf, SSessionKey* key) { + int32_t tlen = 0; + tlen += encodeSTimeWindow(buf, &key->win); + tlen += taosEncodeFixedU64(buf, key->groupId); + 
return tlen; +} + +void* decodeSSessionKey(void* buf, SSessionKey* key) { + buf = decodeSTimeWindow(buf, &key->win); + buf = taosDecodeFixedU64(buf, &key->groupId); + return buf; +} + +int32_t encodeSResultWindowInfo(void** buf, SResultWindowInfo* key, int32_t outLen) { + int32_t tlen = 0; + tlen += taosEncodeFixedBool(buf, key->isOutput); + tlen += encodeSSessionKey(buf, &key->sessionWin); + return tlen; +} + +void* decodeSResultWindowInfo(void* buf, SResultWindowInfo* key, int32_t outLen) { + buf = taosDecodeFixedBool(buf, &key->isOutput); + key->pOutputBuf = NULL; + buf = decodeSSessionKey(buf, &key->sessionWin); + return buf; +} + +int32_t doStreamSessionEncodeOpState(void** buf, int32_t len, SOperatorInfo* pOperator, bool isParent) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + if (!pInfo) { + return 0; + } + + void* pData = (buf == NULL) ? NULL : *buf; + + // 1.streamAggSup.pResultRows + int32_t tlen = 0; + int32_t mapSize = tSimpleHashGetSize(pInfo->streamAggSup.pResultRows); + tlen += taosEncodeFixedI32(buf, mapSize); + void* pIte = NULL; + size_t keyLen = 0; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pInfo->streamAggSup.pResultRows, pIte, &iter)) != NULL) { + void* key = taosHashGetKey(pIte, &keyLen); + tlen += encodeSSessionKey(buf, key); + tlen += encodeSResultWindowInfo(buf, pIte, pInfo->streamAggSup.resultRowSize); + } + + // 2.twAggSup + tlen += encodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + + // 3.pChildren + int32_t size = taosArrayGetSize(pInfo->pChildren); + tlen += taosEncodeFixedI32(buf, size); + for (int32_t i = 0; i < size; i++) { + SOperatorInfo* pChOp = taosArrayGetP(pInfo->pChildren, i); + tlen += doStreamSessionEncodeOpState(buf, 0, pChOp, false); + } + + // 4.dataVersion + tlen += taosEncodeFixedI32(buf, pInfo->dataVersion); + + // 5.checksum + if (isParent) { + if (buf) { + uint32_t cksum = taosCalcChecksum(0, pData, len - sizeof(uint32_t)); + tlen += taosEncodeFixedU32(buf, cksum); + } else { + tlen += 
sizeof(uint32_t); + } + } + + return tlen; +} + +void* doStreamSessionDecodeOpState(void* buf, int32_t len, SOperatorInfo* pOperator, bool isParent) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + if (!pInfo) { + return buf; + } + + // 5.checksum + if (isParent) { + int32_t dataLen = len - sizeof(uint32_t); + void* pCksum = POINTER_SHIFT(buf, dataLen); + if (taosCheckChecksum(buf, dataLen, *(uint32_t*)pCksum) != TSDB_CODE_SUCCESS) { + ASSERT(0); // debug + qError("stream interval state is invalid"); + return buf; + } + } + + // 1.streamAggSup.pResultRows + int32_t mapSize = 0; + buf = taosDecodeFixedI32(buf, &mapSize); + for (int32_t i = 0; i < mapSize; i++) { + SSessionKey key = {0}; + SResultWindowInfo winfo = {0}; + buf = decodeSSessionKey(buf, &key); + buf = decodeSResultWindowInfo(buf, &winfo, pInfo->streamAggSup.resultRowSize); + tSimpleHashPut(pInfo->streamAggSup.pResultRows, &key, sizeof(SSessionKey), &winfo, sizeof(SResultWindowInfo)); + } + + // 2.twAggSup + buf = decodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + + // 3.pChildren + int32_t size = 0; + buf = taosDecodeFixedI32(buf, &size); + ASSERT(size <= taosArrayGetSize(pInfo->pChildren)); + for (int32_t i = 0; i < size; i++) { + SOperatorInfo* pChOp = taosArrayGetP(pInfo->pChildren, i); + buf = doStreamSessionDecodeOpState(buf, 0, pChOp, false); + } + + // 4.dataVersion + buf = taosDecodeFixedI64(buf, &pInfo->dataVersion); + return buf; +} + +void doStreamSessionSaveCheckpoint(SOperatorInfo* pOperator) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + int32_t len = doStreamSessionEncodeOpState(NULL, 0, pOperator, true); + void* buf = taosMemoryCalloc(1, len); + void* pBuf = buf; + len = doStreamSessionEncodeOpState(&pBuf, len, pOperator, true); + pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_CHECKPOINT_NAME, + strlen(STREAM_SESSION_OP_CHECKPOINT_NAME), buf, len); + taosMemoryFree(buf); +} + static SSDataBlock* 
doStreamSessionAgg(SOperatorInfo* pOperator) { SExprSupp* pSup = &pOperator->exprSupp; SStreamSessionAggOperatorInfo* pInfo = pOperator->info; @@ -2058,6 +2443,11 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { continue; } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { return pBlock; + } else if (pBlock->info.type == STREAM_CHECKPOINT) { + pAggSup->stateStore.streamStateCommit(pAggSup->pState); + doStreamSessionSaveCheckpoint(pOperator); + copyDataBlock(pInfo->pCheckpointRes, pBlock); + continue; } else { ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); } @@ -2115,13 +2505,11 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { } void streamSessionReleaseState(SOperatorInfo* pOperator) { - if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION) { - SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); - pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_STATE_NAME, - strlen(STREAM_SESSION_OP_STATE_NAME), pInfo->historyWins->pData, - resSize); - } + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); + pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_STATE_NAME, + strlen(STREAM_SESSION_OP_STATE_NAME), pInfo->historyWins->pData, + resSize); SOperatorInfo* downstream = pOperator->pDownstream[0]; if (downstream->fpSet.releaseStreamStateFn) { downstream->fpSet.releaseStreamStateFn(downstream); @@ -2133,6 +2521,33 @@ void resetWinRange(STimeWindow* winRange) { winRange->ekey = INT64_MAX; } +void streamSessionSemiReloadState(SOperatorInfo* pOperator) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + 
resetWinRange(&pAggSup->winRange); + + SResultWindowInfo winInfo = {0}; + int32_t size = 0; + void* pBuf = NULL; + int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, + strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size); + int32_t num = size / sizeof(SSessionKey); + SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf; + ASSERT(size == num * sizeof(SSessionKey)); + for (int32_t i = 0; i < num; i++) { + SResultWindowInfo winInfo = {0}; + setSessionOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].win.ekey, pSeKeyBuf[i].groupId, &winInfo); + compactSessionSemiWindow(pOperator, &winInfo); + saveSessionOutputBuf(pAggSup, &winInfo); + } + taosMemoryFree(pBuf); + + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.reloadStreamStateFn) { + downstream->fpSet.reloadStreamStateFn(downstream); + } +} + void streamSessionReloadState(SOperatorInfo* pOperator) { SStreamSessionAggOperatorInfo* pInfo = pOperator->info; SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; @@ -2249,7 +2664,19 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh if (pHandle) { pInfo->isHistoryOp = pHandle->fillHistory; } + + pInfo->pCheckpointRes = createSpecialDataBlock(STREAM_CHECKPOINT); pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION; + // for stream + void* buff = NULL; + int32_t len = 0; + int32_t res = + pInfo->streamAggSup.stateStore.streamStateGetInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_CHECKPOINT_NAME, + strlen(STREAM_SESSION_OP_CHECKPOINT_NAME), &buff, &len); + if (res == TSDB_CODE_SUCCESS) { + doStreamSessionDecodeOpState(buff, len, pOperator, true); + taosMemoryFree(buff); + } setOperatorInfo(pOperator, getStreamOpName(pOperator->operatorType), QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION, true, OP_NOT_OPENED, pInfo, pTaskInfo); pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionAgg, NULL, destroyStreamSessionAggOperatorInfo, @@ -2316,7 
+2743,6 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { while (1) { SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); if (pBlock == NULL) { - clearSpecialDataBlock(pInfo->pUpdateRes); pOperator->status = OP_RES_TO_RETURN; break; } @@ -2336,6 +2762,10 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { continue; } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { return pBlock; + } else if (pBlock->info.type == STREAM_CHECKPOINT) { + pAggSup->stateStore.streamStateCommit(pAggSup->pState); + doStreamSessionSaveCheckpoint(pOperator); + continue; } else { ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); } @@ -2357,6 +2787,11 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated); tSimpleHashCleanup(pInfo->pStUpdated); pInfo->pStUpdated = NULL; + + if(pInfo->isHistoryOp) { + getMaxTsWins(pInfo->pUpdated, pInfo->historyWins); + } + initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); pInfo->pUpdated = NULL; blockDataEnsureCapacity(pBInfo->pRes, pOperator->resultInfo.capacity); @@ -2387,12 +2822,12 @@ SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream pOperator->operatorType = pPhyNode->type; if (pPhyNode->type != QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) { - pInfo->pUpdateRes = createSpecialDataBlock(STREAM_CLEAR); - blockDataEnsureCapacity(pInfo->pUpdateRes, 128); pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL, destroyStreamSessionAggOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); + setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionSemiReloadState); } - setOperatorInfo(pOperator, getStreamOpName(pOperator->operatorType), pPhyNode->type, false, OP_NOT_OPENED, pInfo, pTaskInfo); + setOperatorInfo(pOperator, 
getStreamOpName(pOperator->operatorType), pPhyNode->type, false, OP_NOT_OPENED, pInfo, + pTaskInfo); if (numOfChild > 0) { pInfo->pChildren = taosArrayInit(numOfChild, sizeof(void*)); @@ -2428,6 +2863,7 @@ void destroyStreamStateOperatorInfo(void* param) { cleanupBasicInfo(&pInfo->binfo); destroyStreamAggSupporter(&pInfo->streamAggSup); cleanupGroupResInfo(&pInfo->groupResInfo); + cleanupExprSupp(&pInfo->scalarSupp); if (pInfo->pChildren != NULL) { int32_t size = taosArrayGetSize(pInfo->pChildren); for (int32_t i = 0; i < size; i++) { @@ -2441,6 +2877,9 @@ void destroyStreamStateOperatorInfo(void* param) { taosArrayDestroy(pInfo->historyWins); tSimpleHashCleanup(pInfo->pSeUpdated); tSimpleHashCleanup(pInfo->pSeDeleted); + pInfo->pUpdated = taosArrayDestroy(pInfo->pUpdated); + blockDataDestroy(pInfo->pCheckpointRes); + taosMemoryFreeClear(param); } @@ -2648,6 +3087,109 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl } } +int32_t doStreamStateEncodeOpState(void** buf, int32_t len, SOperatorInfo* pOperator, bool isParent) { + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + if (!pInfo) { + return 0; + } + + void* pData = (buf == NULL) ? 
NULL : *buf; + + // 1.streamAggSup.pResultRows + int32_t tlen = 0; + int32_t mapSize = tSimpleHashGetSize(pInfo->streamAggSup.pResultRows); + tlen += taosEncodeFixedI32(buf, mapSize); + void* pIte = NULL; + size_t keyLen = 0; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pInfo->streamAggSup.pResultRows, pIte, &iter)) != NULL) { + void* key = taosHashGetKey(pIte, &keyLen); + tlen += encodeSSessionKey(buf, key); + tlen += encodeSResultWindowInfo(buf, pIte, pInfo->streamAggSup.resultRowSize); + } + + // 2.twAggSup + tlen += encodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + + // 3.pChildren + int32_t size = taosArrayGetSize(pInfo->pChildren); + tlen += taosEncodeFixedI32(buf, size); + for (int32_t i = 0; i < size; i++) { + SOperatorInfo* pChOp = taosArrayGetP(pInfo->pChildren, i); + tlen += doStreamStateEncodeOpState(buf, 0, pChOp, false); + } + + // 4.dataVersion + tlen += taosEncodeFixedI32(buf, pInfo->dataVersion); + + // 5.checksum + if (isParent) { + if (buf) { + uint32_t cksum = taosCalcChecksum(0, pData, len - sizeof(uint32_t)); + tlen += taosEncodeFixedU32(buf, cksum); + } else { + tlen += sizeof(uint32_t); + } + } + + return tlen; +} + +void* doStreamStateDecodeOpState(void* buf, int32_t len, SOperatorInfo* pOperator, bool isParent) { + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + if (!pInfo) { + return buf; + } + + // 5.checksum + if (isParent) { + int32_t dataLen = len - sizeof(uint32_t); + void* pCksum = POINTER_SHIFT(buf, dataLen); + if (taosCheckChecksum(buf, dataLen, *(uint32_t*)pCksum) != TSDB_CODE_SUCCESS) { + ASSERT(0); // debug + qError("stream interval state is invalid"); + return buf; + } + } + + // 1.streamAggSup.pResultRows + int32_t mapSize = 0; + buf = taosDecodeFixedI32(buf, &mapSize); + for (int32_t i = 0; i < mapSize; i++) { + SSessionKey key = {0}; + SResultWindowInfo winfo = {0}; + buf = decodeSSessionKey(buf, &key); + buf = decodeSResultWindowInfo(buf, &winfo, pInfo->streamAggSup.resultRowSize); + 
tSimpleHashPut(pInfo->streamAggSup.pResultRows, &key, sizeof(SSessionKey), &winfo, sizeof(SResultWindowInfo)); + } + + // 2.twAggSup + buf = decodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + + // 3.pChildren + int32_t size = 0; + buf = taosDecodeFixedI32(buf, &size); + ASSERT(size <= taosArrayGetSize(pInfo->pChildren)); + for (int32_t i = 0; i < size; i++) { + SOperatorInfo* pChOp = taosArrayGetP(pInfo->pChildren, i); + buf = doStreamStateDecodeOpState(buf, 0, pChOp, false); + } + + // 4.dataVersion + buf = taosDecodeFixedI64(buf, &pInfo->dataVersion); + return buf; +} + +void doStreamStateSaveCheckpoint(SOperatorInfo* pOperator) { + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + int32_t len = doStreamStateEncodeOpState(NULL, 0, pOperator, true); + void* buf = taosMemoryCalloc(1, len); + void* pBuf = buf; + len = doStreamStateEncodeOpState(&pBuf, len, pOperator, true); + pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_CHECKPOINT_NAME, + strlen(STREAM_STATE_OP_CHECKPOINT_NAME), buf, len); +} + static SSDataBlock* buildStateResult(SOperatorInfo* pOperator) { SStreamStateAggOperatorInfo* pInfo = pOperator->info; SOptrBasicInfo* pBInfo = &pInfo->binfo; @@ -2700,7 +3242,7 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { if (pBlock == NULL) { break; } - printDataBlock(pBlock, "single state recv", GET_TASKID(pTaskInfo)); + printSpecDataBlock(pBlock, getStreamOpName(pOperator->operatorType), "recv", GET_TASKID(pTaskInfo)); if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT || pBlock->info.type == STREAM_CLEAR) { @@ -2715,6 +3257,11 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { continue; } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { return pBlock; + } else if (pBlock->info.type == STREAM_CHECKPOINT) { + pInfo->streamAggSup.stateStore.streamStateCommit(pInfo->streamAggSup.pState); + doStreamSessionSaveCheckpoint(pOperator); + 
copyDataBlock(pInfo->pCheckpointRes, pBlock); + continue; } else { ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); } @@ -2926,6 +3473,19 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys pInfo->isHistoryOp = pHandle->fillHistory; } + pInfo->pCheckpointRes = createSpecialDataBlock(STREAM_CHECKPOINT); + + // for stream + void* buff = NULL; + int32_t len = 0; + int32_t res = + pInfo->streamAggSup.stateStore.streamStateGetInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_CHECKPOINT_NAME, + strlen(STREAM_STATE_OP_CHECKPOINT_NAME), &buff, &len); + if (res == TSDB_CODE_SUCCESS) { + doStreamStateDecodeOpState(buff, len, pOperator, true); + taosMemoryFree(buff); + } + setOperatorInfo(pOperator, "StreamStateAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE, true, OP_NOT_OPENED, pInfo, pTaskInfo); pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamStateAgg, NULL, destroyStreamStateOperatorInfo, @@ -2984,14 +3544,13 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { resetUnCloseWinInfo(pInfo->aggSup.pResultRowHashTable); } - setOperatorCompleted(pOperator); - if (pInfo->twAggSup.maxTs > 0 && - pInfo->twAggSup.maxTs - pInfo->twAggSup.checkPointInterval > pInfo->twAggSup.checkPointTs) { - pAPI->stateStore.streamStateCommit(pInfo->pState); - pAPI->stateStore.streamStateDeleteCheckPoint(pInfo->pState, pInfo->twAggSup.maxTs - pInfo->twAggSup.deleteMark); - setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId); - pInfo->twAggSup.checkPointTs = pInfo->twAggSup.maxTs; + if (pInfo->reCkBlock) { + pInfo->reCkBlock = false; + // printDataBlock(pInfo->pCheckpointRes, "single interval ck"); + return pInfo->pCheckpointRes; } + + setOperatorCompleted(pOperator); return NULL; } @@ -3030,6 +3589,12 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { 
printDataBlock(pBlock, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); return pBlock; + } else if (pBlock->info.type == STREAM_CHECKPOINT) { + pAPI->stateStore.streamStateCommit(pInfo->pState); + doStreamIntervalSaveCheckpoint(pOperator); + pInfo->reCkBlock = true; + copyDataBlock(pInfo->pCheckpointRes, pBlock); + continue; } else { ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); } @@ -3078,7 +3643,7 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { } SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, - SExecTaskInfo* pTaskInfo) { + SExecTaskInfo* pTaskInfo, SReadHandle* pHandle) { SStreamIntervalOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamIntervalOperatorInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); if (pInfo == NULL || pOperator == NULL) { @@ -3100,16 +3665,11 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys .precision = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->node.resType.precision, }; - pInfo->twAggSup = (STimeWindowAggSupp){ - .waterMark = pIntervalPhyNode->window.watermark, - .calTrigger = pIntervalPhyNode->window.triggerType, - .maxTs = INT64_MIN, - .minTs = INT64_MAX, - .deleteMark = getDeleteMark(pIntervalPhyNode), - .checkPointTs = 0, - .checkPointInterval = - convertTimePrecision(tsCheckpointInterval, TSDB_TIME_PRECISION_MILLI, pInfo->interval.precision), - }; + pInfo->twAggSup = (STimeWindowAggSupp){.waterMark = pIntervalPhyNode->window.watermark, + .calTrigger = pIntervalPhyNode->window.triggerType, + .maxTs = INT64_MIN, + .minTs = INT64_MAX, + .deleteMark = getDeleteMark(pIntervalPhyNode)}; ASSERTS(pInfo->twAggSup.calTrigger != STREAM_TRIGGER_MAX_DELAY, "trigger type should not be max delay"); @@ -3168,7 +3728,7 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys 
pInfo->pState->pFileState = pTaskInfo->storageAPI.stateStore.streamFileStateInit( tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, compareTs, pInfo->pState, - pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo)); + pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo), pHandle->checkpointId); setOperatorInfo(pOperator, "StreamIntervalOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL, true, OP_NOT_OPENED, pInfo, pTaskInfo); @@ -3176,8 +3736,19 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys destroyStreamFinalIntervalOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState); - pInfo->statestore = pTaskInfo->storageAPI.stateStore; + pInfo->stateStore = pTaskInfo->storageAPI.stateStore; pInfo->recvGetAll = false; + pInfo->pCheckpointRes = createSpecialDataBlock(STREAM_CHECKPOINT); + + // for stream + void* buff = NULL; + int32_t len = 0; + int32_t res = pAPI->stateStore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_CHECKPOINT_NAME, + strlen(STREAM_INTERVAL_OP_CHECKPOINT_NAME), &buff, &len); + if (res == TSDB_CODE_SUCCESS) { + doStreamIntervalDecodeOpState(buff, len, pOperator); + taosMemoryFree(buff); + } initIntervalDownStream(downstream, pPhyNode->type, pInfo); code = appendDownstream(pOperator, &downstream, 1); diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 2405d3edef..db7c5e2570 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -18,6 +18,7 @@ #include "functionMgt.h" #include "operator.h" #include "querytask.h" +#include "tchecksum.h" #include "tcommon.h" #include "tcompare.h" #include "tdatablock.h" @@ -55,7 +56,6 @@ typedef enum SResultTsInterpType { RESULT_ROW_END_INTERP = 2, } SResultTsInterpType; - typedef struct SOpenWindowInfo { SResultRowPosition pos; uint64_t 
groupId; @@ -388,7 +388,7 @@ static bool setTimeWindowInterpolationEndTs(SIntervalAggOperatorInfo* pInfo, SEx bool inCalSlidingWindow(SInterval* pInterval, STimeWindow* pWin, TSKEY calStart, TSKEY calEnd, EStreamType blockType) { if (pInterval->interval != pInterval->sliding && - ((pWin->ekey < calStart || pWin->skey > calEnd) || (blockType == STREAM_PULL_DATA && pWin->skey < calStart) )) { + ((pWin->ekey < calStart || pWin->skey > calEnd) || (blockType == STREAM_PULL_DATA && pWin->skey < calStart))) { return false; } @@ -400,7 +400,7 @@ bool inSlidingWindow(SInterval* pInterval, STimeWindow* pWin, SDataBlockInfo* pB } int32_t getNextQualifiedWindow(SInterval* pInterval, STimeWindow* pNext, SDataBlockInfo* pDataBlockInfo, - TSKEY* primaryKeys, int32_t prevPosition, int32_t order) { + TSKEY* primaryKeys, int32_t prevPosition, int32_t order) { bool ascQuery = (order == TSDB_ORDER_ASC); int32_t precision = pInterval->precision; @@ -632,8 +632,8 @@ static void doInterpUnclosedTimeWindow(SOperatorInfo* pOperatorInfo, int32_t num } static bool tsKeyCompFn(void* l, void* r, void* param) { - TSKEY* lTS = (TSKEY*)l; - TSKEY* rTS = (TSKEY*)r; + TSKEY* lTS = (TSKEY*)l; + TSKEY* rTS = (TSKEY*)r; SIntervalAggOperatorInfo* pInfo = param; return pInfo->binfo.outputTsOrder == ORDER_ASC ? *lTS < *rTS : *lTS > *rTS; } @@ -728,8 +728,8 @@ static bool hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul } TSKEY ekey = ascScan ? win.ekey : win.skey; - int32_t forwardRows = - getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, pInfo->binfo.inputTsOrder); + int32_t forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, + pInfo->binfo.inputTsOrder); // prev time window not interpolation yet. 
if (pInfo->timeWindowInterpo) { @@ -756,7 +756,8 @@ static bool hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul STimeWindow nextWin = win; while (1) { int32_t prevEndPos = forwardRows - 1 + startPos; - startPos = getNextQualifiedWindow(&pInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos, pInfo->binfo.inputTsOrder); + startPos = getNextQualifiedWindow(&pInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos, + pInfo->binfo.inputTsOrder); if (startPos < 0 || filterWindowWithLimit(pInfo, &nextWin, tableGroupId)) { break; } @@ -768,8 +769,8 @@ static bool hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul } ekey = ascScan ? nextWin.ekey : nextWin.skey; - forwardRows = - getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, pInfo->binfo.inputTsOrder); + forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, + pInfo->binfo.inputTsOrder); // window start(end) key interpolation doWindowBorderInterpolation(pInfo, pBlock, pResult, &nextWin, startPos, forwardRows, pSup); // TODO: add to open window? how to close the open windows after input blocks exhausted? 
@@ -1116,7 +1117,6 @@ static void doClearWindowImpl(SResultRowPosition* p1, SDiskbasedBuf* pResultBuf, releaseBufPage(pResultBuf, bufPage); } - static void destroyStateWindowOperatorInfo(void* param) { SStateWindowOperatorInfo* pInfo = (SStateWindowOperatorInfo*)param; cleanupBasicInfo(&pInfo->binfo); @@ -1153,7 +1153,6 @@ void destroyIntervalOperatorInfo(void* param) { taosMemoryFreeClear(param); } - static bool timeWindowinterpNeeded(SqlFunctionCtx* pCtx, int32_t numOfCols, SIntervalAggOperatorInfo* pInfo) { // the primary timestamp column bool needed = false; @@ -1208,13 +1207,6 @@ static bool timeWindowinterpNeeded(SqlFunctionCtx* pCtx, int32_t numOfCols, SInt return needed; } - -void initStreamFunciton(SqlFunctionCtx* pCtx, int32_t numOfExpr) { - for (int32_t i = 0; i < numOfExpr; i++) { - // pCtx[i].isStream = true; - } -} - SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SIntervalPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo) { SIntervalAggOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SIntervalAggOperatorInfo)); @@ -1235,8 +1227,8 @@ SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SIntervalPh int32_t num = 0; SExprInfo* pExprInfo = createExprInfo(pPhyNode->window.pFuncs, NULL, &num); - int32_t code = - initAggSup(pSup, &pInfo->aggSup, pExprInfo, num, keyBufSize, pTaskInfo->id.str, pTaskInfo->streamInfo.pState, &pTaskInfo->storageAPI.functionStore); + int32_t code = initAggSup(pSup, &pInfo->aggSup, pExprInfo, num, keyBufSize, pTaskInfo->id.str, + pTaskInfo->streamInfo.pState, &pTaskInfo->storageAPI.functionStore); if (code != TSDB_CODE_SUCCESS) { goto _error; } @@ -1476,7 +1468,8 @@ SOperatorInfo* createStatewindowOperatorInfo(SOperatorInfo* downstream, SStateWi if (pStateNode->window.pExprs != NULL) { int32_t numOfScalarExpr = 0; SExprInfo* pScalarExprInfo = createExprInfo(pStateNode->window.pExprs, NULL, &numOfScalarExpr); - int32_t code = initExprSupp(&pInfo->scalarSup, pScalarExprInfo, numOfScalarExpr, 
&pTaskInfo->storageAPI.functionStore); + int32_t code = + initExprSupp(&pInfo->scalarSup, pScalarExprInfo, numOfScalarExpr, &pTaskInfo->storageAPI.functionStore); if (code != TSDB_CODE_SUCCESS) { goto _error; } @@ -1615,7 +1608,6 @@ _error: return NULL; } - void destroyMAIOperatorInfo(void* param) { SMergeAlignedIntervalAggOperatorInfo* miaInfo = (SMergeAlignedIntervalAggOperatorInfo*)param; destroyIntervalOperatorInfo(miaInfo->intervalAggOperatorInfo); @@ -1979,8 +1971,8 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo* } TSKEY ekey = ascScan ? win.ekey : win.skey; - int32_t forwardRows = - getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, iaInfo->binfo.inputTsOrder); + int32_t forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, + iaInfo->binfo.inputTsOrder); ASSERT(forwardRows > 0); // prev time window not interpolation yet. @@ -2010,8 +2002,8 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo* STimeWindow nextWin = win; while (1) { int32_t prevEndPos = forwardRows - 1 + startPos; - startPos = - getNextQualifiedWindow(&iaInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos, iaInfo->binfo.inputTsOrder); + startPos = getNextQualifiedWindow(&iaInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos, + iaInfo->binfo.inputTsOrder); if (startPos < 0) { break; } @@ -2025,8 +2017,8 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo* } ekey = ascScan ? 
nextWin.ekey : nextWin.skey; - forwardRows = - getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, iaInfo->binfo.inputTsOrder); + forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, + iaInfo->binfo.inputTsOrder); // window start(end) key interpolation doWindowBorderInterpolation(iaInfo, pBlock, pResult, &nextWin, startPos, forwardRows, pExprSup); diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index b6bc9c888b..39854d1824 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -44,9 +44,11 @@ typedef struct { int64_t defaultCfInit; } SBackendWrapper; -void* streamBackendInit(const char* path); +void* streamBackendInit(const char* path, int64_t chkpId); void streamBackendCleanup(void* arg); void streamBackendHandleCleanup(void* arg); +int32_t streamBackendLoadCheckpointInfo(void* pMeta); +int32_t streamBackendDoCheckpoint(void* pMeta, uint64_t checkpointId); SListNode* streamBackendAddCompare(void* backend, void* arg); void streamBackendDelCompare(void* backend, void* arg); @@ -135,5 +137,10 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb void* val, int32_t vlen, int64_t ttl, void* tmpBuf); int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch); +int32_t streamBackendTriggerChkp(void* pMeta, char* dst); + +int32_t streamBackendAddInUseChkp(void* arg, int64_t chkpId); +int32_t streamBackendDelInUseChkp(void* arg, int64_t chkpId); + // int32_t streamDefaultIter_rocksdb(SStreamState* pState, const void* start, const void* end, SArray* result); #endif \ No newline at end of file diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index ffd0eedea1..43a7232213 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -19,7 +19,7 @@ #include 
"executor.h" #include "query.h" #include "tstream.h" - +#include "streamBackendRocksdb.h" #include "trpc.h" #ifdef __cplusplus @@ -41,11 +41,15 @@ typedef struct { } SStreamContinueExecInfo; extern SStreamGlobalEnv streamEnv; +extern int32_t streamBackendId; +extern int32_t streamBackendCfWrapperId; -void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration); -int32_t streamDispatchStreamBlock(SStreamTask* pTask); +const char* streamGetBlockTypeStr(int32_t type); +void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration); +int32_t streamDispatchStreamBlock(SStreamTask* pTask); -SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg); +int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); +SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg); SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamTask* pTask, int64_t resultSize, SArray* pRes); void destroyStreamDataBlock(SStreamDataBlock* pBlock); @@ -55,13 +59,19 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq); +int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId); +int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); -int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId, - SEpSet* pEpSet); +int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32_t index, int64_t checkpointId); +int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask); +int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask); +int32_t streamTaskGetNumOfDownstream(const 
SStreamTask* pTask); +int32_t extractBlocksFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks); SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem); +int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, int32_t* pLen); int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, SStreamScanHistoryFinishReq* pReq); int32_t streamNotifyUpstreamContinue(SStreamTask* pTask); int32_t streamTaskFillHistoryFinished(SStreamTask* pTask); diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 5b6238330d..77a7456745 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -16,10 +16,9 @@ #include "streamInt.h" #include "ttimer.h" -#define STREAM_TASK_INPUT_QUEUE_CAPACITY 20480 -#define STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE (30) - -#define QUEUE_MEM_SIZE_IN_MB(_q) (taosQueueMemorySize(_q) / ONE_MB_F) +#define STREAM_TASK_INPUT_QUEUE_CAPACITY 20480 +#define STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE (30) +#define QUEUE_MEM_SIZE_IN_MB(_q) (taosQueueMemorySize(_q) / ONE_MB_F) SStreamGlobalEnv streamEnv; int32_t streamInit() { @@ -30,7 +29,7 @@ int32_t streamInit() { } if (old == 0) { - streamEnv.timer = taosTmrInit(10000, 100, 10000, "STREAM"); + streamEnv.timer = taosTmrInit(1000, 100, 10000, "STREAM"); if (streamEnv.timer == NULL) { atomic_store_8(&streamEnv.inited, 0); return -1; @@ -67,7 +66,6 @@ static void streamSchedByTimer(void* param, void* tmrId) { qDebug("s-task:%s in scheduler, trigger status:%d, next:%dms", pTask->id.idStr, status, (int32_t)pTask->triggerParam); if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) { - streamMetaReleaseTask(NULL, pTask); qDebug("s-task:%s jump out of schedTimer", pTask->id.idStr); return; } @@ -104,7 +102,7 @@ int32_t streamSetupScheduleTrigger(SStreamTask* pTask) { int32_t ref = 
atomic_add_fetch_32(&pTask->refCnt, 1); ASSERT(ref == 2 && pTask->schedTimer == NULL); - qDebug("s-task:%s setup scheduler trigger, delay:%"PRId64" ms", pTask->id.idStr, pTask->triggerParam); + qDebug("s-task:%s setup scheduler trigger, delay:%" PRId64 " ms", pTask->id.idStr, pTask->triggerParam); pTask->schedTimer = taosTmrStart(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer); pTask->triggerStatus = TASK_TRIGGER_STATUS__INACTIVE; @@ -141,14 +139,55 @@ int32_t streamSchedExec(SStreamTask* pTask) { return 0; } +static int32_t buildDispatchRsp(const SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t status, void** pBuf) { + *pBuf = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp)); + if (*pBuf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + ((SMsgHead*)(*pBuf))->vgId = htonl(pReq->upstreamNodeId); + SStreamDispatchRsp* pDispatchRsp = POINTER_SHIFT((*pBuf), sizeof(SMsgHead)); + + pDispatchRsp->inputStatus = status; + pDispatchRsp->streamId = htobe64(pReq->streamId); + pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId); + pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId); + pDispatchRsp->downstreamNodeId = htonl(pTask->info.nodeId); + pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId); + + return TSDB_CODE_SUCCESS; +} + +static int32_t streamTaskAppendInputBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq) { + int8_t status = 0; + + SStreamDataBlock* pBlock = createStreamBlockFromDispatchMsg(pReq, pReq->type, pReq->srcVgId); + if (pBlock == NULL) { + streamTaskInputFail(pTask); + status = TASK_INPUT_STATUS__FAILED; + qError("vgId:%d, s-task:%s failed to receive dispatch msg, reason: out of memory", pTask->pMeta->vgId, + pTask->id.idStr); + } else { + if (pBlock->type == STREAM_INPUT__TRANS_STATE) { + pTask->status.appendTranstateBlock = true; + } + + int32_t code = tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pBlock); + // input queue is full, upstream is blocked now + status = (code 
== TSDB_CODE_SUCCESS) ? TASK_INPUT_STATUS__NORMAL : TASK_INPUT_STATUS__BLOCKED; + } + + return status; +} + int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pRsp) { SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); int8_t status = TASK_INPUT_STATUS__NORMAL; // enqueue if (pData != NULL) { - qDebug("s-task:%s (child %d) recv retrieve req from task:0x%x(vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, pTask->info.selfChildId, - pReq->srcTaskId, pReq->srcNodeId, pReq->reqId); + qDebug("s-task:%s (child %d) recv retrieve req from task:0x%x(vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, + pTask->info.selfChildId, pReq->srcTaskId, pReq->srcNodeId, pReq->reqId); pData->type = STREAM_INPUT__DATA_RETRIEVE; pData->srcVgId = 0; @@ -181,7 +220,7 @@ int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock int32_t code = 0; int32_t type = pTask->outputInfo.type; if (type == TASK_OUTPUT__TABLE) { - pTask->tbSink.tbSinkFunc(pTask, pTask->tbSink.vnode, 0, pBlock->blocks); + pTask->tbSink.tbSinkFunc(pTask, pTask->tbSink.vnode, pBlock->blocks); destroyStreamDataBlock(pBlock); } else if (type == TASK_OUTPUT__SMA) { pTask->smaSink.smaSink(pTask->smaSink.vnode, pTask->smaSink.smaId, pBlock->blocks); @@ -200,76 +239,76 @@ int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock return 0; } +// static int32_t streamTaskAppendInputBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq) { +// int8_t status = 0; +// +// SStreamDataBlock* pBlock = createStreamDataFromDispatchMsg(pReq, pReq->type, pReq->srcVgId); +// if (pBlock == NULL) { +// streamTaskInputFail(pTask); +// status = TASK_INPUT_STATUS__FAILED; +// qError("vgId:%d, s-task:%s failed to receive dispatch msg, reason: out of memory", pTask->pMeta->vgId, +// pTask->id.idStr); +// } else { +// if (pBlock->type == STREAM_INPUT__TRANS_STATE) { +// pTask->status.appendTranstateBlock = true; +// } +// +// int32_t code 
= tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pBlock); +// // input queue is full, upstream is blocked now +// status = (code == TSDB_CODE_SUCCESS) ? TASK_INPUT_STATUS__NORMAL : TASK_INPUT_STATUS__BLOCKED; +// } +// +// return status; +// } - -static int32_t streamTaskAppendInputBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq) { - int8_t status = 0; - - SStreamDataBlock* pBlock = createStreamDataFromDispatchMsg(pReq, pReq->type, pReq->srcVgId); - if (pBlock == NULL) { - streamTaskInputFail(pTask); - status = TASK_INPUT_STATUS__FAILED; - qError("vgId:%d, s-task:%s failed to receive dispatch msg, reason: out of memory", pTask->pMeta->vgId, - pTask->id.idStr); - } else { - if (pBlock->type == STREAM_INPUT__TRANS_STATE) { - pTask->status.appendTranstateBlock = true; - } - - int32_t code = tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pBlock); - // input queue is full, upstream is blocked now - status = (code == TSDB_CODE_SUCCESS) ? TASK_INPUT_STATUS__NORMAL : TASK_INPUT_STATUS__BLOCKED; - } - - return status; -} - -static int32_t buildDispatchRsp(const SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t status, void** pBuf) { - *pBuf = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp)); - if (*pBuf == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - ((SMsgHead*)(*pBuf))->vgId = htonl(pReq->upstreamNodeId); - SStreamDispatchRsp* pDispatchRsp = POINTER_SHIFT((*pBuf), sizeof(SMsgHead)); - - pDispatchRsp->inputStatus = status; - pDispatchRsp->streamId = htobe64(pReq->streamId); - pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId); - pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId); - pDispatchRsp->downstreamNodeId = htonl(pTask->info.nodeId); - pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId); - - return TSDB_CODE_SUCCESS; -} - -void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) { - SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId); - if (pInfo != NULL) { - 
pInfo->dataAllowed = false; - } -} - +// static int32_t buildDispatchRsp(const SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t status, void** +// pBuf) { +// *pBuf = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp)); +// if (*pBuf == NULL) { +// return TSDB_CODE_OUT_OF_MEMORY; +// } +// +// ((SMsgHead*)(*pBuf))->vgId = htonl(pReq->upstreamNodeId); +// SStreamDispatchRsp* pDispatchRsp = POINTER_SHIFT((*pBuf), sizeof(SMsgHead)); +// +// pDispatchRsp->inputStatus = status; +// pDispatchRsp->streamId = htobe64(pReq->streamId); +// pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId); +// pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId); +// pDispatchRsp->downstreamNodeId = htonl(pTask->info.nodeId); +// pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId); +// +// return TSDB_CODE_SUCCESS; +// } int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) { qDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64, pTask->id.idStr, pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen); - int32_t status = 0; SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId); ASSERT(pInfo != NULL); - if (!pInfo->dataAllowed) { - qWarn("s-task:%s data from task:0x%x is denied, since inputQ is closed for it", pTask->id.idStr, pReq->upstreamTaskId); + // upstream task has restarted/leader-follower switch/transferred to other dnodes + if (pReq->stage > pInfo->stage) { + qError("s-task:%s upstream task:0x%x (vgId:%d) has restart/leader-switch/vnode-transfer, prev stage:%" PRId64 + ", current:%" PRId64 " dispatch msg rejected", + pTask->id.idStr, pReq->upstreamTaskId, pReq->upstreamNodeId, pInfo->stage, pReq->stage); status = TASK_INPUT_STATUS__BLOCKED; } else { - // Current task has received the checkpoint req from the upstream task, from which the message should all be blocked - if (pReq->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { - 
streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); - qDebug("s-task:%s close inputQ for upstream:0x%x", pTask->id.idStr, pReq->upstreamTaskId); - } + if (!pInfo->dataAllowed) { + qWarn("s-task:%s data from task:0x%x is denied, since inputQ is closed for it", pTask->id.idStr, + pReq->upstreamTaskId); + status = TASK_INPUT_STATUS__BLOCKED; + } else { + // Current task has received the checkpoint req from the upstream task, from which the message should all be + // blocked + if (pReq->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); + qDebug("s-task:%s close inputQ for upstream:0x%x", pTask->id.idStr, pReq->upstreamTaskId); + } - status = streamTaskAppendInputBlocks(pTask, pReq); + status = streamTaskAppendInputBlocks(pTask, pReq); + } } { @@ -290,30 +329,10 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S return 0; } -//int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) { -// qDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64, pTask->id.idStr, -// pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen); -// -// // todo add the input queue buffer limitation -// streamTaskEnqueueBlocks(pTask, pReq, pRsp); -// tDeleteStreamDispatchReq(pReq); -// -// if (exec) { -// if (streamTryExec(pTask) < 0) { -// return -1; -// } -// } else { -// streamSchedExec(pTask); -// } -// -// return 0; -//} - int32_t streamProcessRunReq(SStreamTask* pTask) { if (streamTryExec(pTask) < 0) { return -1; } - return 0; } @@ -338,9 +357,10 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { if (type == STREAM_INPUT__DATA_SUBMIT) { SStreamDataSubmit* px = (SStreamDataSubmit*)pItem; if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && tInputQueueIsFull(pTask)) { - qError("s-task:%s input queue is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data", - 
pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, - size); + qError( + "s-task:%s input queue is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push " + "data", + pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size); streamDataSubmitDestroy(px); taosFreeQitem(pItem); return -1; @@ -361,23 +381,24 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { msgLen, ver, total, size + SIZE_IN_MB(msgLen)); } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type == STREAM_INPUT__REF_DATA_BLOCK) { - if (/*(pTask->info.taskLevel == TASK_LEVEL__SOURCE) && */(tInputQueueIsFull(pTask))) { + if (/*(pTask->info.taskLevel == TASK_LEVEL__SOURCE) && */ (tInputQueueIsFull(pTask))) { qError("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort", - pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, - size); - destroyStreamDataBlock((SStreamDataBlock*) pItem); + pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size); + destroyStreamDataBlock((SStreamDataBlock*)pItem); return -1; } qDebug("s-task:%s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size); int32_t code = taosWriteQitem(pTask->inputQueue->queue, pItem); if (code != TSDB_CODE_SUCCESS) { - destroyStreamDataBlock((SStreamDataBlock*) pItem); + destroyStreamDataBlock((SStreamDataBlock*)pItem); return code; } - } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__TRANS_STATE) { + } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER || + type == STREAM_INPUT__TRANS_STATE) { taosWriteQitem(pTask->inputQueue->queue, pItem); - qDebug("s-task:%s checkpoint/trans-state blockdata enqueue, total in queue:%d, size:%.2fMiB", 
pTask->id.idStr, total, size); + qDebug("s-task:%s level:%d %s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, + pTask->info.taskLevel, streamGetBlockTypeStr(type), total, size); } else if (type == STREAM_INPUT__GET_RES) { // use the default memory limit, refactor later. taosWriteQitem(pTask->inputQueue->queue, pItem); @@ -388,7 +409,7 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && pTask->triggerParam != 0) { atomic_val_compare_exchange_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE, TASK_TRIGGER_STATUS__ACTIVE); - qDebug("s-task:%s new data arrived, active the trigger, trigerStatus:%d", pTask->id.idStr, pTask->triggerStatus); + qDebug("s-task:%s new data arrived, active the trigger, triggerStatus:%d", pTask->id.idStr, pTask->triggerStatus); } return 0; @@ -416,26 +437,34 @@ void* streamQueueNextItem(SStreamQueue* pQueue) { void streamTaskInputFail(SStreamTask* pTask) { atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__FAILED); } -SStreamChildEpInfo * streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId) { - int32_t num = taosArrayGetSize(pTask->pUpstreamEpInfoList); - for(int32_t i = 0; i < num; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i); +void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) { + int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList); + if (num == 0) { + return; + } + + for (int32_t i = 0; i < num; ++i) { + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); + pInfo->dataAllowed = true; + } +} + +void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) { + SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId); + if (pInfo != NULL) { + pInfo->dataAllowed = false; + } +} + +SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId) { + int32_t num = 
taosArrayGetSize(pTask->pUpstreamInfoList); + for (int32_t i = 0; i < num; ++i) { + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); if (pInfo->taskId == taskId) { return pInfo; } } + qError("s-task:%s failed to find upstream task:0x%x", pTask->id.idStr, taskId); return NULL; -} - -void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) { - int32_t num = taosArrayGetSize(pTask->pUpstreamEpInfoList); - if (num == 0) { - return; - } - - for(int32_t i = 0; i < num; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i); - pInfo->dataAllowed = true; - } -} +} \ No newline at end of file diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 571aca9935..82fa21ea40 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -20,6 +20,27 @@ #include "tcommon.h" #include "tref.h" +typedef struct { + int8_t init; + char* pCurrent; + char* pManifest; + SArray* pSST; + int64_t preCkptId; + int64_t curChkpId; + char* path; + + char* buf; + int32_t len; + + // ping-pong buf + SHashObj* pSstTbl[2]; + int8_t idx; + + SArray* pAdd; + SArray* pDel; + int8_t update; +} SBackendManager; + typedef struct SCompactFilteFactory { void* status; } SCompactFilteFactory; @@ -41,7 +62,8 @@ typedef struct { } RocksdbCfInst; uint32_t nextPow2(uint32_t x); -int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf); + +int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf); void destroyRocksdbCfInst(RocksdbCfInst* inst); @@ -126,6 +148,218 @@ void destroyFunc(void* arg); int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest); int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest); +SBackendManager* bkdMgtCreate(char* path) { + SBackendManager* p = taosMemoryCalloc(1, sizeof(SBackendManager)); + p->curChkpId = 0; + p->preCkptId = 0; + 
p->pSST = taosArrayInit(64, sizeof(void*)); + p->path = taosStrdup(path); + p->len = strlen(path) + 128; + p->buf = taosMemoryCalloc(1, p->len); + + p->idx = 0; + p->pSstTbl[0] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + p->pSstTbl[1] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + + p->pAdd = taosArrayInit(64, sizeof(void*)); + p->pDel = taosArrayInit(64, sizeof(void*)); + p->update = 0; + return p; +} +void bkdMgtDestroy(SBackendManager* bm) { + if (bm == NULL) return; + + taosMemoryFree(bm->buf); + taosMemoryFree(bm->path); + + taosArrayDestroyP(bm->pSST, taosMemoryFree); + taosArrayDestroyP(bm->pAdd, taosMemoryFree); + taosArrayDestroyP(bm->pDel, taosMemoryFree); + + taosHashCleanup(bm->pSstTbl[0]); + taosHashCleanup(bm->pSstTbl[1]); + taosMemoryFree(bm); +} + +int32_t compareHashTableImpl(SHashObj* p1, SHashObj* p2, SArray* diff) { + int32_t code = 0; + size_t len = 0; + void* pIter = taosHashIterate(p2, NULL); + while (pIter) { + char* name = taosHashGetKey(pIter, &len); + if (!taosHashGet(p1, name, len)) { + char* p = taosStrdup(name); + taosArrayPush(diff, &p); + } + pIter = taosHashIterate(p2, pIter); + } + return code; +} +int32_t compareHashTable(SHashObj* p1, SHashObj* p2, SArray* add, SArray* del) { + int32_t code = 0; + + code = compareHashTableImpl(p1, p2, add); + code = compareHashTableImpl(p2, p1, del); + + return code; +} +int32_t bkdMgtGetDelta(SBackendManager* bm, int64_t chkpId, SArray* list) { + const char* pCurrent = "CURRENT"; + int32_t currLen = strlen(pCurrent); + + const char* pManifest = "MANIFEST-"; + int32_t maniLen = strlen(pManifest); + + const char* pSST = ".sst"; + int32_t sstLen = strlen(pSST); + + memset(bm->buf, 0, bm->len); + sprintf(bm->buf, "%s%scheckpoint%" PRId64 "", bm->path, TD_DIRSEP, chkpId); + + taosArrayClearP(bm->pAdd, taosMemoryFree); + taosArrayClearP(bm->pDel, taosMemoryFree); + + TdDirPtr pDir = 
taosOpenDir(bm->buf); + TdDirEntryPtr de = NULL; + int8_t dummy = 0; + while ((de = taosReadDir(pDir)) != NULL) { + char* name = taosGetDirEntryName(de); + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; + if (strlen(name) == currLen && strcmp(name, pCurrent) == 0) { + taosMemoryFreeClear(bm->pCurrent); + bm->pCurrent = taosStrdup(name); + taosHashPut(bm->pSstTbl[1 - bm->idx], name, strlen(name), &dummy, sizeof(dummy)); + continue; + } + + if (strlen(name) >= maniLen && strncmp(name, pManifest, maniLen) == 0) { + taosMemoryFreeClear(bm->pManifest); + bm->pManifest = taosStrdup(name); + taosHashPut(bm->pSstTbl[1 - bm->idx], name, strlen(name), &dummy, sizeof(dummy)); + continue; + } + if (strlen(name) >= sstLen && strncmp(name + strlen(name) - 4, pSST, sstLen) == 0) { + char* p = taosStrdup(name); + taosHashPut(bm->pSstTbl[1 - bm->idx], name, strlen(name), &dummy, sizeof(dummy)); + continue; + } + } + if (bm->init == 0) { + bm->preCkptId = -1; + bm->curChkpId = chkpId; + bm->init = 1; + + void* pIter = taosHashIterate(bm->pSstTbl[1 - bm->idx], NULL); + while (pIter) { + size_t len; + char* name = taosHashGetKey(pIter, &len); + if (name != NULL && len != 0) { + taosArrayPush(bm->pAdd, &name); + } + pIter = taosHashIterate(bm->pSstTbl[1 - bm->idx], pIter); + } + if (taosArrayGetSize(bm->pAdd) > 0) bm->update = 1; + } else { + int32_t code = compareHashTable(bm->pSstTbl[bm->idx], bm->pSstTbl[1 - bm->idx], bm->pAdd, bm->pDel); + if (code != 0) { + // dead code + taosArrayClearP(bm->pAdd, taosMemoryFree); + taosArrayClearP(bm->pDel, taosMemoryFree); + taosHashClear(bm->pSstTbl[1 - bm->idx]); + bm->update = 0; + + return code; + } + + bm->preCkptId = bm->curChkpId; + bm->curChkpId = chkpId; + if (taosArrayGetSize(bm->pAdd) == 0 && taosArrayGetSize(bm->pDel) == 0) { + bm->update = 0; + } + } + taosHashClear(bm->pSstTbl[bm->idx]); + bm->idx = 1 - bm->idx; + + return 0; +} + +int32_t bkdMgtDumpTo(SBackendManager* bm, char* dname) { + int32_t code = 0; + 
int32_t len = bm->len + 128; + + char* dstBuf = taosMemoryCalloc(1, len); + char* srcBuf = taosMemoryCalloc(1, len); + + char* srcDir = taosMemoryCalloc(1, len); + char* dstDir = taosMemoryCalloc(1, len); + + sprintf(srcDir, "%s%s%s%" PRId64 "", bm->path, TD_DIRSEP, "checkpoint", bm->curChkpId); + sprintf(dstDir, "%s%s%s", bm->path, TD_DIRSEP, dname); + + if (!taosDirExist(srcDir)) { + return 0; + } + + code = taosMkDir(dstDir); + if (code != 0) { + return code; + } + + // clear current file + memset(dstBuf, 0, len); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pCurrent); + taosRemoveFile(dstBuf); + + memset(dstBuf, 0, len); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pManifest); + taosRemoveFile(dstBuf); + + // add file to $name dir + for (int i = 0; i < taosArrayGetSize(bm->pAdd); i++) { + memset(dstBuf, 0, len); + memset(srcBuf, 0, len); + + char* filename = taosArrayGetP(bm->pAdd, i); + sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, filename); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, filename); + + taosCopyFile(srcBuf, dstBuf); + } + // del file in $name + for (int i = 0; i < taosArrayGetSize(bm->pDel); i++) { + memset(dstBuf, 0, len); + memset(srcBuf, 0, len); + + char* filename = taosArrayGetP(bm->pDel, i); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, filename); + taosRemoveFile(dstBuf); + } + + // copy current file to dst dir + memset(srcBuf, 0, len); + memset(dstBuf, 0, len); + sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, bm->pCurrent); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pCurrent); + taosCopyFile(srcBuf, dstBuf); + + // copy manifest file to dst dir + memset(srcBuf, 0, len); + memset(dstBuf, 0, len); + sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, bm->pManifest); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pManifest); + taosCopyFile(srcBuf, dstBuf); + + // clear delta data buf + taosArrayClearP(bm->pAdd, taosMemoryFree); + taosArrayClearP(bm->pDel, taosMemoryFree); + + taosMemoryFree(srcBuf); + 
taosMemoryFree(dstBuf); + taosMemoryFree(srcDir); + taosMemoryFree(dstDir); + return code; +} + SCfInit ginitDict[] = { {"default", 7, 0, defaultKeyComp, defaultKeyEncode, defaultKeyDecode, defaultKeyToString, compareDefaultName, destroyFunc, encodeValueFunc, decodeValueFunc}, @@ -143,10 +377,90 @@ SCfInit ginitDict[] = { encodeValueFunc, decodeValueFunc}, }; -void* streamBackendInit(const char* path) { - uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20; +bool isValidCheckpoint(const char* dir) { return true; } - qDebug("start to init stream backend at %s", path); +int32_t copyFiles(const char* src, const char* dst) { + int32_t code = 0; + // opt later, just hard link + int32_t sLen = strlen(src); + int32_t dLen = strlen(dst); + char* srcName = taosMemoryCalloc(1, sLen + 64); + char* dstName = taosMemoryCalloc(1, dLen + 64); + + TdDirPtr pDir = taosOpenDir(src); + if (pDir == NULL) return 0; + + TdDirEntryPtr de = NULL; + while ((de = taosReadDir(pDir)) != NULL) { + char* name = taosGetDirEntryName(de); + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; + + sprintf(srcName, "%s%s%s", src, TD_DIRSEP, name); + sprintf(dstName, "%s%s%s", dst, TD_DIRSEP, name); + if (!taosDirEntryIsDir(de)) { + code = taosCopyFile(srcName, dstName); + if (code == -1) { + goto _err; + } + } + + memset(srcName, 0, sLen + 64); + memset(dstName, 0, dLen + 64); + } + +_err: + taosMemoryFreeClear(srcName); + taosMemoryFreeClear(dstName); + taosCloseDir(&pDir); + return code >= 0 ? 
0 : -1; +} +int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { + // impl later + int32_t code = 0; + + /*param@1: checkpointId dir + param@2: state + copy pChkpIdDir's file to state dir + opt to set hard link to previous file + */ + char* state = taosMemoryCalloc(1, strlen(path) + 32); + sprintf(state, "%s%s%s", path, TD_DIRSEP, "state"); + if (chkpId != 0) { + char* chkp = taosMemoryCalloc(1, strlen(path) + 64); + sprintf(chkp, "%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); + if (taosIsDir(chkp) && isValidCheckpoint(chkp)) { + if (taosIsDir(state)) { + // remove dir if exists + // taosRenameFile(const char *oldName, const char *newName) + taosRemoveDir(state); + } + taosMkDir(state); + code = copyFiles(chkp, state); + if (code != 0) { + qError("failed to restart stream backend from %s, reason: %s", chkp, tstrerror(TAOS_SYSTEM_ERROR(errno))); + } else { + qInfo("start to restart stream backend at checkpoint path: %s", chkp); + } + + } else { + qError("failed to start stream backend at %s, reason: %s, restart from default state dir:%s", chkp, + tstrerror(TAOS_SYSTEM_ERROR(errno)), state); + taosMkDir(state); + } + taosMemoryFree(chkp); + } + *dst = state; + + return 0; +} + +void* streamBackendInit(const char* streamPath, int64_t chkpId) { + char* backendPath = NULL; + int32_t code = rebuildDirFromCheckpoint(streamPath, chkpId, &backendPath); + + qDebug("start to init stream backend at %s, checkpointid: %" PRId64 "", backendPath, chkpId); + + uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20; SBackendWrapper* pHandle = taosMemoryCalloc(1, sizeof(SBackendWrapper)); pHandle->list = tdListNew(sizeof(SCfComparator)); taosThreadMutexInit(&pHandle->mutex, NULL); @@ -168,9 +482,10 @@ void* streamBackendInit(const char* path) { rocksdb_options_set_max_total_wal_size(opts, dbMemLimit); rocksdb_options_set_recycle_log_file_num(opts, 6); rocksdb_options_set_max_write_buffer_number(opts, 3); - 
rocksdb_options_set_info_log_level(opts, 0); + rocksdb_options_set_info_log_level(opts, 1); rocksdb_options_set_db_write_buffer_size(opts, dbMemLimit); rocksdb_options_set_write_buffer_size(opts, dbMemLimit / 2); + rocksdb_options_set_atomic_flush(opts, 1); pHandle->env = env; pHandle->dbOpt = opts; @@ -182,12 +497,12 @@ void* streamBackendInit(const char* path) { char* err = NULL; size_t nCf = 0; - char** cfs = rocksdb_list_column_families(opts, path, &nCf, &err); + char** cfs = rocksdb_list_column_families(opts, backendPath, &nCf, &err); if (nCf == 0 || nCf == 1 || err != NULL) { taosMemoryFreeClear(err); - pHandle->db = rocksdb_open(opts, path, &err); + pHandle->db = rocksdb_open(opts, backendPath, &err); if (err != NULL) { - qError("failed to open rocksdb, path:%s, reason:%s", path, err); + qError("failed to open rocksdb, path:%s, reason:%s", backendPath, err); taosMemoryFreeClear(err); goto _EXIT; } @@ -195,12 +510,17 @@ void* streamBackendInit(const char* path) { /* list all cf and get prefix */ - streamStateOpenBackendCf(pHandle, (char*)path, cfs, nCf); + code = streamStateOpenBackendCf(pHandle, (char*)backendPath, cfs, nCf); + if (code != 0) { + rocksdb_list_column_families_destroy(cfs, nCf); + goto _EXIT; + } } if (cfs != NULL) { rocksdb_list_column_families_destroy(cfs, nCf); } - qDebug("succ to init stream backend at %s, backend:%p", path, pHandle); + qDebug("succ to init stream backend at %s, backend:%p", backendPath, pHandle); + taosMemoryFreeClear(backendPath); return (void*)pHandle; _EXIT: @@ -210,31 +530,25 @@ _EXIT: taosThreadMutexDestroy(&pHandle->mutex); taosThreadMutexDestroy(&pHandle->cfMutex); taosHashCleanup(pHandle->cfInst); - rocksdb_compactionfilterfactory_destroy(pHandle->filterFactory); tdListFree(pHandle->list); taosMemoryFree(pHandle); - qDebug("failed to init stream backend at %s", path); + qDebug("failed to init stream backend at %s", backendPath); + taosMemoryFree(backendPath); return NULL; } void streamBackendCleanup(void* arg) { 
SBackendWrapper* pHandle = (SBackendWrapper*)arg; - void* pIter = taosHashIterate(pHandle->cfInst, NULL); + + void* pIter = taosHashIterate(pHandle->cfInst, NULL); while (pIter != NULL) { RocksdbCfInst* inst = *(RocksdbCfInst**)pIter; destroyRocksdbCfInst(inst); pIter = taosHashIterate(pHandle->cfInst, pIter); } + taosHashCleanup(pHandle->cfInst); if (pHandle->db) { - char* err = NULL; - rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create(); - rocksdb_flush(pHandle->db, flushOpt, &err); - if (err != NULL) { - qError("failed to flush db before streamBackend clean up, reason:%s", err); - taosMemoryFree(err); - } - rocksdb_flushoptions_destroy(flushOpt); rocksdb_close(pHandle->db); } rocksdb_options_destroy(pHandle->dbOpt); @@ -252,16 +566,18 @@ void streamBackendCleanup(void* arg) { taosThreadMutexDestroy(&pHandle->mutex); taosThreadMutexDestroy(&pHandle->cfMutex); - - qDebug("destroy stream backend backend:%p", pHandle); + qDebug("destroy stream backend :%p", pHandle); taosMemoryFree(pHandle); return; } void streamBackendHandleCleanup(void* arg) { SBackendCfWrapper* wrapper = arg; bool remove = wrapper->remove; + taosThreadRwlockWrlock(&wrapper->rwLock); + qDebug("start to do-close backendwrapper %p, %s", wrapper, wrapper->idstr); if (wrapper->rocksdb == NULL) { + taosThreadRwlockUnlock(&wrapper->rwLock); return; } @@ -270,19 +586,20 @@ void streamBackendHandleCleanup(void* arg) { char* err = NULL; if (remove) { for (int i = 0; i < cfLen; i++) { - if (wrapper->pHandle[i] != NULL) - rocksdb_drop_column_family(wrapper->rocksdb, ((rocksdb_column_family_handle_t**)wrapper->pHandle)[i], &err); + if (wrapper->pHandle[i] != NULL) rocksdb_drop_column_family(wrapper->rocksdb, wrapper->pHandle[i], &err); if (err != NULL) { - // qError("failed to create cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err); + qError("failed to drop cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err); taosMemoryFreeClear(err); } } } else { rocksdb_flushoptions_t* 
flushOpt = rocksdb_flushoptions_create(); + rocksdb_flushoptions_set_wait(flushOpt, 1); + for (int i = 0; i < cfLen; i++) { if (wrapper->pHandle[i] != NULL) rocksdb_flush_cf(wrapper->rocksdb, flushOpt, wrapper->pHandle[i], &err); if (err != NULL) { - qError("failed to create cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err); + qError("failed to flush cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err); taosMemoryFreeClear(err); } } @@ -295,6 +612,7 @@ void streamBackendHandleCleanup(void* arg) { } } taosMemoryFreeClear(wrapper->pHandle); + for (int i = 0; i < cfLen; i++) { rocksdb_options_destroy(wrapper->cfOpts[i]); rocksdb_block_based_options_destroy(((RocksdbCfParam*)wrapper->param)[i].tableOpt); @@ -310,6 +628,7 @@ void streamBackendHandleCleanup(void* arg) { wrapper->readOpts = NULL; taosMemoryFreeClear(wrapper->cfOpts); taosMemoryFreeClear(wrapper->param); + taosThreadRwlockUnlock(&wrapper->rwLock); taosThreadRwlockDestroy(&wrapper->rwLock); wrapper->rocksdb = NULL; @@ -319,6 +638,363 @@ void streamBackendHandleCleanup(void* arg) { taosMemoryFree(wrapper); return; } + +int32_t getLatestCheckpoint(void* arg, int64_t* checkpoint) { + SStreamMeta* pMeta = arg; + taosWLockLatch(&pMeta->chkpDirLock); + int64_t tc = 0; + int32_t sz = taosArrayGetSize(pMeta->chkpSaved); + if (sz <= 0) { + taosWUnLockLatch(&pMeta->chkpDirLock); + return -1; + } else { + tc = *(int64_t*)taosArrayGetLast(pMeta->chkpSaved); + } + + taosArrayPush(pMeta->chkpInUse, &tc); + + *checkpoint = tc; + taosWUnLockLatch(&pMeta->chkpDirLock); + return 0; +} +/* + * checkpointSave |--cp1--|--cp2--|--cp3--|--cp4--|--cp5--| + * chkpInUse: |--cp2--|--cp4--| + * chkpInUse is doing translation, cannot del until + * replication is finished + */ +int32_t delObsoleteCheckpoint(void* arg, const char* path) { + SStreamMeta* pMeta = arg; + + taosWLockLatch(&pMeta->chkpDirLock); + + SArray* chkpDel = taosArrayInit(10, sizeof(int64_t)); + SArray* chkpDup = taosArrayInit(10, sizeof(int64_t)); 
+ + int64_t firsId = 0; + if (taosArrayGetSize(pMeta->chkpInUse) >= 1) { + firsId = *(int64_t*)taosArrayGet(pMeta->chkpInUse, 0); + + for (int i = 0; i < taosArrayGetSize(pMeta->chkpSaved); i++) { + int64_t id = *(int64_t*)taosArrayGet(pMeta->chkpSaved, i); + if (id >= firsId) { + taosArrayPush(chkpDup, &id); + } else { + taosArrayPush(chkpDel, &id); + } + } + } else { + int32_t sz = taosArrayGetSize(pMeta->chkpSaved); + int32_t dsz = sz - pMeta->chkpCap; // del size + + for (int i = 0; i < dsz; i++) { + int64_t id = *(int64_t*)taosArrayGet(pMeta->chkpSaved, i); + taosArrayPush(chkpDel, &id); + } + for (int i = dsz < 0 ? 0 : dsz; i < sz; i++) { + int64_t id = *(int64_t*)taosArrayGet(pMeta->chkpSaved, i); + taosArrayPush(chkpDup, &id); + } + } + taosArrayDestroy(pMeta->chkpSaved); + pMeta->chkpSaved = chkpDup; + + taosWUnLockLatch(&pMeta->chkpDirLock); + + for (int i = 0; i < taosArrayGetSize(chkpDel); i++) { + int64_t id = *(int64_t*)taosArrayGet(chkpDel, i); + char tbuf[256] = {0}; + sprintf(tbuf, "%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, id); + if (taosIsDir(tbuf)) { + taosRemoveDir(tbuf); + } + } + taosArrayDestroy(chkpDel); + return 0; +} + +static int32_t compareCheckpoint(const void* a, const void* b) { + int64_t x = *(int64_t*)a; + int64_t y = *(int64_t*)b; + return x < y ? 
-1 : 1; +} + +int32_t streamBackendLoadCheckpointInfo(void* arg) { + SStreamMeta* pMeta = arg; + int32_t code = 0; + SArray* suffix = NULL; + + int32_t len = strlen(pMeta->path) + 30; + char* chkpPath = taosMemoryCalloc(1, len); + sprintf(chkpPath, "%s%s%s", pMeta->path, TD_DIRSEP, "checkpoints"); + + if (!taosDirExist(chkpPath)) { + // no checkpoint, nothing to load + taosMemoryFree(chkpPath); + return 0; + } + + TdDirPtr pDir = taosOpenDir(chkpPath); + if (pDir == NULL) { + taosMemoryFree(chkpPath); + return 0; + } + + TdDirEntryPtr de = NULL; + suffix = taosArrayInit(4, sizeof(int64_t)); + + while ((de = taosReadDir(pDir)) != NULL) { + if (strcmp(taosGetDirEntryName(de), ".") == 0 || strcmp(taosGetDirEntryName(de), "..") == 0) continue; + + if (taosDirEntryIsDir(de)) { + char checkpointPrefix[32] = {0}; + int64_t checkpointId = 0; + + int ret = sscanf(taosGetDirEntryName(de), "checkpoint%" PRId64 "", &checkpointId); + if (ret == 1) { + taosArrayPush(suffix, &checkpointId); + } + } else { + continue; + } + } + taosArraySort(suffix, compareCheckpoint); + // free previous chkpSaved + taosArrayClear(pMeta->chkpSaved); + for (int i = 0; i < taosArrayGetSize(suffix); i++) { + int64_t id = *(int64_t*)taosArrayGet(suffix, i); + taosArrayPush(pMeta->chkpSaved, &id); + } + + taosArrayDestroy(suffix); + taosCloseDir(&pDir); + taosMemoryFree(chkpPath); + return 0; +} + +int32_t chkpGetAllDbCfHandle(SStreamMeta* pMeta, rocksdb_column_family_handle_t*** ppHandle, SArray* refs) { + SArray* pHandle = taosArrayInit(16, POINTER_BYTES); + void* pIter = taosHashIterate(pMeta->pTaskBackendUnique, NULL); + while (pIter) { + int64_t id = *(int64_t*)pIter; + + SBackendCfWrapper* wrapper = taosAcquireRef(streamBackendCfWrapperId, id); + if (wrapper == NULL) continue; + + taosThreadRwlockRdlock(&wrapper->rwLock); + for (int i = 0; i < sizeof(ginitDict) / sizeof(ginitDict[0]); i++) { + if (wrapper->pHandle[i]) { + rocksdb_column_family_handle_t* p = wrapper->pHandle[i]; + 
taosArrayPush(pHandle, &p); + } + } + taosThreadRwlockUnlock(&wrapper->rwLock); + + taosArrayPush(refs, &id); + pIter = taosHashIterate(pMeta->pTaskBackendUnique, pIter); + } + + int32_t nCf = taosArrayGetSize(pHandle); + + rocksdb_column_family_handle_t** ppCf = taosMemoryCalloc(nCf, sizeof(rocksdb_column_family_handle_t*)); + for (int i = 0; i < nCf; i++) { + ppCf[i] = taosArrayGetP(pHandle, i); + } + taosArrayDestroy(pHandle); + + *ppHandle = ppCf; + return nCf; +} +int32_t chkpDoDbCheckpoint(rocksdb_t* db, char* path) { + int32_t code = -1; + char* err = NULL; + rocksdb_checkpoint_t* cp = rocksdb_checkpoint_object_create(db, &err); + if (cp == NULL || err != NULL) { + qError("failed to do checkpoint at:%s, reason:%s", path, err); + taosMemoryFreeClear(err); + goto _ERROR; + } + + rocksdb_checkpoint_create(cp, path, 64 << 20, &err); + if (err != NULL) { + qError("failed to do checkpoint at:%s, reason:%s", path, err); + taosMemoryFreeClear(err); + } else { + code = 0; + } +_ERROR: + rocksdb_checkpoint_object_destroy(cp); + return code; +} +int32_t chkpPreFlushDb(rocksdb_t* db, rocksdb_column_family_handle_t** cf, int32_t nCf) { + int code = 0; + char* err = NULL; + + rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create(); + rocksdb_flushoptions_set_wait(flushOpt, 1); + + rocksdb_flush_cfs(db, flushOpt, cf, nCf, &err); + if (err != NULL) { + qError("failed to flush db before streamBackend clean up, reason:%s", err); + taosMemoryFree(err); + code = -1; + } + rocksdb_flushoptions_destroy(flushOpt); + return code; +} +int32_t chkpPreCheckDir(char* path, int64_t chkpId, char** chkpDir, char** chkpIdDir) { + int32_t code = 0; + char* pChkpDir = taosMemoryCalloc(1, 256); + char* pChkpIdDir = taosMemoryCalloc(1, 256); + + sprintf(pChkpDir, "%s%s%s", path, TD_DIRSEP, "checkpoints"); + code = taosMulModeMkDir(pChkpDir, 0755, true); + if (code != 0) { + qError("failed to prepare checkpoint dir, path:%s, reason:%s", path, tstrerror(code)); + 
taosMemoryFree(pChkpDir); + taosMemoryFree(pChkpIdDir); + code = -1; + return code; + } + + sprintf(pChkpIdDir, "%s%scheckpoint%" PRId64, pChkpDir, TD_DIRSEP, chkpId); + if (taosIsDir(pChkpIdDir)) { + qInfo("stream rm exist checkpoint%s", pChkpIdDir); + taosRemoveFile(pChkpIdDir); + } + *chkpDir = pChkpDir; + *chkpIdDir = pChkpIdDir; + + return 0; +} + +int32_t streamBackendTriggerChkp(void* arg, char* dst) { + SStreamMeta* pMeta = arg; + int64_t backendRid = pMeta->streamBackendRid; + int32_t code = -1; + + SArray* refs = taosArrayInit(16, sizeof(int64_t)); + rocksdb_column_family_handle_t** ppCf = NULL; + + int64_t st = taosGetTimestampMs(); + SBackendWrapper* pHandle = taosAcquireRef(streamBackendId, backendRid); + + if (pHandle == NULL || pHandle->db == NULL) { + goto _ERROR; + } + int32_t nCf = chkpGetAllDbCfHandle(pMeta, &ppCf, refs); + qDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pHandle, dst, nCf); + + code = chkpPreFlushDb(pHandle->db, ppCf, nCf); + if (code == 0) { + code = chkpDoDbCheckpoint(pHandle->db, dst); + if (code != 0) { + qError("stream backend:%p failed to do checkpoint at:%s", pHandle, dst); + } else { + qDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pHandle, dst, + taosGetTimestampMs() - st); + } + } else { + qError("stream backend:%p failed to flush db at:%s", pHandle, dst); + } + + // release all ref to cfWrapper; + for (int i = 0; i < taosArrayGetSize(refs); i++) { + int64_t id = *(int64_t*)taosArrayGet(refs, i); + taosReleaseRef(streamBackendCfWrapperId, id); + } + +_ERROR: + taosReleaseRef(streamBackendId, backendRid); + taosArrayDestroy(refs); + return code; +} +int32_t streamBackendAddInUseChkp(void* arg, int64_t chkpId) { + if (arg == NULL) return 0; + + SStreamMeta* pMeta = arg; + taosWLockLatch(&pMeta->chkpDirLock); + taosArrayPush(pMeta->chkpInUse, &chkpId); + taosWUnLockLatch(&pMeta->chkpDirLock); + return 0; +} +int32_t streamBackendDelInUseChkp(void* arg, int64_t chkpId) 
{ + if (arg == NULL) return 0; + + SStreamMeta* pMeta = arg; + taosWLockLatch(&pMeta->chkpDirLock); + if (taosArrayGetSize(pMeta->chkpInUse) > 0) { + int64_t id = *(int64_t*)taosArrayGet(pMeta->chkpInUse, 0); + if (id == chkpId) { + taosArrayPopFrontBatch(pMeta->chkpInUse, 1); + } + } + taosWUnLockLatch(&pMeta->chkpDirLock); + return 0; +} + +int32_t streamBackendDoCheckpoint(void* arg, uint64_t checkpointId) { + SStreamMeta* pMeta = arg; + int64_t backendRid = pMeta->streamBackendRid; + int64_t st = taosGetTimestampMs(); + int32_t code = -1; + + SArray* refs = taosArrayInit(16, sizeof(int64_t)); + + rocksdb_column_family_handle_t** ppCf = NULL; + + char* pChkpDir = NULL; + char* pChkpIdDir = NULL; + if (chkpPreCheckDir(pMeta->path, checkpointId, &pChkpDir, &pChkpIdDir) != 0) { + taosArrayDestroy(refs); + return code; + } + + SBackendWrapper* pHandle = taosAcquireRef(streamBackendId, backendRid); + if (pHandle == NULL || pHandle->db == NULL) { + goto _ERROR; + } + + // Get all cf and acquire cfWrappter + int32_t nCf = chkpGetAllDbCfHandle(pMeta, &ppCf, refs); + qDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pHandle, pChkpIdDir, nCf); + + code = chkpPreFlushDb(pHandle->db, ppCf, nCf); + if (code == 0) { + code = chkpDoDbCheckpoint(pHandle->db, pChkpIdDir); + if (code != 0) { + qError("stream backend:%p failed to do checkpoint at:%s", pHandle, pChkpIdDir); + } else { + qDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pHandle, pChkpIdDir, + taosGetTimestampMs() - st); + } + } else { + qError("stream backend:%p failed to flush db at:%s", pHandle, pChkpIdDir); + } + // release all ref to cfWrapper; + for (int i = 0; i < taosArrayGetSize(refs); i++) { + int64_t id = *(int64_t*)taosArrayGet(refs, i); + taosReleaseRef(streamBackendCfWrapperId, id); + } + if (code == 0) { + taosWLockLatch(&pMeta->chkpDirLock); + taosArrayPush(pMeta->chkpSaved, &checkpointId); + taosWUnLockLatch(&pMeta->chkpDirLock); + + // delete obsolte 
checkpoint + delObsoleteCheckpoint(arg, pChkpDir); + pMeta->chkpId = checkpointId; + } + +_ERROR: + taosReleaseRef(streamBackendId, backendRid); + taosArrayDestroy(refs); + taosMemoryFree(ppCf); + taosMemoryFree(pChkpDir); + taosMemoryFree(pChkpIdDir); + return code; +} + SListNode* streamBackendAddCompare(void* backend, void* arg) { SBackendWrapper* pHandle = (SBackendWrapper*)backend; SListNode* node = NULL; @@ -348,7 +1024,8 @@ static rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const cha rocksdb_snapshot_t** snapshot, rocksdb_readoptions_t** readOpt); int defaultKeyComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen) { - int ret = memcmp(aBuf, bBuf, aLen); + int len = aLen < bLen ? aLen : bLen; + int ret = memcmp(aBuf, bBuf, len); if (ret == 0) { if (aLen < bLen) return -1; @@ -360,9 +1037,9 @@ int defaultKeyComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, return ret; } } -int streamStateValueIsStale(char* vv) { +int streamStateValueIsStale(char* v) { int64_t ts = 0; - taosDecodeFixedI64(vv, &ts); + taosDecodeFixedI64(v, &ts); return (ts != 0 && ts < taosGetTimestampMs()) ? 
1 : 0; } int iterValueIsStale(rocksdb_iterator_t* iter) { @@ -432,7 +1109,7 @@ int stateKeyDecode(void* k, char* buf) { int stateKeyToString(void* k, char* buf) { SStateKey* key = k; int n = 0; - n += sprintf(buf + n, "[groupId:%" PRId64 ",", key->key.groupId); + n += sprintf(buf + n, "[groupId:%" PRIu64 ",", key->key.groupId); n += sprintf(buf + n, "ts:%" PRIi64 ",", key->key.ts); n += sprintf(buf + n, "opNum:%" PRIi64 "]", key->opNum); return n; @@ -468,8 +1145,8 @@ int stateSessionKeyDBComp(void* state, const char* aBuf, size_t aLen, const char return stateSessionKeyCmpr(&w1, sizeof(w1), &w2, sizeof(w2)); } -int stateSessionKeyEncode(void* ses, char* buf) { - SStateSessionKey* sess = ses; +int stateSessionKeyEncode(void* k, char* buf) { + SStateSessionKey* sess = k; int len = 0; len += taosEncodeFixedI64((void**)&buf, sess->key.win.skey); len += taosEncodeFixedI64((void**)&buf, sess->key.win.ekey); @@ -477,8 +1154,8 @@ int stateSessionKeyEncode(void* ses, char* buf) { len += taosEncodeFixedI64((void**)&buf, sess->opNum); return len; } -int stateSessionKeyDecode(void* ses, char* buf) { - SStateSessionKey* sess = ses; +int stateSessionKeyDecode(void* k, char* buf) { + SStateSessionKey* sess = k; int len = 0; char* p = buf; @@ -693,33 +1370,23 @@ int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest) { SStreamValue key = {0}; char* p = value; if (streamStateValueIsStale(p)) { - *dest = NULL; - return -1; + goto _EXCEPT; } p = taosDecodeFixedI64(p, &key.unixTimestamp); p = taosDecodeFixedI32(p, &key.len); if (vlen != (sizeof(int64_t) + sizeof(int32_t) + key.len)) { - if (dest != NULL) *dest = NULL; qError("vlen: %d, read len: %d", vlen, key.len); - return -1; + goto _EXCEPT; } + if (key.len != 0 && dest != NULL) p = taosDecodeBinary(p, (void**)dest, key.len); - if (key.len == 0) { - key.data = NULL; - } else { - p = taosDecodeBinary(p, (void**)&(key.data), key.len); - } - - if (ttl != NULL) { - int64_t now = taosGetTimestampMs(); - *ttl = 
key.unixTimestamp == 0 ? 0 : key.unixTimestamp - now; - } - if (dest != NULL) { - *dest = key.data; - } else { - taosMemoryFree(key.data); - } + if (ttl != NULL) *ttl = key.unixTimestamp == 0 ? 0 : key.unixTimestamp - taosGetTimestampMs(); return key.len; + +_EXCEPT: + if (dest != NULL) *dest = NULL; + if (ttl != NULL) *ttl = 0; + return -1; } const char* compareDefaultName(void* arg) { @@ -808,6 +1475,7 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t if (3 == sscanf(cf, "0x%" PRIx64 "-%d_%s", &streamId, &taskId, funcname)) { rocksdb_block_based_table_options_t* tableOpt = rocksdb_block_based_options_create(); rocksdb_block_based_options_set_block_cache(tableOpt, handle->cache); + rocksdb_block_based_options_set_partition_filters(tableOpt, 1); rocksdb_filterpolicy_t* filter = rocksdb_filterpolicy_create_bloom(15); rocksdb_block_based_options_set_filter_policy(tableOpt, filter); @@ -829,6 +1497,12 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t if (err != NULL) { qError("failed to open rocksdb cf, reason:%s", err); taosMemoryFree(err); + taosMemoryFree(cfHandle); + taosMemoryFree(pCompare); + taosMemoryFree(params); + taosMemoryFree(cfOpts); + // fix other leak + return -1; } else { qDebug("succ to open rocksdb cf"); } @@ -838,12 +1512,14 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t cfHandle[0] = NULL; } rocksdb_options_destroy(cfOpts[0]); + handle->db = db; static int32_t cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]); for (int i = 0; i < nCf; i++) { char* cf = cfs[i]; - if (i == 0) continue; + if (i == 0) continue; // skip default column family, not set opt + char funcname[64] = {0}; if (3 == sscanf(cf, "0x%" PRIx64 "-%d_%s", &streamId, &taskId, funcname)) { char idstr[128] = {0}; @@ -876,15 +1552,16 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t inst->pHandle[idx] = cfHandle[i]; } } - void** pIter = 
taosHashIterate(handle->cfInst, NULL); + void* pIter = taosHashIterate(handle->cfInst, NULL); while (pIter) { - RocksdbCfInst* inst = *pIter; + RocksdbCfInst* inst = *(RocksdbCfInst**)pIter; for (int i = 0; i < cfLen; i++) { if (inst->cfOpt[i] == NULL) { rocksdb_options_t* opt = rocksdb_options_create_copy(handle->dbOpt); rocksdb_block_based_table_options_t* tableOpt = rocksdb_block_based_options_create(); rocksdb_block_based_options_set_block_cache(tableOpt, handle->cache); + rocksdb_block_based_options_set_partition_filters(tableOpt, 1); rocksdb_filterpolicy_t* filter = rocksdb_filterpolicy_create_bloom(15); rocksdb_block_based_options_set_filter_policy(tableOpt, filter); @@ -914,12 +1591,12 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t return 0; } int streamStateOpenBackend(void* backend, SStreamState* pState) { - qInfo("start to open state %p on backend %p 0x%" PRIx64 "-%d", pState, backend, pState->streamId, pState->taskId); + // qInfo("start to open state %p on backend %p 0x%" PRIx64 "-%d", pState, backend, pState->streamId, pState->taskId); taosAcquireRef(streamBackendId, pState->streamBackendRid); SBackendWrapper* handle = backend; SBackendCfWrapper* pBackendCfWrapper = taosMemoryCalloc(1, sizeof(SBackendCfWrapper)); - taosThreadMutexLock(&handle->cfMutex); + taosThreadMutexLock(&handle->cfMutex); RocksdbCfInst** ppInst = taosHashGet(handle->cfInst, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1); if (ppInst != NULL && *ppInst != NULL) { RocksdbCfInst* inst = *ppInst; @@ -954,6 +1631,7 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) { // refactor later rocksdb_block_based_table_options_t* tableOpt = rocksdb_block_based_options_create(); rocksdb_block_based_options_set_block_cache(tableOpt, handle->cache); + rocksdb_block_based_options_set_partition_filters(tableOpt, 1); rocksdb_filterpolicy_t* filter = rocksdb_filterpolicy_create_bloom(15); 
rocksdb_block_based_options_set_filter_policy(tableOpt, filter); @@ -997,6 +1675,9 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) { void streamStateCloseBackend(SStreamState* pState, bool remove) { SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; SBackendWrapper* pHandle = wrapper->pBackend; + + qInfo("start to close state on backend: %p", pHandle); + taosThreadMutexLock(&pHandle->cfMutex); RocksdbCfInst** ppInst = taosHashGet(pHandle->cfInst, wrapper->idstr, strlen(pState->pTdbState->idstr) + 1); if (ppInst != NULL && *ppInst != NULL) { @@ -1007,7 +1688,7 @@ void streamStateCloseBackend(SStreamState* pState, bool remove) { taosThreadMutexUnlock(&pHandle->cfMutex); char* status[] = {"close", "drop"}; - qInfo("start to close %s state %p on backendWrapper %p %s", status[remove == false ? 0 : 1], pState, wrapper, + qInfo("start to %s state %p on backendWrapper %p %s", status[remove == false ? 0 : 1], pState, wrapper, wrapper->idstr); wrapper->remove |= remove; // update by other pState taosReleaseRef(streamBackendCfWrapperId, pState->pTdbState->backendCfWrapperId); @@ -1065,21 +1746,21 @@ bool streamStateIterSeekAndValid(rocksdb_iterator_t* iter, char* buf, size_t len } return true; } -rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfName, rocksdb_snapshot_t** snapshot, +rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKeyName, rocksdb_snapshot_t** snapshot, rocksdb_readoptions_t** readOpt) { - int idx = streamStateGetCfIdx(pState, cfName); + int idx = streamStateGetCfIdx(pState, cfKeyName); - rocksdb_readoptions_t* rOpt = rocksdb_readoptions_create(); - *readOpt = rOpt; + *readOpt = rocksdb_readoptions_create(); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; if (snapshot != NULL) { *snapshot = (rocksdb_snapshot_t*)rocksdb_create_snapshot(wrapper->rocksdb); - rocksdb_readoptions_set_snapshot(rOpt, *snapshot); - rocksdb_readoptions_set_fill_cache(rOpt, 
0); + rocksdb_readoptions_set_snapshot(*readOpt, *snapshot); + rocksdb_readoptions_set_fill_cache(*readOpt, 0); } - return rocksdb_create_iterator_cf(wrapper->rocksdb, rOpt, ((rocksdb_column_family_handle_t**)wrapper->pHandle)[idx]); + return rocksdb_create_iterator_cf(wrapper->rocksdb, *readOpt, + ((rocksdb_column_family_handle_t**)wrapper->pHandle)[idx]); } #define STREAM_STATE_PUT_ROCKSDB(pState, funcname, key, value, vLen) \ @@ -1154,7 +1835,6 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa taosMemoryFree(val); \ if (vLen != NULL) *vLen = tlen; \ } \ - if (code == 0) qDebug("streamState str: %s succ to read from %s_%s", toString, wrapper->idstr, funcname); \ } while (0); #define STREAM_STATE_DEL_ROCKSDB(pState, funcname, key) \ @@ -1209,10 +1889,11 @@ int32_t streamStateClear_rocksdb(SStreamState* pState) { qDebug("streamStateClear_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - char sKeyStr[128] = {0}; - char eKeyStr[128] = {0}; - SStateKey sKey = {.key = {.ts = 0, .groupId = 0}, .opNum = pState->number}; - SStateKey eKey = {.key = {.ts = INT64_MAX, .groupId = UINT64_MAX}, .opNum = pState->number}; + + char sKeyStr[128] = {0}; + char eKeyStr[128] = {0}; + SStateKey sKey = {.key = {.ts = 0, .groupId = 0}, .opNum = pState->number}; + SStateKey eKey = {.key = {.ts = INT64_MAX, .groupId = UINT64_MAX}, .opNum = pState->number}; int sLen = stateKeyEncode(&sKey, sKeyStr); int eLen = stateKeyEncode(&eKey, eKeyStr); @@ -1247,6 +1928,7 @@ int32_t streamStateGetFirst_rocksdb(SStreamState* pState, SWinKey* key) { qDebug("streamStateGetFirst_rocksdb"); SWinKey tmp = {.ts = 0, .groupId = 0}; streamStatePut_rocksdb(pState, &tmp, NULL, 0); + SStreamStateCur* pCur = streamStateSeekKeyNext_rocksdb(pState, &tmp); int32_t code = streamStateGetKVByCur_rocksdb(pCur, key, NULL, 0); streamStateFreeCur(pCur); @@ -1301,9 +1983,13 @@ int32_t streamStateGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, cons if 
(pKtmp->opNum != pCur->number) { return -1; } - size_t vlen = 0; - if (pVal != NULL) *pVal = (char*)rocksdb_iter_value(pCur->iter, &vlen); - if (pVLen != NULL) *pVLen = vlen; + + if (pVLen != NULL) { + size_t vlen = 0; + const char* valStr = rocksdb_iter_value(pCur->iter, &vlen); + *pVLen = decodeValueFunc((void*)valStr, vlen, NULL, (char**)pVal); + } + *pKey = pKtmp->key; return 0; } @@ -1361,20 +2047,32 @@ SStreamStateCur* streamStateSeekKeyNext_rocksdb(SStreamState* pState, const SWin SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinKey* key) { qDebug("streamStateGetCur_rocksdb"); - int32_t code = 0; - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + int32_t code = 0; const SStateKey maxStateKey = {.key = {.groupId = UINT64_MAX, .ts = INT64_MAX}, .opNum = INT64_MAX}; STREAM_STATE_PUT_ROCKSDB(pState, "state", &maxStateKey, "", 0); - char buf[128] = {0}; - int32_t klen = stateKeyEncode((void*)&maxStateKey, buf); + if (code != 0) { + return NULL; + } + + char buf[128] = {0}; + int32_t klen = stateKeyEncode((void*)&maxStateKey, buf); + + { + char tbuf[256] = {0}; + stateKeyToString((void*)&maxStateKey, tbuf); + qDebug("seek to last:%s", tbuf); + } + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) return NULL; - pCur->db = wrapper->rocksdb; + + pCur->number = pState->number; + pCur->db = ((SBackendCfWrapper*)pState->pTdbState->pBackendCfWrapper)->rocksdb; pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); - rocksdb_iter_seek(pCur->iter, buf, (size_t)klen); + rocksdb_iter_seek(pCur->iter, buf, (size_t)klen); rocksdb_iter_prev(pCur->iter); while (rocksdb_iter_valid(pCur->iter) && iterValueIsStale(pCur->iter)) { rocksdb_iter_prev(pCur->iter); @@ -1384,6 +2082,7 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinK streamStateFreeCur(pCur); pCur = NULL; } + 
STREAM_STATE_DEL_ROCKSDB(pState, "state", &maxStateKey); return pCur; } @@ -1391,12 +2090,14 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinK SStreamStateCur* streamStateGetCur_rocksdb(SStreamState* pState, const SWinKey* key) { qDebug("streamStateGetCur_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) return NULL; + pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); + pCur->number = pState->number; SStateKey sKey = {.key = *key, .opNum = pState->number}; char buf[128] = {0}; @@ -1405,18 +2106,14 @@ SStreamStateCur* streamStateGetCur_rocksdb(SStreamState* pState, const SWinKey* rocksdb_iter_seek(pCur->iter, buf, len); if (rocksdb_iter_valid(pCur->iter) && !iterValueIsStale(pCur->iter)) { - size_t vlen; - char* val = (char*)rocksdb_iter_value(pCur->iter, &vlen); - if (!streamStateValueIsStale(val)) { - SStateKey curKey; - size_t kLen = 0; - char* keyStr = (char*)rocksdb_iter_key(pCur->iter, &kLen); - stateKeyDecode((void*)&curKey, keyStr); + SStateKey curKey; + size_t kLen = 0; + char* keyStr = (char*)rocksdb_iter_key(pCur->iter, &kLen); + stateKeyDecode((void*)&curKey, keyStr); - if (stateKeyCmpr(&sKey, sizeof(sKey), &curKey, sizeof(curKey)) == 0) { - pCur->number = pState->number; - return pCur; - } + if (stateKeyCmpr(&sKey, sizeof(sKey), &curKey, sizeof(curKey)) == 0) { + pCur->number = pState->number; + return pCur; } } streamStateFreeCur(pCur); @@ -1454,23 +2151,22 @@ int32_t streamStateSessionGet_rocksdb(SStreamState* pState, SSessionKey* key, vo SSessionKey resKey = *key; void* tmp = NULL; int32_t vLen = 0; - code = streamStateSessionGetKVByCur_rocksdb(pCur, &resKey, &tmp, &vLen); - if (code == 0) { - if (pVLen != NULL) *pVLen = 
vLen; - if (key->win.skey != resKey.win.skey) { - code = -1; - } else { - *key = resKey; - if (pVal != NULL && pVLen != NULL) { - *pVal = taosMemoryCalloc(1, *pVLen); - memcpy(*pVal, tmp, *pVLen); - } - } + code = streamStateSessionGetKVByCur_rocksdb(pCur, &resKey, &tmp, &vLen); + if (code == 0 && key->win.skey == resKey.win.skey) { + *key = resKey; + + if (pVal) { + *pVal = tmp; + tmp = NULL; + }; + if (pVLen) *pVLen = vLen; + } else { + code = -1; } + taosMemoryFree(tmp); streamStateFreeCur(pCur); - // impl later return code; } @@ -1516,8 +2212,6 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pSta rocksdb_iter_prev(pCur->iter); if (!rocksdb_iter_valid(pCur->iter)) { - // qWarn("streamState failed to seek key prev - // %s", toString); streamStateFreeCur(pCur); return NULL; } @@ -1535,10 +2229,10 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pSta (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; - char buf[128] = {0}; - + char buf[128] = {0}; SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; int len = stateSessionKeyEncode(&sKey, buf); + if (!streamStateIterSeekAndValid(pCur->iter, buf, len)) { streamStateFreeCur(pCur); return NULL; @@ -1586,6 +2280,7 @@ SStreamStateCur* streamStateSessionSeekKeyNext_rocksdb(SStreamState* pState, con streamStateFreeCur(pCur); return NULL; } + size_t klen; const char* iKey = rocksdb_iter_key(pCur->iter, &klen); SStateSessionKey curKey = {0}; @@ -1672,6 +2367,7 @@ SStreamStateCur* streamStateFillGetCur_rocksdb(SStreamState* pState, const SWinK pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); + pCur->number = pState->number; char buf[128] = {0}; int len = winKeyEncode((void*)key, buf); @@ -1711,8 +2407,7 @@ int32_t streamStateFillGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, winKeyDecode(&winKey, keyStr); const 
char* valStr = rocksdb_iter_value(pCur->iter, &vlen); - // char* dst = NULL; - int32_t len = decodeValueFunc((void*)valStr, vlen, NULL, (char**)pVal); + int32_t len = decodeValueFunc((void*)valStr, vlen, NULL, (char**)pVal); if (len < 0) { return -1; } @@ -1733,6 +2428,7 @@ SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); + pCur->number = pState->number; char buf[128] = {0}; int len = winKeyEncode((void*)key, buf); @@ -1770,6 +2466,7 @@ SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); + pCur->number = pState->number; char buf[128] = {0}; int len = winKeyEncode((void*)key, buf); @@ -1803,10 +2500,10 @@ int32_t streamStateSessionGetKeyByRange_rocksdb(SStreamState* pState, const SSes if (pCur == NULL) { return -1; } - pCur->number = pState->number; pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); + pCur->number = pState->number; SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; int32_t c = 0; @@ -2075,13 +2772,12 @@ void* streamDefaultIterCreate_rocksdb(SStreamState* pState) { pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "default", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); + pCur->number = pState->number; return pCur; } int32_t streamDefaultIterValid_rocksdb(void* iter) { SStreamStateCur* pCur = iter; - bool val = rocksdb_iter_valid(pCur->iter); - - return val ? 1 : 0; + return rocksdb_iter_valid(pCur->iter) ? 
1 : 0; } void streamDefaultIterSeek_rocksdb(void* iter, const char* key) { SStreamStateCur* pCur = iter; @@ -2097,13 +2793,16 @@ char* streamDefaultIterKey_rocksdb(void* iter, int32_t* len) { } char* streamDefaultIterVal_rocksdb(void* iter, int32_t* len) { SStreamStateCur* pCur = iter; - int32_t vlen = 0; - char* dst = NULL; - const char* vval = rocksdb_iter_value(pCur->iter, (size_t*)&vlen); - if (decodeValueFunc((void*)vval, vlen, NULL, &dst) < 0) { + char* ret = NULL; + + int32_t vlen = 0; + const char* val = rocksdb_iter_value(pCur->iter, (size_t*)&vlen); + *len = decodeValueFunc((void*)val, vlen, NULL, &ret); + if (*len < 0) { return NULL; } - return dst; + + return ret; } // batch func void* streamStateCreateBatch() { @@ -2117,25 +2816,34 @@ int32_t streamStateGetBatchSize(void* pBatch) { void streamStateClearBatch(void* pBatch) { rocksdb_writebatch_clear((rocksdb_writebatch_t*)pBatch); } void streamStateDestroyBatch(void* pBatch) { rocksdb_writebatch_destroy((rocksdb_writebatch_t*)pBatch); } -int32_t streamStatePutBatch(SStreamState* pState, const char* cfName, rocksdb_writebatch_t* pBatch, void* key, +int32_t streamStatePutBatch(SStreamState* pState, const char* cfKeyName, rocksdb_writebatch_t* pBatch, void* key, void* val, int32_t vlen, int64_t ttl) { SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - int i = streamStateGetCfIdx(pState, cfName); + int i = streamStateGetCfIdx(pState, cfKeyName); if (i < 0) { - qError("streamState failed to put to cf name:%s", cfName); + qError("streamState failed to put to cf name:%s", cfKeyName); return -1; } + char buf[128] = {0}; int32_t klen = ginitDict[i].enFunc((void*)key, buf); - char* ttlV = NULL; - int32_t ttlVLen = ginitDict[i].enValueFunc(val, vlen, ttl, &ttlV); + char* ttlV = NULL; + int32_t ttlVLen = ginitDict[i].enValueFunc(val, vlen, ttl, &ttlV); + rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[i].idx]; rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, 
(size_t)klen, ttlV, (size_t)ttlVLen); taosMemoryFree(ttlV); + + { + char tbuf[256] = {0}; + ginitDict[i].toStrFunc((void*)key, tbuf); + qDebug("streamState str: %s succ to write to %s_%s, len: %d", tbuf, wrapper->idstr, ginitDict[i].key, vlen); + } return 0; } + int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb_writebatch_t* pBatch, void* key, void* val, int32_t vlen, int64_t ttl, void* tmpBuf) { char buf[128] = {0}; @@ -2143,14 +2851,19 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb char* ttlV = tmpBuf; int32_t ttlVLen = ginitDict[cfIdx].enValueFunc(val, vlen, ttl, &ttlV); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[cfIdx].idx]; rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen); if (tmpBuf == NULL) { taosMemoryFree(ttlV); } + + { + char tbuf[256] = {0}; + ginitDict[cfIdx].toStrFunc((void*)key, tbuf); + qDebug("streamState str: %s succ to write to %s_%s", tbuf, wrapper->idstr, ginitDict[cfIdx].key); + } return 0; } int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) { @@ -2161,11 +2874,13 @@ int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) { qError("streamState failed to write batch, err:%s", err); taosMemoryFree(err); return -1; + } else { + qDebug("write batch to backend:%p", wrapper->pBackend); } return 0; } - uint32_t nextPow2(uint32_t x) { + if (x <= 1) return 2; x = x - 1; x = x | (x >> 1); x = x | (x >> 2); @@ -2173,4 +2888,4 @@ uint32_t nextPow2(uint32_t x) { x = x | (x >> 8); x = x | (x >> 16); return x + 1; -} \ No newline at end of file +} diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 722c557b8f..ebf3ce8a30 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ 
b/source/libs/stream/src/streamCheckpoint.c @@ -13,99 +13,72 @@ * along with this program. If not, see . */ -#if 0 -#include "streamInc.h" +#include "streamInt.h" -int32_t tEncodeSStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq) { +int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->checkpointId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->nodeId) < 0) return -1; + if (tEncodeSEpSet(pEncoder, &pReq->mgmtEps) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->mnodeId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->expireTime) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; } -int32_t tDecodeSStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq) { +int32_t tDecodeStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->checkpointId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->nodeId) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &pReq->mgmtEps) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->mnodeId) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->expireTime) < 0) return -1; tEndDecode(pDecoder); return 0; } -int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp) { +int32_t tEncodeStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->taskId) < 0) return -1; if 
(tEncodeI32(pEncoder, pRsp->nodeId) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->expireTime) < 0) return -1; + if (tEncodeI8(pEncoder, pRsp->success) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; } -int32_t tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp) { +int32_t tDecodeStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->nodeId) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->expireTime) < 0) return -1; + if (tDecodeI8(pDecoder, &pRsp->success) < 0) return -1; tEndDecode(pDecoder); return 0; } -int32_t tEncodeSStreamCheckpointReq(SEncoder* pEncoder, const SStreamCheckpointReq* pReq) { +int32_t tEncodeStreamCheckpointReadyMsg(SEncoder* pEncoder, const SStreamCheckpointReadyMsg* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->checkpointId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->downstreamTaskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->downstreamNodeId) < 0) return -1; - if (tEncodeI64(pEncoder, pReq->upstreamTaskId) < 0) return -1; - if (tEncodeI64(pEncoder, pReq->upstreamNodeId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->upstreamTaskId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->upstreamNodeId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->childId) < 0) return -1; - if (tEncodeI64(pEncoder, pReq->expireTime) < 0) return -1; - if (tEncodeI8(pEncoder, pReq->taskLevel) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; } -int32_t tDecodeSStreamCheckpointReq(SDecoder* pDecoder, SStreamCheckpointReq* pReq) { - if (tStartDecode(pDecoder) < 0) return -1; - if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; - 
if (tDecodeI64(pDecoder, &pReq->checkpointId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->downstreamTaskId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->downstreamNodeId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->upstreamTaskId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->upstreamNodeId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->childId) < 0) return -1; - if (tDecodeI64(pDecoder, &pReq->expireTime) < 0) return -1; - if (tDecodeI8(pDecoder, &pReq->taskLevel) < 0) return -1; - tEndDecode(pDecoder); - return 0; -} - -int32_t tEncodeSStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRsp* pRsp) { - if (tStartEncode(pEncoder) < 0) return -1; - if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1; - if (tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1; - if (tEncodeI32(pEncoder, pRsp->downstreamTaskId) < 0) return -1; - if (tEncodeI32(pEncoder, pRsp->downstreamNodeId) < 0) return -1; - if (tEncodeI64(pEncoder, pRsp->upstreamTaskId) < 0) return -1; - if (tEncodeI64(pEncoder, pRsp->upstreamNodeId) < 0) return -1; - if (tEncodeI32(pEncoder, pRsp->childId) < 0) return -1; - if (tEncodeI64(pEncoder, pRsp->expireTime) < 0) return -1; - if (tEncodeI8(pEncoder, pRsp->taskLevel) < 0) return -1; - tEndEncode(pEncoder); - return pEncoder->pos; -} - -int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRsp) { +int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointReadyMsg* pRsp) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1; @@ -114,83 +87,258 @@ int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pR if (tDecodeI32(pDecoder, &pRsp->upstreamTaskId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->upstreamNodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->childId) < 0) return -1; - if (tDecodeI64(pDecoder, &pRsp->expireTime) < 0) return 
-1; - if (tDecodeI8(pDecoder, &pRsp->taskLevel) < 0) return -1; tEndDecode(pDecoder); return 0; } -static int32_t streamAlignCheckpoint(SStreamTask* pTask, int64_t checkpointId, int32_t childId) { - if (pTask->checkpointingId == 0) { - pTask->checkpointingId = checkpointId; - pTask->checkpointAlignCnt = taosArrayGetSize(pTask->pUpstreamEpInfoList); +static int32_t streamAlignCheckpoint(SStreamTask* pTask) { + int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList); + int64_t old = atomic_val_compare_exchange_32(&pTask->checkpointAlignCnt, 0, num); + if (old == 0) { + qDebug("s-task:%s set initial align upstream num:%d", pTask->id.idStr, num); } - ASSERT(pTask->checkpointingId == checkpointId); - return atomic_sub_fetch_32(&pTask->checkpointAlignCnt, 1); } -static int32_t streamDoCheckpoint(SStreamMeta* pMeta, SStreamTask* pTask, int64_t checkpointId) { - // commit tdb state - streamStateCommit(pTask->pState); - // commit non-tdb state - // copy and save new state - // report to mnode - // send checkpoint req to downstream - return 0; -} - -static int32_t streamDoSourceCheckpoint(SStreamMeta* pMeta, SStreamTask* pTask, int64_t checkpointId) { - // ref wal - // set status checkpointing - // do checkpoint - return 0; -} -int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq) { - int32_t code; - int64_t checkpointId = pReq->checkpointId; - - code = streamDoSourceCheckpoint(pMeta, pTask, checkpointId); - if (code < 0) { - // rsp error - return -1; +static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) { + SStreamDataBlock* pChkpoint = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock)); + if (pChkpoint == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; } - return 0; + pChkpoint->type = checkpointType; + + SSDataBlock* pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); + if (pBlock == NULL) { + taosFreeQitem(pChkpoint); + return TSDB_CODE_OUT_OF_MEMORY; + } + + 
pBlock->info.type = STREAM_CHECKPOINT; + pBlock->info.version = pTask->checkpointingId; + pBlock->info.rows = 1; + pBlock->info.childId = pTask->info.selfChildId; + + pChkpoint->blocks = taosArrayInit(4, sizeof(SSDataBlock));//pBlock; + taosArrayPush(pChkpoint->blocks, pBlock); + + taosMemoryFree(pBlock); + if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pChkpoint) < 0) { + taosFreeQitem(pChkpoint); + return TSDB_CODE_OUT_OF_MEMORY; + } + + streamSchedExec(pTask); + return TSDB_CODE_SUCCESS; } -int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq) { - int32_t code; - int64_t checkpointId = pReq->checkpointId; - int32_t childId = pReq->childId; +int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq) { + ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); - if (taosArrayGetSize(pTask->pUpstreamEpInfoList) > 0) { - code = streamAlignCheckpoint(pTask, checkpointId, childId); - if (code > 0) { - return 0; + // 1. set task status to be prepared for check point, no data are allowed to put into inputQ. + taosThreadMutexLock(&pTask->lock); + + pTask->status.taskStatus = TASK_STATUS__CK; + pTask->checkpointingId = pReq->checkpointId; + pTask->checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); + + // 2. let's dispatch checkpoint msg to downstream task directly and do nothing else. put the checkpoint block into + // inputQ, to make sure all blocks with less version have been handled by this task already. 
+ int32_t code = appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER); + taosThreadMutexUnlock(&pTask->lock); + + return code; +} + +static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStreamTask* pTask) { + pBlock->srcTaskId = pTask->id.taskId; + pBlock->srcVgId = pTask->pMeta->vgId; + + int32_t code = taosWriteQitem(pTask->outputInfo.queue->queue, pBlock); + if (code == 0) { + streamDispatchStreamBlock(pTask); + } else { + streamFreeQitem((SStreamQueueItem*)pBlock); + } + + return code; +} + +int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { + SSDataBlock* pDataBlock = taosArrayGet(pBlock->blocks, 0); + int64_t checkpointId = pDataBlock->info.version; + + const char* id = pTask->id.idStr; + int32_t code = TSDB_CODE_SUCCESS; + + // set the task status + pTask->checkpointingId = checkpointId; + + // set task status + pTask->status.taskStatus = TASK_STATUS__CK; + + { // todo: remove this when the pipeline checkpoint generating is used. 
+ SStreamMeta* pMeta = pTask->pMeta; + taosWLockLatch(&pMeta->lock); + + if (pMeta->chkptNotReadyTasks == 0) { + pMeta->chkptNotReadyTasks = streamMetaGetNumOfStreamTasks(pMeta); + pMeta->totalTasks = pMeta->chkptNotReadyTasks; } - if (code < 0) { - ASSERT(0); - return -1; + + taosWUnLockLatch(&pMeta->lock); + } + + //todo fix race condition: set the status and append checkpoint block + int32_t taskLevel = pTask->info.taskLevel; + if (taskLevel == TASK_LEVEL__SOURCE) { + if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { + qDebug("s-task:%s set childIdx:%d, and add checkpoint block into outputQ", id, pTask->info.selfChildId); + continueDispatchCheckpointBlock(pBlock, pTask); + } else { // only one task exists, no need to dispatch downstream info + streamProcessCheckpointReadyMsg(pTask); + streamFreeQitem((SStreamQueueItem*)pBlock); + } + } else if (taskLevel == TASK_LEVEL__SINK || taskLevel == TASK_LEVEL__AGG) { + ASSERT(taosArrayGetSize(pTask->pUpstreamInfoList) > 0); + + // update the child Id for downstream tasks + streamAddCheckpointReadyMsg(pTask, pBlock->srcTaskId, pTask->info.selfChildId, checkpointId); + + // there are still some upstream tasks not send checkpoint request, do nothing and wait for then + int32_t notReady = streamAlignCheckpoint(pTask); + int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList); + if (notReady > 0) { + qDebug("s-task:%s received checkpoint block, idx:%d, %d upstream tasks not send checkpoint info yet, total:%d", + id, pTask->info.selfChildId, notReady, num); + streamFreeQitem((SStreamQueueItem*)pBlock); + return code; + } + + if (taskLevel == TASK_LEVEL__SINK) { + pTask->status.taskStatus = TASK_STATUS__CK_READY; + qDebug("s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, send ready msg to upstream", + id, num); + streamFreeQitem((SStreamQueueItem*)pBlock); + streamTaskBuildCheckpoint(pTask); + } else { + qDebug( + "s-task:%s process 
checkpoint block, all %d upstreams sent checkpoint msgs, dispatch checkpoint msg " + "downstream", id, num); + + // set the needed checked downstream tasks, only when all downstream tasks do checkpoint complete, this task + // can start local checkpoint procedure + pTask->checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); + + // if all upstreams are ready for generating checkpoint, set the status to be TASK_STATUS__CK_READY + // put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task + // already. And then, dispatch check point msg to all downstream tasks + code = continueDispatchCheckpointBlock(pBlock, pTask); } } - code = streamDoCheckpoint(pMeta, pTask, checkpointId); - if (code < 0) { - // rsp error - return -1; + return code; +} + +/** + * All down stream tasks have successfully completed the check point task. + * Current stream task is allowed to start to do checkpoint things in ASYNC model. + */ +int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) { + ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE || pTask->info.taskLevel == TASK_LEVEL__AGG); + + // only when all downstream tasks are send checkpoint rsp, we can start the checkpoint procedure for the agg task + int32_t notReady = atomic_sub_fetch_32(&pTask->checkpointNotReadyTasks, 1); + ASSERT(notReady >= 0); + + if (notReady == 0) { + qDebug("s-task:%s all downstream tasks have completed the checkpoint, start to do checkpoint for current task", + pTask->id.idStr); + appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT); + } else { + int32_t total = streamTaskGetNumOfDownstream(pTask); + qDebug("s-task:%s %d/%d downstream tasks are not ready, wait", pTask->id.idStr, notReady, total); } - // send rsp to all children - return 0; } -int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp) { - // recover step2, scan from wal - // unref wal - // set status normal - return 0; +int32_t 
streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) { + taosWLockLatch(&pMeta->lock); + + int64_t keys[2]; + for (int32_t i = 0; i < taosArrayGetSize(pMeta->pTaskList); ++i) { + SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i); + keys[0] = pId->streamId; + keys[1] = pId->taskId; + + SStreamTask* p = *(SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + if (p->info.fillHistory == 1) { + continue; + } + + int8_t prev = p->status.taskStatus; + ASSERT(p->chkInfo.checkpointId < p->checkpointingId && p->checkpointingId == checkpointId); + + p->chkInfo.checkpointId = p->checkpointingId; + streamSetStatusNormal(p); + + // save the task + streamMetaSaveTask(pMeta, p); + streamTaskOpenAllUpstreamInput(p); // open inputQ for all upstream tasks + qDebug("vgId:%d s-task:%s level:%d commit task status after checkpoint completed, checkpointId:%" PRId64 + ", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status to be normal, prev:%s", + pMeta->vgId, p->id.idStr, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, p->chkInfo.currentVer, + streamGetTaskStatusStr(prev)); + } + + if (streamMetaCommit(pMeta) < 0) { + taosWUnLockLatch(&pMeta->lock); + qError("vgId:%d failed to commit stream meta after do checkpoint, checkpointId:%" PRId64 ", since %s", pMeta->vgId, + checkpointId, terrstr()); + return -1; + } else { + taosWUnLockLatch(&pMeta->lock); + qInfo("vgId:%d commit stream meta after do checkpoint, checkpointId:%" PRId64 " DONE", pMeta->vgId, checkpointId); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { + int32_t code = 0; + + // check for all tasks, and do generate the vnode-wide checkpoint data. 
+ SStreamMeta* pMeta = pTask->pMeta; + int32_t remain = atomic_sub_fetch_32(&pMeta->chkptNotReadyTasks, 1); + ASSERT(remain >= 0); + + if (remain == 0) { // all tasks are in TASK_STATUS__CK_READY state + qDebug("s-task:%s is ready for checkpoint", pTask->id.idStr); + pMeta->totalTasks = 0; + + streamBackendDoCheckpoint(pMeta, pTask->checkpointingId); + streamSaveAllTaskStatus(pMeta, pTask->checkpointingId); + qDebug("vgId:%d vnode wide checkpoint completed, save all tasks status, checkpointId:%" PRId64, pMeta->vgId, + pTask->checkpointingId); + } else { + qDebug("vgId:%d vnode wide tasks not reach checkpoint ready status, ready s-task:%s, not ready:%d/%d", pMeta->vgId, + pTask->id.idStr, remain, pMeta->totalTasks); + } + + // send check point response to upstream task + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + code = streamTaskSendCheckpointSourceRsp(pTask); + } else { + code = streamTaskSendCheckpointReadyMsg(pTask); + } + + if (code != TSDB_CODE_SUCCESS) { + // todo: let's retry send rsp to upstream/mnode + qError("s-task:%s failed to send checkpoint rsp to upstream, checkpointId:%" PRId64 ", code:%s", pTask->id.idStr, + pTask->checkpointingId, tstrerror(code)); + } + + return code; } -#endif diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index fc1b788b77..3c731df071 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -15,7 +15,7 @@ #include "streamInt.h" -SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg) { +SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg) { SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, pReq->totalLen); if (pData == NULL) { return NULL; @@ -23,6 +23,7 @@ SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq pData->type = blockType; pData->srcVgId = srcVg; + 
pData->srcTaskId = pReq->upstreamTaskId; int32_t blockNum = pReq->blockNum; SArray* pArray = taosArrayInit_s(sizeof(SSDataBlock), blockNum); @@ -60,16 +61,15 @@ SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamT return NULL; } + pStreamBlocks->srcTaskId = pTask->id.taskId; pStreamBlocks->type = STREAM_INPUT__DATA_BLOCK; pStreamBlocks->blocks = pRes; if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { SStreamDataSubmit* pSubmit = (SStreamDataSubmit*)pItem; - pStreamBlocks->childId = pTask->info.selfChildId; pStreamBlocks->sourceVer = pSubmit->ver; } else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) { SStreamMergedSubmit* pMerged = (SStreamMergedSubmit*)pItem; - pStreamBlocks->childId = pTask->info.selfChildId; pStreamBlocks->sourceVer = pMerged->ver; } @@ -121,6 +121,7 @@ SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type) { return NULL; } + pDataSubmit->ver = pData->ver; pDataSubmit->submit = *pData; *pDataSubmit->dataRef = 1; // initialize the reference count to be 1 pDataSubmit->type = type; @@ -199,6 +200,11 @@ SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* } } +static void freeItems(void* param) { + SSDataBlock* pBlock = param; + taosArrayDestroy(pBlock->pDataBlock); +} + void streamFreeQitem(SStreamQueueItem* data) { int8_t type = data->type; if (type == STREAM_INPUT__GET_RES) { @@ -232,5 +238,22 @@ void streamFreeQitem(SStreamQueueItem* data) { SStreamRefDataBlock* pRefBlock = (SStreamRefDataBlock*)data; blockDataDestroy(pRefBlock->pBlock); taosFreeQitem(pRefBlock); + } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + SStreamDataBlock* pBlock = (SStreamDataBlock*) data; + taosArrayDestroyEx(pBlock->blocks, freeItems); + taosFreeQitem(pBlock); } } + +const char* streamGetBlockTypeStr(int32_t type) { + switch (type) { + case STREAM_INPUT__CHECKPOINT: + return "checkpoint"; + case STREAM_INPUT__CHECKPOINT_TRIGGER: + return 
"checkpoint-trigger"; + case STREAM_INPUT__TRANS_STATE: + return "trans-state"; + default: + return ""; + } +} \ No newline at end of file diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 0864eb3c28..c51ed10c44 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -14,7 +14,9 @@ */ #include "streamInt.h" +#include "trpc.h" #include "ttimer.h" +#include "tmisce.h" #define MAX_BLOCK_NAME_NUM 1024 #define DISPATCH_RETRY_INTERVAL_MS 300 @@ -25,22 +27,38 @@ typedef struct SBlockName { char parTbName[TSDB_TABLE_NAME_LEN]; } SBlockName; +typedef struct { + int32_t upStreamTaskId; + SEpSet upstreamNodeEpset; + SRpcMsg msg; +} SStreamChkptReadyInfo; + +static void doRetryDispatchData(void* param, void* tmrId); +static int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet); +static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq); +static int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, SSDataBlock* pDataBlock, + int32_t vgSz, int64_t groupId); +static int32_t doDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId, + SEpSet* pEpSet); + static int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, int32_t numOfBlocks, int64_t dstTaskId, int32_t type); -static void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen) { +void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen) { pMsg->msgType = msgType; pMsg->pCont = pCont; pMsg->contLen = contLen; } -static int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) { +int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->stage) < 0) return -1; 
+ if (tEncodeI32(pEncoder, pReq->srcVgId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->type) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->type) < 0) return -1; if (tEncodeI32(pEncoder, pReq->upstreamTaskId) < 0) return -1; - if (tEncodeI32(pEncoder, pReq->srcVgId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->upstreamChildId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->upstreamNodeId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->blockNum) < 0) return -1; @@ -57,44 +75,15 @@ static int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatc return pEncoder->pos; } -static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq) { - int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock); - void* buf = taosMemoryCalloc(1, dataStrLen); - if (buf == NULL) return -1; - - SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)buf; - pRetrieve->useconds = 0; - pRetrieve->precision = TSDB_DEFAULT_PRECISION; - pRetrieve->compressed = 0; - pRetrieve->completed = 1; - pRetrieve->streamBlockType = pBlock->info.type; - pRetrieve->numOfRows = htobe64((int64_t)pBlock->info.rows); - pRetrieve->skey = htobe64(pBlock->info.window.skey); - pRetrieve->ekey = htobe64(pBlock->info.window.ekey); - pRetrieve->version = htobe64(pBlock->info.version); - pRetrieve->watermark = htobe64(pBlock->info.watermark); - memcpy(pRetrieve->parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN); - - int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock); - pRetrieve->numOfCols = htonl(numOfCols); - - int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols); - actualLen += sizeof(SRetrieveTableRsp); - ASSERT(actualLen <= dataStrLen); - taosArrayPush(pReq->dataLen, &actualLen); - taosArrayPush(pReq->data, &buf); - - pReq->totalLen += dataStrLen; - return 0; -} - int32_t 
tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) { if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->stage) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->srcVgId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->type) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->type) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->upstreamTaskId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->srcVgId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->upstreamChildId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->upstreamNodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->blockNum) < 0) return -1; @@ -113,14 +102,16 @@ int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) { taosArrayPush(pReq->dataLen, &len1); taosArrayPush(pReq->data, &data); } + tEndDecode(pDecoder); return 0; } -int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, int32_t numOfBlocks, - int64_t dstTaskId, int32_t type) { +static int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, + int32_t numOfBlocks, int64_t dstTaskId, int32_t type) { pReq->streamId = pTask->id.streamId; pReq->srcVgId = vgId; + pReq->stage = pTask->pMeta->stage; pReq->upstreamTaskId = pTask->id.taskId; pReq->upstreamChildId = pTask->info.selfChildId; pReq->upstreamNodeId = pTask->info.nodeId; @@ -205,11 +196,11 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) .retrieveLen = dataStrLen, }; - int32_t sz = taosArrayGetSize(pTask->pUpstreamEpInfoList); + int32_t sz = taosArrayGetSize(pTask->pUpstreamInfoList); ASSERT(sz > 0); for (int32_t i = 0; i < sz; i++) { req.reqId = tGenIdPI64(); - SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i); + SStreamChildEpInfo* pEpInfo = 
taosArrayGetP(pTask->pUpstreamInfoList, i); req.dstNodeId = pEpInfo->nodeId; req.dstTaskId = pEpInfo->taskId; int32_t len; @@ -231,7 +222,9 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) tEncodeStreamRetrieveReq(&encoder, &req); tEncoderClear(&encoder); - SRpcMsg rpcMsg = {.code = 0, .msgType = TDMT_STREAM_RETRIEVE, .pCont = buf, .contLen = sizeof(SMsgHead) + len}; + SRpcMsg rpcMsg = {0}; + initRpcMsg(&rpcMsg, TDMT_STREAM_RETRIEVE, buf, len + sizeof(SMsgHead)); + if (tmsgSendReq(&pEpInfo->epSet, &rpcMsg) < 0) { ASSERT(0); goto CLEAR; @@ -274,175 +267,16 @@ int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pR rpcFreeCont(buf); return code; } - tEncoderClear(&encoder); - msg.contLen = tlen + sizeof(SMsgHead); - msg.pCont = buf; - msg.msgType = TDMT_STREAM_TASK_CHECK; - - qDebug("s-task:%s (level:%d) dispatch check msg to s-task:%" PRIx64 ":0x%x (vgId:%d)", pTask->id.idStr, + initRpcMsg(&msg, TDMT_VND_STREAM_TASK_CHECK, buf, tlen + sizeof(SMsgHead)); + qDebug("s-task:%s (level:%d) send check msg to s-task:0x%" PRIx64 ":0x%x (vgId:%d)", pTask->id.idStr, pTask->info.taskLevel, pReq->streamId, pReq->downstreamTaskId, nodeId); tmsgSendReq(pEpSet, &msg); return 0; } -int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId, - SEpSet* pEpSet) { - void* buf = NULL; - int32_t code = -1; - SRpcMsg msg = {0}; - - int32_t tlen; - tEncodeSize(tEncodeStreamScanHistoryFinishReq, pReq, tlen, code); - if (code < 0) { - return -1; - } - - buf = rpcMallocCont(sizeof(SMsgHead) + tlen); - if (buf == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - ((SMsgHead*)buf)->vgId = htonl(vgId); - void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); - - SEncoder encoder; - tEncoderInit(&encoder, abuf, tlen); - if ((code = tEncodeStreamScanHistoryFinishReq(&encoder, pReq)) < 0) { - if (buf) { - rpcFreeCont(buf); - } - return code; - } - - 
tEncoderClear(&encoder); - - msg.contLen = tlen + sizeof(SMsgHead); - msg.pCont = buf; - msg.msgType = TDMT_STREAM_SCAN_HISTORY_FINISH; - - tmsgSendReq(pEpSet, &msg); - - const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); - qDebug("s-task:%s status:%s dispatch scan-history finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pStatus, - pReq->downstreamTaskId, vgId); - return 0; -} - -static int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet) { - void* buf = NULL; - int32_t code = -1; - SRpcMsg msg = {0}; - - // serialize - int32_t tlen; - tEncodeSize(tEncodeStreamDispatchReq, pReq, tlen, code); - if (code < 0) { - goto FAIL; - } - - code = -1; - buf = rpcMallocCont(sizeof(SMsgHead) + tlen); - if (buf == NULL) { - goto FAIL; - } - - ((SMsgHead*)buf)->vgId = htonl(vgId); - void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); - - SEncoder encoder; - tEncoderInit(&encoder, abuf, tlen); - if ((code = tEncodeStreamDispatchReq(&encoder, pReq)) < 0) { - goto FAIL; - } - tEncoderClear(&encoder); - - msg.contLen = tlen + sizeof(SMsgHead); - msg.pCont = buf; - msg.msgType = pTask->msgInfo.msgType; - - qDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg, len:%d", pTask->id.idStr, pReq->taskId, vgId, - msg.contLen); - return tmsgSendReq(pEpSet, &msg); - -FAIL: - if (buf) { - rpcFreeCont(buf); - } - - return code; -} - -int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, SSDataBlock* pDataBlock, int32_t vgSz, - int64_t groupId) { - uint32_t hashValue = 0; - SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; - if (pTask->pNameMap == NULL) { - pTask->pNameMap = tSimpleHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); - } - - void* pVal = tSimpleHashGet(pTask->pNameMap, &groupId, sizeof(int64_t)); - if (pVal) { - SBlockName* pBln = (SBlockName*)pVal; - hashValue = pBln->hashValue; - if (!pDataBlock->info.parTbName[0]) { - 
memset(pDataBlock->info.parTbName, 0, TSDB_TABLE_NAME_LEN); - memcpy(pDataBlock->info.parTbName, pBln->parTbName, strlen(pBln->parTbName)); - } - } else { - char* ctbName = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN); - if (ctbName == NULL) { - return -1; - } - - if (pDataBlock->info.parTbName[0]) { - snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); - } else { - buildCtbNameByGroupIdImpl(pTask->shuffleDispatcher.stbFullName, groupId, pDataBlock->info.parTbName); - snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); - } - - /*uint32_t hashValue = MurmurHash3_32(ctbName, strlen(ctbName));*/ - SUseDbRsp* pDbInfo = &pTask->shuffleDispatcher.dbInfo; - hashValue = - taosGetTbHashVal(ctbName, strlen(ctbName), pDbInfo->hashMethod, pDbInfo->hashPrefix, pDbInfo->hashSuffix); - taosMemoryFree(ctbName); - SBlockName bln = {0}; - bln.hashValue = hashValue; - memcpy(bln.parTbName, pDataBlock->info.parTbName, strlen(pDataBlock->info.parTbName)); - if (tSimpleHashGetSize(pTask->pNameMap) < MAX_BLOCK_NAME_NUM) { - tSimpleHashPut(pTask->pNameMap, &groupId, sizeof(int64_t), &bln, sizeof(SBlockName)); - } - } - - bool found = false; - // TODO: optimize search - int32_t j; - for (j = 0; j < vgSz; j++) { - SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, j); - ASSERT(pVgInfo->vgId > 0); - - if (hashValue >= pVgInfo->hashBegin && hashValue <= pVgInfo->hashEnd) { - if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) { - return -1; - } - - if (pReqs[j].blockNum == 0) { - atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); - } - - pReqs[j].blockNum++; - found = true; - break; - } - } - ASSERT(found); - return 0; -} - static int32_t doDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData) { int32_t code = 0; int32_t numOfBlocks = taosArrayGetSize(pData->blocks); @@ -569,7 +403,11 @@ static void doRetryDispatchData(void* param, void* tmrId) 
{ if (!streamTaskShouldStop(&pTask->status)) { qDebug("s-task:%s reset the waitRspCnt to be 0 before launch retry dispatch", pTask->id.idStr); atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0); - streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS); + if (streamTaskShouldPause(&pTask->status)) { + streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS * 10); + } else { + streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS); + } } else { atomic_sub_fetch_8(&pTask->status.timerActive, 1); qDebug("s-task:%s should stop, abort from timer", pTask->id.idStr); @@ -580,34 +418,101 @@ static void doRetryDispatchData(void* param, void* tmrId) { } void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration) { - qError("s-task:%s dispatch data in %"PRId64"ms", pTask->id.idStr, waitDuration); + qError("s-task:%s dispatch data in %" PRId64 "ms", pTask->id.idStr, waitDuration); taosTmrReset(doRetryDispatchData, waitDuration, pTask, streamEnv.timer, &pTask->launchTaskTimer); } -int32_t streamDispatchStreamBlock(SStreamTask* pTask) { - STaskOutputInfo* pInfo = &pTask->outputInfo; - ASSERT((pInfo->type == TASK_OUTPUT__FIXED_DISPATCH || pInfo->type == TASK_OUTPUT__SHUFFLE_DISPATCH)); +int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, SSDataBlock* pDataBlock, int32_t vgSz, + int64_t groupId) { + uint32_t hashValue = 0; + SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + if (pTask->pNameMap == NULL) { + pTask->pNameMap = tSimpleHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); + } - int32_t numOfElems = taosQueueItemSize(pInfo->queue->queue); + void* pVal = tSimpleHashGet(pTask->pNameMap, &groupId, sizeof(int64_t)); + if (pVal) { + SBlockName* pBln = (SBlockName*)pVal; + hashValue = pBln->hashValue; + if (!pDataBlock->info.parTbName[0]) { + memset(pDataBlock->info.parTbName, 0, TSDB_TABLE_NAME_LEN); + memcpy(pDataBlock->info.parTbName, pBln->parTbName, 
strlen(pBln->parTbName)); + } + } else { + char* ctbName = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN); + if (ctbName == NULL) { + return -1; + } + + if (pDataBlock->info.parTbName[0]) { + snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); + } else { + buildCtbNameByGroupIdImpl(pTask->shuffleDispatcher.stbFullName, groupId, pDataBlock->info.parTbName); + snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); + } + + /*uint32_t hashValue = MurmurHash3_32(ctbName, strlen(ctbName));*/ + SUseDbRsp* pDbInfo = &pTask->shuffleDispatcher.dbInfo; + hashValue = + taosGetTbHashVal(ctbName, strlen(ctbName), pDbInfo->hashMethod, pDbInfo->hashPrefix, pDbInfo->hashSuffix); + taosMemoryFree(ctbName); + SBlockName bln = {0}; + bln.hashValue = hashValue; + memcpy(bln.parTbName, pDataBlock->info.parTbName, strlen(pDataBlock->info.parTbName)); + if (tSimpleHashGetSize(pTask->pNameMap) < MAX_BLOCK_NAME_NUM) { + tSimpleHashPut(pTask->pNameMap, &groupId, sizeof(int64_t), &bln, sizeof(SBlockName)); + } + } + + bool found = false; + // TODO: optimize search + int32_t j; + for (j = 0; j < vgSz; j++) { + SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, j); + ASSERT(pVgInfo->vgId > 0); + + if (hashValue >= pVgInfo->hashBegin && hashValue <= pVgInfo->hashEnd) { + if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) { + return -1; + } + + if (pReqs[j].blockNum == 0) { + atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); + } + + pReqs[j].blockNum++; + found = true; + break; + } + } + ASSERT(found); + return 0; +} + +int32_t streamDispatchStreamBlock(SStreamTask* pTask) { + ASSERT((pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH)); + + const char* id = pTask->id.idStr; + int32_t numOfElems = taosQueueItemSize(pTask->outputInfo.queue->queue); if (numOfElems > 0) { - qDebug("s-task:%s try to dispatch 
intermediate result block to downstream, elem in outputQ:%d", pTask->id.idStr, - numOfElems); + qDebug("s-task:%s try to dispatch intermediate block to downstream, elem in outputQ:%d", id, numOfElems); } // to make sure only one dispatch is running - int8_t old = atomic_val_compare_exchange_8(&pInfo->status, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT); + int8_t old = + atomic_val_compare_exchange_8(&pTask->outputInfo.status, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT); if (old != TASK_OUTPUT_STATUS__NORMAL) { - qDebug("s-task:%s wait for dispatch rsp, not dispatch now, output status:%d", pTask->id.idStr, old); + qDebug("s-task:%s wait for dispatch rsp, not dispatch now, output status:%d", id, old); return 0; } ASSERT(pTask->msgInfo.pData == NULL); - qDebug("s-task:%s start to dispatch msg, set output status:%d", pTask->id.idStr, pInfo->status); + qDebug("s-task:%s start to dispatch msg, set output status:%d", id, pTask->outputInfo.status); - SStreamDataBlock* pBlock = streamQueueNextItem(pInfo->queue); + SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputInfo.queue); if (pBlock == NULL) { - atomic_store_8(&pInfo->status, TASK_OUTPUT_STATUS__NORMAL); - qDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", pTask->id.idStr, pInfo->status); + atomic_store_8(&pTask->outputInfo.status, TASK_OUTPUT_STATUS__NORMAL); + qDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", id, pTask->outputInfo.status); return 0; } @@ -623,8 +528,8 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { break; } - qDebug("s-task:%s failed to dispatch msg to downstream, code:%s, output status:%d, retry cnt:%d", pTask->id.idStr, - tstrerror(terrno), pInfo->status, retryCount); + qDebug("s-task:%s failed to dispatch msg to downstream, code:%s, output status:%d, retry cnt:%d", id, + tstrerror(terrno), pTask->outputInfo.status, retryCount); // todo deal with only partially success dispatch case 
atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0); @@ -646,6 +551,294 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { return TSDB_CODE_SUCCESS; } +int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) { + SStreamScanHistoryFinishReq req = { + .streamId = pTask->id.streamId, + .childId = pTask->info.selfChildId, + .upstreamTaskId = pTask->id.taskId, + .upstreamNodeId = pTask->pMeta->vgId, + }; + + // serialize + if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { + req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; + pTask->notReadyTasks = 1; + doDispatchScanHistoryFinishMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); + } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { + SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + int32_t numOfVgs = taosArrayGetSize(vgInfo); + pTask->notReadyTasks = numOfVgs; + + qDebug("s-task:%s send scan-history data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr, + numOfVgs, streamGetTaskStatusStr(pTask->status.taskStatus)); + for (int32_t i = 0; i < numOfVgs; i++) { + SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); + req.downstreamTaskId = pVgInfo->taskId; + doDispatchScanHistoryFinishMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); + } + } else { + qDebug("s-task:%s no downstream tasks, invoke scan-history finish rsp directly", pTask->id.idStr); + streamProcessScanHistoryFinishRsp(pTask); + } + + return 0; +} + +// this function is usually invoked by sink/agg task +int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { + int32_t num = taosArrayGetSize(pTask->pReadyMsgList); + ASSERT(taosArrayGetSize(pTask->pUpstreamInfoList) == num); + + for (int32_t i = 0; i < num; ++i) { + SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, i); + tmsgSendReq(&pInfo->upstreamNodeEpset, &pInfo->msg); + + qDebug("s-task:%s level:%d checkpoint ready msg sent to upstream:0x%x", 
pTask->id.idStr, pTask->info.taskLevel, + pInfo->upStreamTaskId); + } + + taosArrayClear(pTask->pReadyMsgList); + qDebug("s-task:%s level:%d checkpoint ready msg sent to all %d upstreams", pTask->id.idStr, pTask->info.taskLevel, num); + + return TSDB_CODE_SUCCESS; +} + +// this function is only invoked by source task, and send rsp to mnode +int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask) { + ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE && taosArrayGetSize(pTask->pReadyMsgList) == 1); + SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, 0); + + tmsgSendRsp(&pInfo->msg); + + taosArrayClear(pTask->pReadyMsgList); + qDebug("s-task:%s level:%d source checkpoint completed msg sent to mnode", pTask->id.idStr, pTask->info.taskLevel); + + return TSDB_CODE_SUCCESS; +} + +int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq) { + int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock); + void* buf = taosMemoryCalloc(1, dataStrLen); + if (buf == NULL) return -1; + + SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)buf; + pRetrieve->useconds = 0; + pRetrieve->precision = TSDB_DEFAULT_PRECISION; + pRetrieve->compressed = 0; + pRetrieve->completed = 1; + pRetrieve->streamBlockType = pBlock->info.type; + pRetrieve->numOfRows = htobe64((int64_t)pBlock->info.rows); + pRetrieve->skey = htobe64(pBlock->info.window.skey); + pRetrieve->ekey = htobe64(pBlock->info.window.ekey); + pRetrieve->version = htobe64(pBlock->info.version); + pRetrieve->watermark = htobe64(pBlock->info.watermark); + memcpy(pRetrieve->parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN); + + int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock); + pRetrieve->numOfCols = htonl(numOfCols); + + int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols); + actualLen += sizeof(SRetrieveTableRsp); + ASSERT(actualLen <= dataStrLen); + taosArrayPush(pReq->dataLen, &actualLen); + taosArrayPush(pReq->data, 
&buf); + + pReq->totalLen += dataStrLen; + return 0; +} + +int32_t doDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId, + SEpSet* pEpSet) { + void* buf = NULL; + int32_t code = -1; + SRpcMsg msg = {0}; + + int32_t tlen; + tEncodeSize(tEncodeStreamScanHistoryFinishReq, pReq, tlen, code); + if (code < 0) { + return -1; + } + + buf = rpcMallocCont(sizeof(SMsgHead) + tlen); + if (buf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + ((SMsgHead*)buf)->vgId = htonl(vgId); + void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + + SEncoder encoder; + tEncoderInit(&encoder, abuf, tlen); + if ((code = tEncodeStreamScanHistoryFinishReq(&encoder, pReq)) < 0) { + if (buf) { + rpcFreeCont(buf); + } + return code; + } + + tEncoderClear(&encoder); + + initRpcMsg(&msg, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, buf, tlen + sizeof(SMsgHead)); + + tmsgSendReq(pEpSet, &msg); + const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); + qDebug("s-task:%s status:%s dispatch scan-history finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pStatus, + pReq->downstreamTaskId, vgId); + return 0; +} + +int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet) { + void* buf = NULL; + int32_t code = -1; + SRpcMsg msg = {0}; + + // serialize + int32_t tlen; + tEncodeSize(tEncodeStreamDispatchReq, pReq, tlen, code); + if (code < 0) { + goto FAIL; + } + + code = -1; + buf = rpcMallocCont(sizeof(SMsgHead) + tlen); + if (buf == NULL) { + goto FAIL; + } + + ((SMsgHead*)buf)->vgId = htonl(vgId); + void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + + SEncoder encoder; + tEncoderInit(&encoder, abuf, tlen); + if ((code = tEncodeStreamDispatchReq(&encoder, pReq)) < 0) { + goto FAIL; + } + tEncoderClear(&encoder); + + initRpcMsg(&msg, pTask->msgInfo.msgType, buf, tlen + sizeof(SMsgHead)); + qDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg", pTask->id.idStr, 
pReq->taskId, vgId); + + return tmsgSendReq(pEpSet, &msg); + +FAIL: + if (buf) { + rpcFreeCont(buf); + } + + return code; +} + +int32_t buildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SRpcMsg* pMsg, + int8_t isSucceed) { + int32_t len = 0; + int32_t code = 0; + SEncoder encoder; + + SStreamCheckpointSourceRsp rsp = { + .checkpointId = pReq->checkpointId, + .taskId = pReq->taskId, + .nodeId = pReq->nodeId, + .streamId = pReq->streamId, + .expireTime = pReq->expireTime, + .mnodeId = pReq->mnodeId, + .success = isSucceed, + }; + + tEncodeSize(tEncodeStreamCheckpointSourceRsp, &rsp, len, code); + if (code < 0) { + return code; + } + + void* pBuf = rpcMallocCont(sizeof(SMsgHead) + len); + if (pBuf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + ((SMsgHead*)pBuf)->vgId = htonl(pReq->mnodeId); + + void* abuf = POINTER_SHIFT(pBuf, sizeof(SMsgHead)); + + tEncoderInit(&encoder, (uint8_t*)abuf, len); + tEncodeStreamCheckpointSourceRsp(&encoder, &rsp); + tEncoderClear(&encoder); + + initRpcMsg(pMsg, 0, pBuf, sizeof(SMsgHead) + len); + pMsg->info = *pRpcInfo; + return 0; +} + +int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, + SStreamTask* pTask, int8_t isSucceed) { + SStreamChkptReadyInfo info = {0}; + buildCheckpointSourceRsp(pReq, pRpcInfo, &info.msg, isSucceed); + + if (pTask->pReadyMsgList == NULL) { + pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); + } + + taosArrayPush(pTask->pReadyMsgList, &info); + qDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, (int32_t)taosArrayGetSize(pTask->pReadyMsgList)); + return TSDB_CODE_SUCCESS; +} + +int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, int32_t index, int64_t checkpointId) { + int32_t code = 0; + int32_t tlen = 0; + void* buf = NULL; + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + return TSDB_CODE_SUCCESS; + } + + SStreamChildEpInfo* pInfo = 
streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); + + SStreamCheckpointReadyMsg req = {0}; + req.downstreamNodeId = pTask->pMeta->vgId; + req.downstreamTaskId = pTask->id.taskId; + req.streamId = pTask->id.streamId; + req.checkpointId = checkpointId; + req.childId = pInfo->childId; + req.upstreamNodeId = pInfo->nodeId; + req.upstreamTaskId = pInfo->taskId; + + tEncodeSize(tEncodeStreamCheckpointReadyMsg, &req, tlen, code); + if (code < 0) { + return -1; + } + + buf = rpcMallocCont(sizeof(SMsgHead) + tlen); + if (buf == NULL) { + return -1; + } + + ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId); + void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + + SEncoder encoder; + tEncoderInit(&encoder, abuf, tlen); + if ((code = tEncodeStreamCheckpointReadyMsg(&encoder, &req)) < 0) { + rpcFreeCont(buf); + return code; + } + tEncoderClear(&encoder); + + ASSERT(req.upstreamTaskId != 0); + + SStreamChkptReadyInfo info = {.upStreamTaskId = pInfo->taskId, .upstreamNodeEpset = pInfo->epSet}; + initRpcMsg(&info.msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); + info.msg.info.noResp = 1; // refactor later. 
+ + qDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 ":0x%x (vgId:%d) idx:%d", + pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.downstreamNodeId, index); + + if (pTask->pReadyMsgList == NULL) { + pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); + } + + taosArrayPush(pTask->pReadyMsgList, &info); + return 0; +} + int32_t tEncodeCompleteHistoryDataMsg(SEncoder* pEncoder, const SStreamCompleteHistoryMsg* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; @@ -668,7 +861,7 @@ int32_t tDecodeCompleteHistoryDataMsg(SDecoder* pDecoder, SStreamCompleteHistory return 0; } -int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, SStreamScanHistoryFinishReq* pReq) { +int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, int32_t* pLen) { int32_t len = 0; int32_t code = 0; SEncoder encoder; @@ -699,6 +892,16 @@ int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, tEncodeCompleteHistoryDataMsg(&encoder, &msg); tEncoderClear(&encoder); + *pBuffer = pBuf; + *pLen = len; + return 0; +} + +int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, SStreamScanHistoryFinishReq* pReq) { + void* pBuf = NULL; + int32_t len = 0; + + streamTaskBuildScanhistoryRspMsg(pTask, pReq, &pBuf, &len); SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId); SStreamContinueExecInfo info = {.taskId = pReq->upstreamTaskId, .epset = pInfo->epSet}; @@ -821,3 +1024,57 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i return 0; } + +int32_t tEncodeStreamTaskUpdateMsg(SEncoder* pEncoder, const SStreamTaskNodeUpdateMsg* pMsg) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pMsg->streamId) < 0) return -1; + if (tEncodeI32(pEncoder, 
pMsg->taskId) < 0) return -1; + + int32_t size = taosArrayGetSize(pMsg->pNodeList); + if (tEncodeI32(pEncoder, size) < 0) return -1; + + for (int32_t i = 0; i < size; ++i) { + SNodeUpdateInfo* pInfo = taosArrayGet(pMsg->pNodeList, i); + if (tEncodeI32(pEncoder, pInfo->nodeId) < 0) return -1; + if (tEncodeSEpSet(pEncoder, &pInfo->prevEp) < 0) return -1; + if (tEncodeSEpSet(pEncoder, &pInfo->newEp) < 0) return -1; + } + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeStreamTaskUpdateMsg(SDecoder* pDecoder, SStreamTaskNodeUpdateMsg* pMsg) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pMsg->streamId) < 0) return -1; + if (tDecodeI32(pDecoder, &pMsg->taskId) < 0) return -1; + + int32_t size = 0; + if (tDecodeI32(pDecoder, &size) < 0) return -1; + pMsg->pNodeList = taosArrayInit(size, sizeof(SNodeUpdateInfo)); + for (int32_t i = 0; i < size; ++i) { + SNodeUpdateInfo info = {0}; + if (tDecodeI32(pDecoder, &info.nodeId) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &info.prevEp) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &info.newEp) < 0) return -1; + taosArrayPush(pMsg->pNodeList, &info); + } + + tEndDecode(pDecoder); + return 0; +} + +int32_t tEncodeStreamTaskUpdateRsp(SEncoder* pEncoder, const SStreamTaskNodeUpdateRsp* pMsg) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pMsg->streamId) < 0) return -1; + if (tEncodeI32(pEncoder, pMsg->taskId) < 0) return -1; + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeStreamTaskUpdateRsp(SDecoder* pDecoder, SStreamTaskNodeUpdateRsp* pMsg) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pMsg->streamId) < 0) return -1; + if (tDecodeI32(pDecoder, &pMsg->taskId) < 0) return -1; + tEndDecode(pDecoder); + return 0; +} \ No newline at end of file diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 5e5c165c9c..e61f26fb89 100644 --- a/source/libs/stream/src/streamExec.c +++ 
b/source/libs/stream/src/streamExec.c @@ -16,11 +16,10 @@ #include "streamInt.h" // maximum allowed processed block batches. One block may include several submit blocks -#define MAX_STREAM_EXEC_BATCH_NUM 32 -#define MIN_STREAM_EXEC_BATCH_NUM 4 -#define STREAM_RESULT_DUMP_THRESHOLD 100 +#define MAX_STREAM_EXEC_BATCH_NUM 32 +#define MIN_STREAM_EXEC_BATCH_NUM 4 +#define STREAM_RESULT_DUMP_THRESHOLD 100 -static int32_t updateCheckPointInfo(SStreamTask* pTask); static int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask); bool streamTaskShouldStop(const SStreamStatus* pStatus) { @@ -30,17 +29,11 @@ bool streamTaskShouldStop(const SStreamStatus* pStatus) { bool streamTaskShouldPause(const SStreamStatus* pStatus) { int32_t status = atomic_load_8((int8_t*)&pStatus->taskStatus); - return (status == TASK_STATUS__PAUSE || status == TASK_STATUS__HALT); + return (status == TASK_STATUS__PAUSE); } static int32_t doDumpResult(SStreamTask* pTask, SStreamQueueItem* pItem, SArray* pRes, int32_t size, int64_t* totalSize, int32_t* totalBlocks) { - int32_t code = updateCheckPointInfo(pTask); - if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - return code; - } - int32_t numOfBlocks = taosArrayGetSize(pRes); if (numOfBlocks > 0) { SStreamDataBlock* pStreamBlocks = createStreamBlockFromResults(pItem, pTask, size, pRes); @@ -53,7 +46,7 @@ static int32_t doDumpResult(SStreamTask* pTask, SStreamQueueItem* pItem, SArray* qDebug("s-task:%s dump stream result data blocks, num:%d, size:%.2fMiB", pTask->id.idStr, numOfBlocks, SIZE_IN_MB(size)); - code = streamTaskOutputResultBlock(pTask, pStreamBlocks); + int32_t code = streamTaskOutputResultBlock(pTask, pStreamBlocks); if (code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY) { // back pressure and record position destroyStreamDataBlock(pStreamBlocks); return -1; @@ -103,7 +96,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i resetTaskInfo(pExecutor); } - qError("unexpected 
stream execution, s-task:%s since %s", pTask->id.idStr, terrstr()); + qError("unexpected stream execution, s-task:%s since %s", pTask->id.idStr, tstrerror(code)); continue; } @@ -119,8 +112,8 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i taosArrayPush(pRes, &block); numOfBlocks += 1; - qDebug("s-task:%s(child %d) retrieve process completed, reqId:0x%" PRIx64" dump results", pTask->id.idStr, pTask->info.selfChildId, - pRetrieveBlock->reqId); + qDebug("s-task:%s(child %d) retrieve process completed, reqId:0x%" PRIx64 " dump results", pTask->id.idStr, + pTask->info.selfChildId, pRetrieveBlock->reqId); } break; @@ -131,6 +124,8 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i // TODO } continue; + } else if (output->info.type == STREAM_CHECKPOINT) { + continue; // checkpoint block not dispatch to downstream tasks } SSDataBlock block = {0}; @@ -253,49 +248,27 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize) { return 0; } -int32_t updateCheckPointInfo(SStreamTask* pTask) { - int64_t ckId = 0; - int64_t dataVer = 0; - qGetCheckpointVersion(pTask->exec.pExecutor, &dataVer, &ckId); +int32_t streamTaskGetInputQItems(const SStreamTask* pTask) { + int32_t numOfItems1 = taosQueueItemSize(pTask->inputQueue->queue); + int32_t numOfItems2 = taosQallItemSize(pTask->inputQueue->qall); - SCheckpointInfo* pCkInfo = &pTask->chkInfo; - if (ckId > pCkInfo->id) { // save it since the checkpoint is updated - qDebug("s-task:%s exec end, start to update check point, ver from %" PRId64 " to %" PRId64 - ", checkPoint id:%" PRId64 " -> %" PRId64, - pTask->id.idStr, pCkInfo->version, dataVer, pCkInfo->id, ckId); - - pTask->chkInfo = (SCheckpointInfo){.version = dataVer, .id = ckId, .currentVer = pCkInfo->currentVer}; - - taosWLockLatch(&pTask->pMeta->lock); - - streamMetaSaveTask(pTask->pMeta, pTask); - if (streamMetaCommit(pTask->pMeta) < 0) { - taosWUnLockLatch(&pTask->pMeta->lock); - qError("s-task:%s 
failed to commit stream meta, since %s", pTask->id.idStr, terrstr()); - return -1; - } else { - taosWUnLockLatch(&pTask->pMeta->lock); - qDebug("s-task:%s update checkpoint ver succeed", pTask->id.idStr); - } - } - - return TSDB_CODE_SUCCESS; + return numOfItems1 + numOfItems2; } +// wait for the stream task to be idle static void waitForTaskIdle(SStreamTask* pTask, SStreamTask* pStreamTask) { - // wait for the stream task to be idle - int64_t st = taosGetTimestampMs(); + const char* id = pTask->id.idStr; + int64_t st = taosGetTimestampMs(); while (!streamTaskIsIdle(pStreamTask)) { - qDebug("s-task:%s level:%d wait for stream task:%s to be idle, check again in 100ms", pTask->id.idStr, - pTask->info.taskLevel, pStreamTask->id.idStr); + qDebug("s-task:%s level:%d wait for stream task:%s to be idle, check again in 100ms", id, pTask->info.taskLevel, + pStreamTask->id.idStr); taosMsleep(100); } double el = (taosGetTimestampMs() - st) / 1000.0; if (el > 0) { - qDebug("s-task:%s wait for stream task:%s for %.2fs to be idle", pTask->id.idStr, - pStreamTask->id.idStr, el); + qDebug("s-task:%s wait for stream task:%s for %.2fs to be idle", id, pStreamTask->id.idStr, el); } } @@ -335,7 +308,7 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) { ASSERT(status == TASK_STATUS__HALT || status == TASK_STATUS__DROPPING); } else { - ASSERT(status == TASK_STATUS__SCAN_HISTORY); + ASSERT(status == TASK_STATUS__NORMAL); pStreamTask->status.taskStatus = TASK_STATUS__HALT; qDebug("s-task:%s halt by related fill-history task:%s", pStreamTask->id.idStr, pTask->id.idStr); } @@ -388,7 +361,8 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { // 7. pause allowed. 
streamTaskEnablePause(pStreamTask); if (taosQueueEmpty(pStreamTask->inputQueue->queue)) { - SStreamRefDataBlock* pItem = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0);; + SStreamRefDataBlock* pItem = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); + SSDataBlock* pDelBlock = createSpecialDataBlock(STREAM_DELETE_DATA); pDelBlock->info.rows = 0; pDelBlock->info.version = 0; @@ -419,89 +393,53 @@ int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { return code; } -static int32_t extractBlocksFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks) { - int32_t retryTimes = 0; - int32_t MAX_RETRY_TIMES = 5; - const char* id = pTask->id.idStr; +// set input +static void doSetStreamInputBlock(SStreamTask* pTask, const void* pInput, int64_t* pVer, const char* id) { + void* pExecutor = pTask->exec.pExecutor; - if (pTask->info.taskLevel == TASK_LEVEL__SINK) { // extract block from inputQ, one-by-one - while (1) { - if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) { - qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); - return TSDB_CODE_SUCCESS; - } + const SStreamQueueItem* pItem = pInput; + if (pItem->type == STREAM_INPUT__GET_RES) { + const SStreamTrigger* pTrigger = (const SStreamTrigger*)pInput; + qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK); - SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); - if (qItem == NULL) { - qDebug("===stream===break batchSize:%d, %s", *numOfBlocks, id); - return TSDB_CODE_SUCCESS; - } + } else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { + ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); + const SStreamDataSubmit* pSubmit = (const SStreamDataSubmit*)pInput; + qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT); + qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, id, pSubmit, + 
pSubmit->submit.msgStr, pSubmit->submit.msgLen, pSubmit->submit.ver); + ASSERT((*pVer) <= pSubmit->submit.ver); + (*pVer) = pSubmit->submit.ver; - qDebug("s-task:%s sink task handle block one-by-one, type:%d", id, qItem->type); + } else if (pItem->type == STREAM_INPUT__DATA_BLOCK || pItem->type == STREAM_INPUT__DATA_RETRIEVE) { + const SStreamDataBlock* pBlock = (const SStreamDataBlock*)pInput; - *numOfBlocks = 1; - *pInput = qItem; - return TSDB_CODE_SUCCESS; - } - } + SArray* pBlockList = pBlock->blocks; + int32_t numOfBlocks = taosArrayGetSize(pBlockList); + qDebug("s-task:%s set sdata blocks as input num:%d, ver:%" PRId64, id, numOfBlocks, pBlock->sourceVer); + qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__DATA_BLOCK); - // non sink task - while (1) { - if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) { - qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); - return TSDB_CODE_SUCCESS; - } + } else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) { + const SStreamMergedSubmit* pMerged = (const SStreamMergedSubmit*)pInput; - SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); - if (qItem == NULL) { - if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && (++retryTimes) < MAX_RETRY_TIMES) { - taosMsleep(10); - qDebug("try again batchSize:%d, retry:%d, %s", *numOfBlocks, retryTimes, id); - continue; - } + SArray* pBlockList = pMerged->submits; + int32_t numOfBlocks = taosArrayGetSize(pBlockList); + qDebug("s-task:%s %p set (merged) submit blocks as a batch, numOfBlocks:%d, ver:%" PRId64, id, pTask, numOfBlocks, + pMerged->ver); + qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__MERGED_SUBMIT); + ASSERT((*pVer) <= pMerged->ver); + (*pVer) = pMerged->ver; - qDebug("break batchSize:%d, %s", *numOfBlocks, id); - return TSDB_CODE_SUCCESS; - } + } else if (pItem->type == STREAM_INPUT__REF_DATA_BLOCK) { + const 
SStreamRefDataBlock* pRefBlock = (const SStreamRefDataBlock*)pInput; + qSetMultiStreamInput(pExecutor, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK); - // do not merge blocks for sink node and check point data block - if (qItem->type == STREAM_INPUT__CHECKPOINT || qItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER || - qItem->type == STREAM_INPUT__TRANS_STATE) { - if (*pInput == NULL) { - qDebug("s-task:%s checkpoint/transtate msg extracted, start to process immediately", id); - *numOfBlocks = 1; - *pInput = qItem; - return TSDB_CODE_SUCCESS; - } else { - // previous existed blocks needs to be handle, before handle the checkpoint msg block - qDebug("s-task:%s checkpoint/transtate msg extracted, handle previous blocks, numOfBlocks:%d", id, *numOfBlocks); - streamQueueProcessFail(pTask->inputQueue); - return TSDB_CODE_SUCCESS; - } - } else { - if (*pInput == NULL) { - ASSERT((*numOfBlocks) == 0); - *pInput = qItem; - } else { - // todo we need to sort the data block, instead of just appending into the array list. 
- void* newRet = streamMergeQueueItem(*pInput, qItem); - if (newRet == NULL) { - qError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d", id, *numOfBlocks); - streamQueueProcessFail(pTask->inputQueue); - return TSDB_CODE_SUCCESS; - } + } else if (pItem->type == STREAM_INPUT__CHECKPOINT || pItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + const SStreamDataBlock* pCheckpoint = (const SStreamDataBlock*)pInput; + qSetMultiStreamInput(pExecutor, pCheckpoint->blocks, 1, pItem->type); - *pInput = newRet; - } - - *numOfBlocks += 1; - streamQueueProcessSuccess(pTask->inputQueue); - - if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) { - qDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM); - return TSDB_CODE_SUCCESS; - } - } + } else { + ASSERT(0); } } @@ -543,7 +481,7 @@ int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock } else { // level == TASK_LEVEL__SINK streamFreeQitem((SStreamQueueItem*)pBlock); } - } else { // non-dispatch task, do task state transfer directly + } else { // non-dispatch task, do task state transfer directly streamFreeQitem((SStreamQueueItem*)pBlock); if (level != TASK_LEVEL__SINK) { qDebug("s-task:%s non-dispatch task, start to transfer state directly", id); @@ -569,20 +507,28 @@ int32_t streamExecForAll(SStreamTask* pTask) { const char* id = pTask->id.idStr; while (1) { - int32_t batchSize = 0; + int32_t numOfBlocks = 0; SStreamQueueItem* pInput = NULL; if (streamTaskShouldStop(&pTask->status)) { - qDebug("s-task:%s stream task stopped, abort", id); + qDebug("s-task:%s stream task is stopped", id); break; } // merge multiple input data if possible in the input queue. 
qDebug("s-task:%s start to extract data block from inputQ", id); - /*int32_t code = */extractBlocksFromInputQ(pTask, &pInput, &batchSize); + /*int32_t code = */ extractBlocksFromInputQ(pTask, &pInput, &numOfBlocks); if (pInput == NULL) { - ASSERT(batchSize == 0); - break; + ASSERT(numOfBlocks == 0); + return 0; + } + + int32_t type = pInput->type; + + // dispatch checkpoint msg to all downstream tasks + if (type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + streamProcessCheckpointBlock(pTask, (SStreamDataBlock*)pInput); + continue; } if (pInput->type == STREAM_INPUT__TRANS_STATE) { @@ -591,60 +537,52 @@ int32_t streamExecForAll(SStreamTask* pTask) { } if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - ASSERT(pInput->type == STREAM_INPUT__DATA_BLOCK); - qDebug("s-task:%s sink task start to sink %d blocks", id, batchSize); - streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pInput); - continue; + ASSERT(type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__CHECKPOINT); + + if (type == STREAM_INPUT__DATA_BLOCK) { + qDebug("s-task:%s sink task start to sink %d blocks", id, numOfBlocks); + streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pInput); + continue; + } } int64_t st = taosGetTimestampMs(); - qDebug("s-task:%s start to process batch of blocks, num:%d", id, batchSize); - { - // set input - void* pExecutor = pTask->exec.pExecutor; + const SStreamQueueItem* pItem = pInput; + qDebug("s-task:%s start to process batch of blocks, num:%d, type:%d", id, numOfBlocks, pItem->type); - const SStreamQueueItem* pItem = pInput; - if (pItem->type == STREAM_INPUT__GET_RES) { - const SStreamTrigger* pTrigger = (const SStreamTrigger*)pInput; - qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK); - } else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { - ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); - const SStreamDataSubmit* pSubmit = (const SStreamDataSubmit*)pInput; - qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, 
STREAM_INPUT__DATA_SUBMIT); - qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, id, pSubmit, - pSubmit->submit.msgStr, pSubmit->submit.msgLen, pSubmit->submit.ver); - } else if (pItem->type == STREAM_INPUT__DATA_BLOCK || pItem->type == STREAM_INPUT__DATA_RETRIEVE) { - const SStreamDataBlock* pBlock = (const SStreamDataBlock*)pInput; - - SArray* pBlockList = pBlock->blocks; - int32_t numOfBlocks = taosArrayGetSize(pBlockList); - qDebug("s-task:%s set sdata blocks as input num:%d, ver:%" PRId64, id, numOfBlocks, pBlock->sourceVer); - qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__DATA_BLOCK); - } else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) { - const SStreamMergedSubmit* pMerged = (const SStreamMergedSubmit*)pInput; - - SArray* pBlockList = pMerged->submits; - int32_t numOfBlocks = taosArrayGetSize(pBlockList); - qDebug("s-task:%s %p set (merged) submit blocks as a batch, numOfBlocks:%d", id, pTask, numOfBlocks); - qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__MERGED_SUBMIT); - } else if (pItem->type == STREAM_INPUT__REF_DATA_BLOCK) { - const SStreamRefDataBlock* pRefBlock = (const SStreamRefDataBlock*)pInput; - qSetMultiStreamInput(pExecutor, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK); - } else { - ASSERT(0); - } - } + int64_t ver = pTask->chkInfo.checkpointVer; + doSetStreamInputBlock(pTask, pInput, &ver, id); int64_t resSize = 0; int32_t totalBlocks = 0; streamTaskExecImpl(pTask, pInput, &resSize, &totalBlocks); - double el = (taosGetTimestampMs() - st) / 1000.0; - qDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", - id, el, SIZE_IN_MB(resSize), totalBlocks); + double el = (taosGetTimestampMs() - st) / 1000.0; + qDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", id, el, + SIZE_IN_MB(resSize), totalBlocks); + + // update the currentVer if 
processing the submit blocks. + ASSERT(pTask->chkInfo.checkpointVer <= pTask->chkInfo.currentVer && ver >= pTask->chkInfo.checkpointVer); + + if (ver != pTask->chkInfo.checkpointVer) { + qDebug("s-task:%s update checkpointVer(unsaved) from %" PRId64 " to %" PRId64, pTask->id.idStr, + pTask->chkInfo.checkpointVer, ver); + pTask->chkInfo.checkpointVer = ver; + } streamFreeQitem(pInput); + + // todo other thread may change the status + // do nothing after sync executor state to storage backend, untill the vnode-level checkpoint is completed. + if (type == STREAM_INPUT__CHECKPOINT) { +// ASSERT(pTask->status.taskStatus == TASK_STATUS__CK); +// pTask->status.taskStatus = TASK_STATUS__CK_READY; + qDebug("s-task:%s checkpoint block received, set the status:%s", pTask->id.idStr, + streamGetTaskStatusStr(pTask->status.taskStatus)); + streamTaskBuildCheckpoint(pTask); + return 0; + } } return 0; @@ -671,7 +609,8 @@ int32_t streamTryExec(SStreamTask* pTask) { return -1; } - // todo the task should be commit here +// streamTaskBuildCheckpoint(pTask); + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus); @@ -711,7 +650,7 @@ int32_t streamTaskReloadState(SStreamTask* pTask) { } int32_t streamAlignTransferState(SStreamTask* pTask) { - int32_t numOfUpstream = taosArrayGetSize(pTask->pUpstreamEpInfoList); + int32_t numOfUpstream = taosArrayGetSize(pTask->pUpstreamInfoList); int32_t old = atomic_val_compare_exchange_32(&pTask->transferStateAlignCnt, 0, numOfUpstream); if (old == 0) { qDebug("s-task:%s set the transfer state aligncnt %d", pTask->id.idStr, numOfUpstream); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 45878bb865..f1fb97bc64 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -16,49 +16,113 @@ #include "executor.h" 
#include "streamBackendRocksdb.h" #include "streamInt.h" +#include "tmisce.h" #include "tref.h" +#include "tstream.h" #include "ttimer.h" +#define META_HB_CHECK_INTERVAL 200 +#define META_HB_SEND_IDLE_COUNTER 25 // send hb every 5 sec +#define STREAM_TASK_KEY_LEN ((sizeof(int64_t)) << 1) + static TdThreadOnce streamMetaModuleInit = PTHREAD_ONCE_INIT; -int32_t streamBackendId = 0; -int32_t streamBackendCfWrapperId = 0; + +int32_t streamBackendId = 0; +int32_t streamBackendCfWrapperId = 0; +int32_t streamMetaId = 0; + +static int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta); +static void metaHbToMnode(void* param, void* tmrId); +static void streamMetaClear(SStreamMeta* pMeta); +static int32_t streamMetaBegin(SStreamMeta* pMeta); +static void streamMetaCloseImpl(void* arg); +static void extractStreamTaskKey(int64_t* pKey, const SStreamTask* pTask); + +typedef struct { + TdThreadMutex mutex; + SHashObj* pTable; +} SMetaRefMgt; + +SMetaRefMgt gMetaRefMgt; + +void metaRefMgtInit(); +void metaRefMgtCleanup(); +int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid); static void streamMetaEnvInit() { streamBackendId = taosOpenRef(64, streamBackendCleanup); streamBackendCfWrapperId = taosOpenRef(64, streamBackendHandleCleanup); + + streamMetaId = taosOpenRef(64, streamMetaCloseImpl); + + metaRefMgtInit(); } void streamMetaInit() { taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); } void streamMetaCleanup() { taosCloseRef(streamBackendId); taosCloseRef(streamBackendCfWrapperId); + taosCloseRef(streamMetaId); + + metaRefMgtCleanup(); } -SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId) { +void metaRefMgtInit() { + taosThreadMutexInit(&(gMetaRefMgt.mutex), NULL); + gMetaRefMgt.pTable = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK); +} + +void metaRefMgtCleanup() { + void* pIter = taosHashIterate(gMetaRefMgt.pTable, NULL); + while (pIter) { + SArray* list = *(SArray**)pIter; 
+ for (int i = 0; i < taosArrayGetSize(list); i++) { + void* rid = taosArrayGetP(list, i); + taosMemoryFree(rid); + } + taosArrayDestroy(list); + pIter = taosHashIterate(gMetaRefMgt.pTable, pIter); + } + taosHashCleanup(gMetaRefMgt.pTable); + + taosThreadMutexDestroy(&gMetaRefMgt.mutex); +} + +int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid) { + taosThreadMutexLock(&gMetaRefMgt.mutex); + void* p = taosHashGet(gMetaRefMgt.pTable, &vgId, sizeof(vgId)); + if (p == NULL) { + SArray* list = taosArrayInit(8, sizeof(void*)); + taosArrayPush(list, &rid); + taosHashPut(gMetaRefMgt.pTable, &vgId, sizeof(vgId), &list, sizeof(void*)); + } else { + SArray* list = *(SArray**)p; + taosArrayPush(list, &rid); + } + taosThreadMutexUnlock(&gMetaRefMgt.mutex); + return 0; +} + +SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage) { int32_t code = -1; SStreamMeta* pMeta = taosMemoryCalloc(1, sizeof(SStreamMeta)); if (pMeta == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; + qError("vgId:%d failed to prepare stream meta, alloc size:%" PRIzu ", out of memory", vgId, sizeof(SStreamMeta)); return NULL; } - int32_t len = strlen(path) + 20; - char* streamPath = taosMemoryCalloc(1, len); - sprintf(streamPath, "%s/%s", path, "stream"); - pMeta->path = taosStrdup(streamPath); + int32_t len = strlen(path) + 64; + char* tpath = taosMemoryCalloc(1, len); + + sprintf(tpath, "%s%s%s", path, TD_DIRSEP, "stream"); + pMeta->path = tpath; + if (tdbOpen(pMeta->path, 16 * 1024, 1, &pMeta->db, 0) < 0) { goto _err; } - memset(streamPath, 0, len); - sprintf(streamPath, "%s/%s", pMeta->path, "checkpoints"); - code = taosMulModeMkDir(streamPath, 0755, false); - if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(code); - goto _err; - } - - if (tdbTbOpen("task.db", sizeof(int32_t), -1, NULL, pMeta->db, &pMeta->pTaskDb, 0) < 0) { + if (tdbTbOpen("task.db", STREAM_TASK_KEY_LEN, -1, NULL, pMeta->db, &pMeta->pTaskDb, 0) < 0) { goto _err; } @@ -66,6 +130,10 @@ 
SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF goto _err; } + if (streamMetaBegin(pMeta) < 0) { + goto _err; + } + _hash_fn_t fp = taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR); pMeta->pTasks = taosHashInit(64, fp, true, HASH_NO_LOCK); if (pMeta->pTasks == NULL) { @@ -73,80 +141,187 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF } // task list - pMeta->pTaskList = taosArrayInit(4, sizeof(SStreamId)); + pMeta->pTaskList = taosArrayInit(4, sizeof(SStreamTaskId)); if (pMeta->pTaskList == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - if (streamMetaBegin(pMeta) < 0) { - goto _err; - } - pMeta->walScanCounter = 0; pMeta->vgId = vgId; pMeta->ahandle = ahandle; pMeta->expandFunc = expandFunc; + pMeta->stage = stage; - memset(streamPath, 0, len); - sprintf(streamPath, "%s/%s", pMeta->path, "state"); - code = taosMulModeMkDir(streamPath, 0755, false); - if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(code); - goto _err; - } + // send heartbeat every 5sec. 
+ pMeta->rid = taosAddRef(streamMetaId, pMeta); + int64_t* pRid = taosMemoryMalloc(sizeof(int64_t)); + *pRid = pMeta->rid; + + metaRefMgtAdd(pMeta->vgId, pRid); + + pMeta->hbInfo.hbTmr = taosTmrStart(metaHbToMnode, META_HB_CHECK_INTERVAL, pRid, streamEnv.timer); + pMeta->hbInfo.tickCounter = 0; + pMeta->hbInfo.stopFlag = 0; - pMeta->streamBackend = streamBackendInit(streamPath); - if (pMeta->streamBackend == NULL) { - goto _err; - } - pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); pMeta->pTaskBackendUnique = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + pMeta->chkpSaved = taosArrayInit(4, sizeof(int64_t)); + pMeta->chkpInUse = taosArrayInit(4, sizeof(int64_t)); + pMeta->chkpCap = 8; + taosInitRWLatch(&pMeta->chkpDirLock); - taosMemoryFree(streamPath); + pMeta->chkpId = streamGetLatestCheckpointId(pMeta); + pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + while (pMeta->streamBackend == NULL) { + taosMsleep(2 * 1000); + pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + if (pMeta->streamBackend == NULL) { + qError("vgId:%d failed to init stream backend", pMeta->vgId); + qInfo("vgId:%d retry to init stream backend", pMeta->vgId); + } + } + pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); + + code = streamBackendLoadCheckpointInfo(pMeta); taosInitRWLatch(&pMeta->lock); taosThreadMutexInit(&pMeta->backendMutex, NULL); + pMeta->pauseTaskNum = 0; + + qInfo("vgId:%d open stream meta successfully, latest checkpoint:%" PRId64 ", stage:%" PRId64, vgId, pMeta->chkpId, + stage); return pMeta; _err: - taosMemoryFree(streamPath); taosMemoryFree(pMeta->path); if (pMeta->pTasks) taosHashCleanup(pMeta->pTasks); if (pMeta->pTaskList) taosArrayDestroy(pMeta->pTaskList); if (pMeta->pTaskDb) tdbTbClose(pMeta->pTaskDb); if (pMeta->pCheckpointDb) tdbTbClose(pMeta->pCheckpointDb); if (pMeta->db) tdbClose(pMeta->db); - // if 
(pMeta->streamBackend) streamBackendCleanup(pMeta->streamBackend); + + // taosThreadMutexDestroy(&pMeta->backendMutex); + // taosThreadRwlockDestroy(&pMeta->lock); + taosMemoryFree(pMeta); + qError("failed to open stream meta"); return NULL; } +int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId) { + streamMetaClear(pMeta); + + pMeta->streamBackendRid = -1; + pMeta->streamBackend = NULL; + + char* defaultPath = taosMemoryCalloc(1, strlen(pMeta->path) + 128); + sprintf(defaultPath, "%s%s%s", pMeta->path, TD_DIRSEP, "state"); + taosRemoveDir(defaultPath); + + char* newPath = taosMemoryCalloc(1, strlen(pMeta->path) + 128); + sprintf(newPath, "%s%s%s", pMeta->path, TD_DIRSEP, "received"); + + int32_t code = taosStatFile(newPath, NULL, NULL, NULL); + if (code == 0) { + // directory exists + code = taosRenameFile(newPath, defaultPath); + if (code != 0) { + terrno = TAOS_SYSTEM_ERROR(code); + qError("vgId:%d failed to rename file, from %s to %s, code:%s", pMeta->vgId, newPath, defaultPath, + tstrerror(terrno)); + + taosMemoryFree(defaultPath); + taosMemoryFree(newPath); + return -1; + } + } + + pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + while (pMeta->streamBackend == NULL) { + taosMsleep(2 * 1000); + pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + if (pMeta->streamBackend == NULL) { + qError("vgId:%d failed to init stream backend", pMeta->vgId); + qInfo("vgId:%d retry to init stream backend", pMeta->vgId); + // return -1; + } + } + pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); + streamBackendLoadCheckpointInfo(pMeta); + + return 0; +} + +void streamMetaClear(SStreamMeta* pMeta) { + void* pIter = NULL; + while ((pIter = taosHashIterate(pMeta->pTasks, pIter)) != NULL) { + SStreamTask* p = *(SStreamTask**)pIter; + + // release the ref by timer + if (p->triggerParam != 0 && p->info.fillHistory == 0) { // one more ref in timer + qDebug("s-task:%s stop schedTimer, and (before) desc 
ref:%d", p->id.idStr, p->refCnt); + taosTmrStop(p->schedTimer); + p->triggerParam = 0; + streamMetaReleaseTask(pMeta, p); + } + + streamMetaReleaseTask(pMeta, p); + } + + taosRemoveRef(streamBackendId, pMeta->streamBackendRid); + + taosHashClear(pMeta->pTasks); + taosHashClear(pMeta->pTaskBackendUnique); + + taosArrayClear(pMeta->pTaskList); + taosArrayClear(pMeta->chkpSaved); + taosArrayClear(pMeta->chkpInUse); +} + void streamMetaClose(SStreamMeta* pMeta) { + qDebug("start to close stream meta"); + if (pMeta == NULL) { + return; + } + + // int64_t rid = *(int64_t*)pMeta->pRid; + // if (taosTmrStop(pMeta->hbInfo.hbTmr)) { + // taosMemoryFree(pMeta->pRid); + // } else { + // // do nothing, stop by timer thread + // } + taosRemoveRef(streamMetaId, pMeta->rid); +} + +void streamMetaCloseImpl(void* arg) { + SStreamMeta* pMeta = arg; + qDebug("start to do-close stream meta"); + if (pMeta == NULL) { + return; + } + + streamMetaClear(pMeta); + tdbAbort(pMeta->db, pMeta->txn); tdbTbClose(pMeta->pTaskDb); tdbTbClose(pMeta->pCheckpointDb); tdbClose(pMeta->db); - void* pIter = NULL; - while (1) { - pIter = taosHashIterate(pMeta->pTasks, pIter); - if (pIter == NULL) { - break; - } - tFreeStreamTask(*(SStreamTask**)pIter); - } + taosArrayDestroy(pMeta->pTaskList); + taosArrayDestroy(pMeta->chkpSaved); + taosArrayDestroy(pMeta->chkpInUse); taosHashCleanup(pMeta->pTasks); - taosRemoveRef(streamBackendId, pMeta->streamBackendRid); - pMeta->pTaskList = taosArrayDestroy(pMeta->pTaskList); + taosHashCleanup(pMeta->pTaskBackendUnique); + taosMemoryFree(pMeta->path); taosThreadMutexDestroy(&pMeta->backendMutex); - taosHashCleanup(pMeta->pTaskBackendUnique); + taosMemoryFree(pMeta); + qDebug("end to close stream meta"); } int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { @@ -168,7 +343,10 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { tEncodeStreamTask(&encoder, pTask); tEncoderClear(&encoder); - if (tdbTbUpsert(pMeta->pTaskDb, 
&pTask->id.taskId, sizeof(int32_t), buf, len, pMeta->txn) < 0) { + int64_t key[2] = {0}; + extractStreamTaskKey(key, pTask); + + if (tdbTbUpsert(pMeta->pTaskDb, key, STREAM_TASK_KEY_LEN, buf, len, pMeta->txn) < 0) { qError("s-task:%s save to disk failed, code:%s", pTask->id.idStr, tstrerror(terrno)); return -1; } @@ -177,12 +355,18 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { return 0; } -int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { - int32_t code = tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(taskId), pMeta->txn); +void extractStreamTaskKey(int64_t* pKey, const SStreamTask* pTask) { + pKey[0] = pTask->id.streamId; + pKey[1] = pTask->id.taskId; +} + +int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int64_t* pKey) { + int32_t code = tdbTbDelete(pMeta->pTaskDb, pKey, STREAM_TASK_KEY_LEN, pMeta->txn); if (code != 0) { - qError("vgId:%d failed to remove task:0x%x from metastore, code:%s", pMeta->vgId, taskId, tstrerror(terrno)); + qError("vgId:%d failed to remove task:0x%x from metastore, code:%s", pMeta->vgId, (int32_t)pKey[1], + tstrerror(terrno)); } else { - qDebug("vgId:%d remove task:0x%x from metastore", pMeta->vgId, taskId); + qDebug("vgId:%d remove task:0x%x from metastore", pMeta->vgId, (int32_t)pKey[1]); } return code; @@ -193,7 +377,7 @@ int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTa *pAdded = false; int64_t keys[2] = {pTask->id.streamId, pTask->id.taskId}; - void* p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + void* p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); if (p == NULL) { if (pMeta->expandFunc(pMeta->ahandle, pTask, ver) < 0) { tFreeStreamTask(pTask); @@ -226,10 +410,26 @@ int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta) { return (int32_t)size; } +int32_t streamMetaGetNumOfStreamTasks(SStreamMeta* pMeta) { + int32_t num = 0; + size_t size = taosArrayGetSize(pMeta->pTaskList); + for (int32_t i = 0; i < size; ++i) { + SStreamTaskId* pId = 
taosArrayGet(pMeta->pTaskList, i); + int64_t keys[2] = {pId->streamId, pId->taskId}; + + SStreamTask** p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + if ((*p)->info.fillHistory == 0) { + num += 1; + } + } + + return num; +} + SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) { taosRLockLatch(&pMeta->lock); - int64_t keys[2] = {streamId, taskId}; + int64_t keys[2] = {streamId, taskId}; SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); if (ppTask != NULL) { if (!streamTaskShouldStop(&(*ppTask)->status)) { @@ -244,7 +444,7 @@ SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t return NULL; } -void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { +void streamMetaReleaseTask(SStreamMeta* UNUSED_PARAM(pMeta), SStreamTask* pTask) { int32_t ref = atomic_sub_fetch_32(&pTask->refCnt, 1); if (ref > 0) { qTrace("s-task:%s release task, ref:%d", pTask->id.idStr, ref); @@ -257,9 +457,9 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { } } -static void doRemoveIdFromList(SStreamMeta* pMeta, int32_t num, SStreamId* id) { +static void doRemoveIdFromList(SStreamMeta* pMeta, int32_t num, SStreamTaskId* id) { for (int32_t i = 0; i < num; ++i) { - SStreamId* pTaskId = taosArrayGet(pMeta->pTaskList, i); + SStreamTaskId* pTaskId = taosArrayGet(pMeta->pTaskList, i); if (pTaskId->streamId == id->streamId && pTaskId->taskId == id->taskId) { taosArrayRemove(pMeta->pTaskList, i); break; @@ -273,10 +473,14 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t // pre-delete operation taosWLockLatch(&pMeta->lock); - int64_t keys[2] = {streamId, taskId}; + int64_t keys[2] = {streamId, taskId}; SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); if (ppTask) { pTask = *ppTask; + if (streamTaskShouldPause(&pTask->status)) { + int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1); + 
qInfo("vgId:%d s-task:%s drop stream task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num); + } atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING); } else { qDebug("vgId:%d failed to find the task:0x%x, it may be dropped already", pMeta->vgId, taskId); @@ -317,7 +521,7 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t ASSERT(pTask->status.timerActive == 0); doRemoveIdFromList(pMeta, (int32_t)taosArrayGetSize(pMeta->pTaskList), &pTask->id); - streamMetaRemoveTask(pMeta, taskId); + streamMetaRemoveTask(pMeta, keys); streamMetaReleaseTask(pMeta, pTask); } else { qDebug("vgId:%d failed to find the task:0x%x, it may have been dropped already", pMeta->vgId, taskId); @@ -338,38 +542,73 @@ int32_t streamMetaBegin(SStreamMeta* pMeta) { // todo add error log int32_t streamMetaCommit(SStreamMeta* pMeta) { if (tdbCommit(pMeta->db, pMeta->txn) < 0) { - qError("failed to commit stream meta"); + qError("vgId:%d failed to commit stream meta", pMeta->vgId); return -1; } if (tdbPostCommit(pMeta->db, pMeta->txn) < 0) { - qError("failed to commit stream meta"); + qError("vgId:%d failed to do post-commit stream meta", pMeta->vgId); return -1; } if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { + qError("vgId:%d failed to begin trans", pMeta->vgId); return -1; } return 0; } -int32_t streamMetaAbort(SStreamMeta* pMeta) { - if (tdbAbort(pMeta->db, pMeta->txn) < 0) { - return -1; +int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta) { + int64_t chkpId = 0; + + TBC* pCur = NULL; + if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { + return chkpId; } - if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, - TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { - return -1; + void* pKey = NULL; + int32_t kLen = 0; + void* pVal = NULL; + int32_t vLen = 0; + SDecoder decoder; + + tdbTbcMoveToFirst(pCur); + while (tdbTbcNext(pCur, &pKey, &kLen, 
&pVal, &vLen) == 0) { + if (pVal == NULL || vLen == 0) { + break; + } + SCheckpointInfo info; + tDecoderInit(&decoder, (uint8_t*)pVal, vLen); + if (tDecodeStreamTaskChkInfo(&decoder, &info) < 0) { + continue; + } + tDecoderClear(&decoder); + + chkpId = TMAX(chkpId, info.checkpointId); } - return 0; + + qDebug("get max chkp id: %" PRId64 "", chkpId); + + tdbFree(pKey); + tdbFree(pVal); + tdbTbcClose(pCur); + + return chkpId; } -int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { +static void doClear(void* pKey, void* pVal, TBC* pCur, SArray* pRecycleList) { + tdbFree(pKey); + tdbFree(pVal); + tdbTbcClose(pCur); + taosArrayDestroy(pRecycleList); +} + +int32_t streamLoadTasks(SStreamMeta* pMeta) { TBC* pCur = NULL; + qInfo("vgId:%d load stream tasks from meta files", pMeta->vgId); if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { qError("vgId:%d failed to open stream meta, code:%s", pMeta->vgId, tstrerror(terrno)); return -1; @@ -380,26 +619,20 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { void* pVal = NULL; int32_t vLen = 0; SDecoder decoder; - SArray* pRecycleList = taosArrayInit(4, sizeof(int32_t)); + SArray* pRecycleList = taosArrayInit(4, STREAM_TASK_KEY_LEN); tdbTbcMoveToFirst(pCur); - while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask)); if (pTask == NULL) { - tdbFree(pKey); - tdbFree(pVal); - tdbTbcClose(pCur); - taosArrayDestroy(pRecycleList); + doClear(pKey, pVal, pCur, pRecycleList); return -1; } + tDecoderInit(&decoder, (uint8_t*)pVal, vLen); if (tDecodeStreamTask(&decoder, pTask) < 0) { tDecoderClear(&decoder); - tdbFree(pKey); - tdbFree(pVal); - tdbTbcClose(pCur); - taosArrayDestroy(pRecycleList); + doClear(pKey, pVal, pCur, pRecycleList); tFreeStreamTask(pTask); qError( "stream read incompatible data, rm %s/vnode/vnode*/tq/stream if taosd cannot start, and rebuild stream " @@ -413,8 +646,10 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { int32_t 
taskId = pTask->id.taskId; tFreeStreamTask(pTask); - taosArrayPush(pRecycleList, &taskId); + int64_t key[2] = {0}; + extractStreamTaskKey(key, pTask); + taosArrayPush(pRecycleList, key); int32_t total = taosArrayGetSize(pRecycleList); qDebug("s-task:0x%x is already dropped, add into recycle list, total:%d", taskId, total); continue; @@ -422,14 +657,11 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { // do duplicate task check. int64_t keys[2] = {pTask->id.streamId, pTask->id.taskId}; - void* p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + void* p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); if (p == NULL) { - if (pMeta->expandFunc(pMeta->ahandle, pTask, pTask->chkInfo.version) < 0) { - tdbFree(pKey); - tdbFree(pVal); - tdbTbcClose(pCur); + if (pMeta->expandFunc(pMeta->ahandle, pTask, pTask->chkInfo.checkpointVer) < 0) { + doClear(pKey, pVal, pCur, pRecycleList); tFreeStreamTask(pTask); - taosArrayDestroy(pRecycleList); return -1; } @@ -437,22 +669,24 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { } else { tdbFree(pKey); tdbFree(pVal); - tdbTbcClose(pCur); taosMemoryFree(pTask); continue; } + streamTaskResetUpstreamStageInfo(pTask); if (taosHashPut(pMeta->pTasks, keys, sizeof(keys), &pTask, sizeof(void*)) < 0) { - tdbFree(pKey); - tdbFree(pVal); - tdbTbcClose(pCur); + doClear(pKey, pVal, pCur, pRecycleList); tFreeStreamTask(pTask); - taosArrayDestroy(pRecycleList); return -1; } + if (streamTaskShouldPause(&pTask->status)) { + atomic_add_fetch_32(&pMeta->pauseTaskNum, 1); + } + ASSERT(pTask->status.downstreamReady == 0); } + qInfo("vgId:%d pause task num:%d", pMeta->vgId, pMeta->pauseTaskNum); tdbFree(pKey); tdbFree(pVal); @@ -463,12 +697,212 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { if (taosArrayGetSize(pRecycleList) > 0) { for (int32_t i = 0; i < taosArrayGetSize(pRecycleList); ++i) { - int32_t taskId = *(int32_t*)taosArrayGet(pRecycleList, i); - streamMetaRemoveTask(pMeta, taskId); + int64_t* pId = 
taosArrayGet(pRecycleList, i); + streamMetaRemoveTask(pMeta, pId); } } - qDebug("vgId:%d load %d task from disk", pMeta->vgId, (int32_t)taosArrayGetSize(pMeta->pTaskList)); + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + qDebug("vgId:%d load %d tasks into meta from disk completed", pMeta->vgId, numOfTasks); taosArrayDestroy(pRecycleList); return 0; } + +int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->vgId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->numOfTasks) < 0) return -1; + + for (int32_t i = 0; i < pReq->numOfTasks; ++i) { + STaskStatusEntry* ps = taosArrayGet(pReq->pTaskStatus, i); + if (tEncodeI64(pEncoder, ps->streamId) < 0) return -1; + if (tEncodeI32(pEncoder, ps->taskId) < 0) return -1; + if (tEncodeI32(pEncoder, ps->status) < 0) return -1; + } + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->vgId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->numOfTasks) < 0) return -1; + + pReq->pTaskStatus = taosArrayInit(pReq->numOfTasks, sizeof(STaskStatusEntry)); + for (int32_t i = 0; i < pReq->numOfTasks; ++i) { + STaskStatusEntry hb = {0}; + if (tDecodeI64(pDecoder, &hb.streamId) < 0) return -1; + if (tDecodeI32(pDecoder, &hb.taskId) < 0) return -1; + if (tDecodeI32(pDecoder, &hb.status) < 0) return -1; + + taosArrayPush(pReq->pTaskStatus, &hb); + } + + tEndDecode(pDecoder); + return 0; +} + +static bool readyToSendHb(SMetaHbInfo* pInfo) { + if ((++pInfo->tickCounter) >= META_HB_SEND_IDLE_COUNTER) { + // reset the counter + pInfo->tickCounter = 0; + return true; + } + return false; +} + +void metaHbToMnode(void* param, void* tmrId) { + int64_t rid = *(int64_t*)param; + + SStreamHbMsg hbMsg = {0}; + SStreamMeta* pMeta = taosAcquireRef(streamMetaId, rid); + if (pMeta == NULL) { + // 
taosMemoryFree(param); + return; + } + + // need to stop, stop now + if (pMeta->hbInfo.stopFlag == STREAM_META_WILL_STOP) { + pMeta->hbInfo.stopFlag = STREAM_META_OK_TO_STOP; + qDebug("vgId:%d jump out of meta timer", pMeta->vgId); + taosReleaseRef(streamMetaId, rid); + return; + } + + if (!readyToSendHb(&pMeta->hbInfo)) { + taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->hbInfo.hbTmr); + taosReleaseRef(streamMetaId, rid); + return; + } + + taosRLockLatch(&pMeta->lock); + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + + SEpSet epset = {0}; + + hbMsg.vgId = pMeta->vgId; + hbMsg.pTaskStatus = taosArrayInit(numOfTasks, sizeof(STaskStatusEntry)); + + for (int32_t i = 0; i < numOfTasks; ++i) { + SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i); + int64_t keys[2] = {pId->streamId, pId->taskId}; + SStreamTask** pTask = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + + if ((*pTask)->info.fillHistory == 1) { + continue; + } + + STaskStatusEntry entry = {.streamId = pId->streamId, .taskId = pId->taskId, .status = (*pTask)->status.taskStatus}; + taosArrayPush(hbMsg.pTaskStatus, &entry); + + if (i == 0) { + epsetAssign(&epset, &(*pTask)->info.mnodeEpset); + } + } + + hbMsg.numOfTasks = taosArrayGetSize(hbMsg.pTaskStatus); + taosRUnLockLatch(&pMeta->lock); + + int32_t code = 0; + int32_t tlen = 0; + + tEncodeSize(tEncodeStreamHbMsg, &hbMsg, tlen, code); + if (code < 0) { + qError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(code)); + taosArrayDestroy(hbMsg.pTaskStatus); + taosReleaseRef(streamMetaId, rid); + return; + } + + void* buf = rpcMallocCont(tlen); + if (buf == NULL) { + qError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + taosArrayDestroy(hbMsg.pTaskStatus); + taosReleaseRef(streamMetaId, rid); + return; + } + + SEncoder encoder; + tEncoderInit(&encoder, buf, tlen); + if ((code = tEncodeStreamHbMsg(&encoder, &hbMsg)) < 0) { + 
rpcFreeCont(buf); + qError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(code)); + taosArrayDestroy(hbMsg.pTaskStatus); + taosReleaseRef(streamMetaId, rid); + return; + } + tEncoderClear(&encoder); + + taosArrayDestroy(hbMsg.pTaskStatus); + + SRpcMsg msg = {0}; + initRpcMsg(&msg, TDMT_MND_STREAM_HEARTBEAT, buf, tlen); + msg.info.noResp = 1; + + qDebug("vgId:%d, build and send hb to mnode", pMeta->vgId); + + tmsgSendReq(&epset, &msg); + taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->hbInfo.hbTmr); + taosReleaseRef(streamMetaId, rid); +} + +static bool hasStreamTaskInTimer(SStreamMeta* pMeta) { + bool inTimer = false; + + taosWLockLatch(&pMeta->lock); + + void* pIter = NULL; + while (1) { + pIter = taosHashIterate(pMeta->pTasks, pIter); + if (pIter == NULL) { + break; + } + + SStreamTask* pTask = *(SStreamTask**)pIter; + if (pTask->status.timerActive >= 1) { + inTimer = true; + } + } + + taosWUnLockLatch(&pMeta->lock); + return inTimer; +} + +void streamMetaNotifyClose(SStreamMeta* pMeta) { + int32_t vgId = pMeta->vgId; + + qDebug("vgId:%d notify all stream tasks that the vnode is closing", vgId); + taosWLockLatch(&pMeta->lock); + + void* pIter = NULL; + while (1) { + pIter = taosHashIterate(pMeta->pTasks, pIter); + if (pIter == NULL) { + break; + } + + SStreamTask* pTask = *(SStreamTask**)pIter; + qDebug("vgId:%d s-task:%s set closing flag", vgId, pTask->id.idStr); + streamTaskStop(pTask); + } + + taosWUnLockLatch(&pMeta->lock); + + // wait for the stream meta hb function stopping + pMeta->hbInfo.stopFlag = STREAM_META_WILL_STOP; + while (pMeta->hbInfo.stopFlag != STREAM_META_OK_TO_STOP) { + taosMsleep(100); + qDebug("vgId:%d wait for meta to stop timer", pMeta->vgId); + } + + qDebug("vgId:%d start to check all tasks", vgId); + int64_t st = taosGetTimestampMs(); + + while (hasStreamTaskInTimer(pMeta)) { + qDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); + taosMsleep(100); + 
} + + int64_t el = taosGetTimestampMs() - st; + qDebug("vgId:%d all stream tasks are not in timer, continue close, elapsed time:%" PRId64 " ms", pMeta->vgId, el); +} \ No newline at end of file diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index e28c93b8b1..e0b6116457 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -15,33 +15,49 @@ #include "streamInt.h" +#define MAX_STREAM_EXEC_BATCH_NUM 32 +#define MIN_STREAM_EXEC_BATCH_NUM 4 + SStreamQueue* streamQueueOpen(int64_t cap) { SStreamQueue* pQueue = taosMemoryCalloc(1, sizeof(SStreamQueue)); - if (pQueue == NULL) return NULL; + if (pQueue == NULL) { + return NULL; + } + pQueue->queue = taosOpenQueue(); pQueue->qall = taosAllocateQall(); + if (pQueue->queue == NULL || pQueue->qall == NULL) { - goto FAIL; + if (pQueue->queue) taosCloseQueue(pQueue->queue); + if (pQueue->qall) taosFreeQall(pQueue->qall); + taosMemoryFree(pQueue); + return NULL; } + pQueue->status = STREAM_QUEUE__SUCESS; taosSetQueueCapacity(pQueue->queue, cap); taosSetQueueMemoryCapacity(pQueue->queue, cap * 1024); return pQueue; - -FAIL: - if (pQueue->queue) taosCloseQueue(pQueue->queue); - if (pQueue->qall) taosFreeQall(pQueue->qall); - taosMemoryFree(pQueue); - return NULL; } -void streamQueueClose(SStreamQueue* pQueue, int32_t taskId) { - qDebug("s-task:0x%x free the queue:%p, items in queue:%d", taskId, pQueue->queue, taosQueueItemSize(pQueue->queue)); - +void streamQueueCleanup(SStreamQueue* pQueue) { void* qItem = NULL; while ((qItem = streamQueueNextItem(pQueue)) != NULL) { streamFreeQitem(qItem); } + pQueue->status = STREAM_QUEUE__SUCESS; +} +// void streamQueueClose(SStreamQueue* pQueue) { +// streamQueueCleanup(pQueue); + +// taosFreeQall(pQueue->qall); +// taosCloseQueue(pQueue->queue); +// taosMemoryFree(pQueue); +// } + +void streamQueueClose(SStreamQueue* pQueue, int32_t taskId) { + qDebug("s-task:0x%x free the queue:%p, items in queue:%d", taskId, 
pQueue->queue, taosQueueItemSize(pQueue->queue)); + streamQueueCleanup(pQueue); taosFreeQall(pQueue->qall); taosCloseQueue(pQueue->queue); @@ -106,18 +122,16 @@ SStreamQueueRes streamQueueGetRes(SStreamQueue1* pQueue) { } #endif -#define MAX_STREAM_EXEC_BATCH_NUM 128 -#define MIN_STREAM_EXEC_BATCH_NUM 16 - // todo refactor: // read data from input queue typedef struct SQueueReader { SStreamQueue* pQueue; - int32_t taskLevel; - int32_t maxBlocks; // maximum block in one batch - int32_t waitDuration; // maximum wait time to format several block into a batch to process, unit: ms + int32_t taskLevel; + int32_t maxBlocks; // maximum block in one batch + int32_t waitDuration; // maximum wait time to format several block into a batch to process, unit: ms } SQueueReader; +#if 0 SStreamQueueItem* doReadMultiBlocksFromQueue(SQueueReader* pReader, const char* idstr) { int32_t numOfBlocks = 0; int32_t tryCount = 0; @@ -163,3 +177,100 @@ SStreamQueueItem* doReadMultiBlocksFromQueue(SQueueReader* pReader, const char* return pRet; } +#endif + +int32_t extractBlocksFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks) { + int32_t retryTimes = 0; + int32_t MAX_RETRY_TIMES = 5; + const char* id = pTask->id.idStr; + + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { // extract block from inputQ, one-by-one + while (1) { + if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) { + qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); + return TSDB_CODE_SUCCESS; + } + + SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); + if (qItem == NULL) { + qDebug("===stream===break batchSize:%d, %s", *numOfBlocks, id); + return TSDB_CODE_SUCCESS; + } + + qDebug("s-task:%s sink task handle block one-by-one, type:%d", id, qItem->type); + + *numOfBlocks = 1; + *pInput = qItem; + return TSDB_CODE_SUCCESS; + } + } + + // non sink task + while (1) { + if (streamTaskShouldPause(&pTask->status) || 
streamTaskShouldStop(&pTask->status)) { + qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); + return TSDB_CODE_SUCCESS; + } + + SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); + if (qItem == NULL) { + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && (++retryTimes) < MAX_RETRY_TIMES) { + taosMsleep(10); + qDebug("===stream===try again batchSize:%d, retry:%d, %s", *numOfBlocks, retryTimes, id); + continue; + } + + qDebug("===stream===break batchSize:%d, %s", *numOfBlocks, id); + return TSDB_CODE_SUCCESS; + } + + // do not merge blocks for sink node and check point data block + if (qItem->type == STREAM_INPUT__CHECKPOINT || qItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER || + qItem->type == STREAM_INPUT__TRANS_STATE) { + if (*pInput == NULL) { + char* p = NULL; + if (qItem->type == STREAM_INPUT__CHECKPOINT) { + p = "checkpoint"; + } else if (qItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + p = "checkpoint-trigger"; + } else { + p = "transtate"; + } + + qDebug("s-task:%s %s msg extracted, start to process immediately", id, p); + *numOfBlocks = 1; + *pInput = qItem; + return TSDB_CODE_SUCCESS; + } else { + // previous existed blocks needs to be handle, before handle the checkpoint msg block + qDebug("s-task:%s checkpoint/transtate msg extracted, handle previous blocks, numOfBlocks:%d", id, + *numOfBlocks); + streamQueueProcessFail(pTask->inputQueue); + return TSDB_CODE_SUCCESS; + } + } else { + if (*pInput == NULL) { + ASSERT((*numOfBlocks) == 0); + *pInput = qItem; + } else { + // todo we need to sort the data block, instead of just appending into the array list. 
+ void* newRet = streamMergeQueueItem(*pInput, qItem); + if (newRet == NULL) { + qError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d", id, *numOfBlocks); + streamQueueProcessFail(pTask->inputQueue); + return TSDB_CODE_SUCCESS; + } + + *pInput = newRet; + } + + *numOfBlocks += 1; + streamQueueProcessSuccess(pTask->inputQueue); + + if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) { + qDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM); + return TSDB_CODE_SUCCESS; + } + } + } +} diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 2506dbaead..be7c1584fd 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -13,7 +13,9 @@ * along with this program. If not, see . */ +#include #include "streamInt.h" +#include "trpc.h" #include "ttimer.h" #include "wal.h" @@ -63,7 +65,10 @@ const char* streamGetTaskStatusStr(int32_t status) { case TASK_STATUS__SCAN_HISTORY: return "scan-history"; case TASK_STATUS__HALT: return "halt"; case TASK_STATUS__PAUSE: return "paused"; + case TASK_STATUS__CK: return "check-point"; + case TASK_STATUS__CK_READY: return "check-point-ready"; case TASK_STATUS__DROPPING: return "dropping"; + case TASK_STATUS__STOP: return "stop"; default:return ""; } } @@ -92,8 +97,9 @@ int32_t streamTaskLaunchScanHistory(SStreamTask* pTask) { } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { if (pTask->info.fillHistory) { streamSetParamForScanHistory(pTask); + streamTaskEnablePause(pTask); } - streamTaskEnablePause(pTask); + streamTaskScanHistoryPrepare(pTask); } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { qDebug("s-task:%s sink task do nothing to handle scan-history", pTask->id.idStr); @@ -112,6 +118,7 @@ int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask) { .upstreamTaskId = pTask->id.taskId, .upstreamNodeId = pTask->info.nodeId, .childId = pTask->info.selfChildId, + .stage = 
pTask->pMeta->stage, }; // serialize @@ -122,9 +129,9 @@ int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask) { pTask->checkReqId = req.reqId; qDebug("s-task:%s check single downstream task:0x%x(vgId:%d) ver:%" PRId64 "-%" PRId64 " window:%" PRId64 - "-%" PRId64 ", req:0x%" PRIx64, + "-%" PRId64 ", stage:%"PRId64" req:0x%" PRIx64, pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId, pRange->range.minVer, pRange->range.maxVer, - pWindow->skey, pWindow->ekey, req.reqId); + pWindow->skey, pWindow->ekey, req.stage, req.reqId); streamDispatchCheckMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { @@ -143,8 +150,8 @@ int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask) { taosArrayPush(pTask->checkReqIds, &req.reqId); req.downstreamNodeId = pVgInfo->vgId; req.downstreamTaskId = pVgInfo->taskId; - qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (shuffle), idx:%d", pTask->id.idStr, pTask->info.nodeId, - req.downstreamTaskId, req.downstreamNodeId, i); + qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (shuffle), idx:%d, stage:%" PRId64, + pTask->id.idStr, pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId, i, req.stage); streamDispatchCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } } else { @@ -169,12 +176,12 @@ int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* p .downstreamTaskId = pRsp->downstreamTaskId, .downstreamNodeId = pRsp->downstreamNodeId, .childId = pRsp->childId, + .stage = pTask->pMeta->stage, }; - qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (recheck)", pTask->id.idStr, pTask->info.nodeId, - req.downstreamTaskId, req.downstreamNodeId); - if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { + qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) stage:%" PRId64 " (recheck)", pTask->id.idStr, + pTask->info.nodeId, 
req.downstreamTaskId, req.downstreamNodeId, req.stage); streamDispatchCheckMsg(pTask, &req, pRsp->downstreamNodeId, &pTask->fixedEpDispatcher.epSet); } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; @@ -183,6 +190,8 @@ int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* p for (int32_t i = 0; i < numOfVgs; i++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); if (pVgInfo->taskId == req.downstreamTaskId) { + qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) stage:%" PRId64 " (recheck)", pTask->id.idStr, + pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId, req.stage); streamDispatchCheckMsg(pTask, &req, pRsp->downstreamNodeId, &pVgInfo->epSet); } } @@ -191,8 +200,28 @@ int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* p return 0; } -int32_t streamTaskCheckStatus(SStreamTask* pTask) { - return (pTask->status.downstreamReady == 1)? 1:0; +int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage) { + SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); + ASSERT(pInfo != NULL); + + if (stage == -1) { + qDebug("s-task:%s receive check msg from upstream task:0x%x, invalid stageId:%" PRId64 ", not ready", pTask->id.idStr, + upstreamTaskId, stage); + return 0; + } + + if (pInfo->stage == -1) { + pInfo->stage = stage; + qDebug("s-task:%s receive check msg from upstream task:0x%x, init stage value:%" PRId64, pTask->id.idStr, + upstreamTaskId, stage); + } + + if (pInfo->stage < stage) { + qError("s-task:%s receive msg from upstream task:0x%x(vgId:%d), new stage received:%" PRId64 ", prev:%" PRId64, + pTask->id.idStr, upstreamTaskId, vgId, stage, pInfo->stage); + } + + return ((pTask->status.downstreamReady == 1) && (pInfo->stage == stage))? 
1:0; } static void doProcessDownstreamReadyRsp(SStreamTask* pTask, int32_t numOfReqs) { @@ -216,6 +245,7 @@ static void doProcessDownstreamReadyRsp(SStreamTask* pTask, int32_t numOfReqs) { launchFillHistoryTask(pTask); } +// todo handle error int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) { ASSERT(pTask->id.taskId == pRsp->upstreamTaskId); const char* id = pTask->id.idStr; @@ -259,10 +289,9 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs doProcessDownstreamReadyRsp(pTask, 1); } } else { // not ready, wait for 100ms and retry - qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, wait for 100ms and retry", id, pRsp->downstreamTaskId, - pRsp->downstreamNodeId); + qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, stage:%d, wait for 100ms and retry", id, + pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->oldStage); taosMsleep(100); - streamRecheckDownstream(pTask, pRsp); } @@ -339,39 +368,6 @@ int32_t streamSourceScanHistoryData(SStreamTask* pTask) { return streamScanExec(pTask, 100); } -int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) { - SStreamScanHistoryFinishReq req = { - .streamId = pTask->id.streamId, - .childId = pTask->info.selfChildId, - .upstreamTaskId = pTask->id.taskId, - .upstreamNodeId = pTask->pMeta->vgId, - }; - - // serialize - if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { - req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; - pTask->notReadyTasks = 1; - streamDoDispatchScanHistoryFinishMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); - } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; - int32_t numOfVgs = taosArrayGetSize(vgInfo); - pTask->notReadyTasks = numOfVgs; - - qDebug("s-task:%s send scan-history data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr, - 
numOfVgs, streamGetTaskStatusStr(pTask->status.taskStatus)); - for (int32_t i = 0; i < numOfVgs; i++) { - SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); - req.downstreamTaskId = pVgInfo->taskId; - streamDoDispatchScanHistoryFinishMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); - } - } else { - qDebug("s-task:%s no downstream tasks, invoke scan-history finish rsp directly", pTask->id.idStr); - streamProcessScanHistoryFinishRsp(pTask); - } - - return 0; -} - int32_t appendTranstateIntoInputQ(SStreamTask* pTask) { SStreamDataBlock* pTranstate = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock)); if (pTranstate == NULL) { @@ -405,7 +401,7 @@ int32_t appendTranstateIntoInputQ(SStreamTask* pTask) { // agg int32_t streamTaskScanHistoryPrepare(SStreamTask* pTask) { - pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->pUpstreamEpInfoList); + pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->pUpstreamInfoList); qDebug("s-task:%s level:%d task wait for %d upstream tasks complete scan-history procedure, status:%s", pTask->id.idStr, pTask->info.taskLevel, pTask->numOfWaitingUpstream, streamGetTaskStatusStr(pTask->status.taskStatus)); @@ -429,14 +425,31 @@ int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistory int32_t taskLevel = pTask->info.taskLevel; ASSERT(taskLevel == TASK_LEVEL__AGG || taskLevel == TASK_LEVEL__SINK); - // sink node do not send end of scan history msg to its upstream, which is agg task. 
+ if (pTask->status.taskStatus != TASK_STATUS__SCAN_HISTORY) { + qError("s-task:%s not in scan-history status, status:%s return upstream:0x%x scan-history finish directly", + pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), pReq->upstreamTaskId); + + void* pBuf = NULL; + int32_t len = 0; + streamTaskBuildScanhistoryRspMsg(pTask, pReq, &pBuf, &len); + + SRpcMsg msg = {.info = *pRpcInfo}; + initRpcMsg(&msg, 0, pBuf, sizeof(SMsgHead) + len); + + tmsgSendRsp(&msg); + qDebug("s-task:%s level:%d notify upstream:0x%x(vgId:%d) to continue process data from WAL", pTask->id.idStr, + pTask->info.taskLevel, pReq->upstreamTaskId, pReq->upstreamNodeId); + return 0; + } + + // sink tasks do not send end of scan history msg to its upstream, which is agg task. streamAddEndScanHistoryMsg(pTask, pRpcInfo, pReq); int32_t left = atomic_sub_fetch_32(&pTask->numOfWaitingUpstream, 1); ASSERT(left >= 0); if (left == 0) { - int32_t numOfTasks = taosArrayGetSize(pTask->pUpstreamEpInfoList); + int32_t numOfTasks = taosArrayGetSize(pTask->pUpstreamInfoList); qDebug( "s-task:%s all %d upstream tasks finish scan-history data, set param for agg task for stream data and send " "rsp to all upstream tasks", @@ -446,11 +459,18 @@ int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistory streamAggUpstreamScanHistoryFinish(pTask); } + // all upstream tasks have completed the scan-history task in the stream time window, let's start to extract data + // from the WAL files, which contains the real time stream data. streamNotifyUpstreamContinue(pTask); - // sink node does not receive the pause msg from mnode, so does not need enable it - if (pTask->info.taskLevel == TASK_LEVEL__AGG) { - streamTaskEnablePause(pTask); + // mnode will not send the pause/resume message to the sink task, so no need to enable the pause for sink tasks. 
+ if (taskLevel == TASK_LEVEL__AGG) { + /*int32_t code = */streamTaskScanHistoryDataComplete(pTask); + } else { // for sink task, set normal + if (pTask->status.taskStatus != TASK_STATUS__PAUSE && pTask->status.taskStatus != TASK_STATUS__STOP && + pTask->status.taskStatus != TASK_STATUS__DROPPING) { + streamSetStatusNormal(pTask); + } } } else { qDebug("s-task:%s receive scan-history data finish msg from upstream:0x%x(index:%d), unfinished:%d", @@ -659,6 +679,7 @@ int32_t tEncodeStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* if (tEncodeI32(pEncoder, pReq->downstreamNodeId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->downstreamTaskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->childId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->stage) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; } @@ -672,6 +693,7 @@ int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq) if (tDecodeI32(pDecoder, &pReq->downstreamNodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->downstreamTaskId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->childId) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->stage) < 0) return -1; tEndDecode(pDecoder); return 0; } @@ -685,6 +707,7 @@ int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* if (tEncodeI32(pEncoder, pRsp->downstreamNodeId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->downstreamTaskId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->childId) < 0) return -1; + if (tEncodeI32(pEncoder, pRsp->oldStage) < 0) return -1; if (tEncodeI8(pEncoder, pRsp->status) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; @@ -699,6 +722,7 @@ int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp) if (tDecodeI32(pDecoder, &pRsp->downstreamNodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->downstreamTaskId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->childId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->oldStage) < 
0) return -1; if (tDecodeI8(pDecoder, &pRsp->status) < 0) return -1; tEndDecode(pDecoder); return 0; @@ -776,22 +800,18 @@ void launchFillHistoryTask(SStreamTask* pTask) { streamLaunchFillHistoryTask(pTask); } +// only the downstream tasks are ready, set the task to be ready to work. void streamTaskCheckDownstreamTasks(SStreamTask* pTask) { if (pTask->info.fillHistory) { qDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr); return; } - ASSERT(pTask->status.downstreamReady == 0); - - // check downstream tasks for itself streamTaskDoCheckDownstreamTasks(pTask); } // normal -> pause, pause/stop/dropping -> pause, halt -> pause, scan-history -> pause -void streamTaskPause(SStreamTask* pTask) { - SStreamMeta* pMeta = pTask->pMeta; - +void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta) { int64_t st = taosGetTimestampMs(); int8_t status = pTask->status.taskStatus; @@ -806,6 +826,12 @@ void streamTaskPause(SStreamTask* pTask) { return; } + if(pTask->info.taskLevel == TASK_LEVEL__SINK) { + int32_t num = atomic_add_fetch_32(&pMeta->pauseTaskNum, 1); + qInfo("vgId:%d s-task:%s pause stream sink task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num); + return; + } + while (!pTask->status.pauseAllowed || (pTask->status.taskStatus == TASK_STATUS__HALT)) { status = pTask->status.taskStatus; if (status == TASK_STATUS__DROPPING) { @@ -835,6 +861,8 @@ void streamTaskPause(SStreamTask* pTask) { atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus); atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE); + int32_t num = atomic_add_fetch_32(&pMeta->pauseTaskNum, 1); + qInfo("vgId:%d s-task:%s pause stream task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num); taosWUnLockLatch(&pMeta->lock); // in case of fill-history task, stop the tsdb file scan operation. 
@@ -844,16 +872,20 @@ void streamTaskPause(SStreamTask* pTask) { } int64_t el = taosGetTimestampMs() - st; - qDebug("vgId:%d s-task:%s set pause flag, prev:%s, elapsed time:%dms", pMeta->vgId, pTask->id.idStr, + qDebug("vgId:%d s-task:%s set pause flag, prev:%s, pause elapsed time:%dms", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(pTask->status.keepTaskStatus), (int32_t)el); } -void streamTaskResume(SStreamTask* pTask) { +void streamTaskResume(SStreamTask* pTask, SStreamMeta* pMeta) { int8_t status = pTask->status.taskStatus; if (status == TASK_STATUS__PAUSE) { pTask->status.taskStatus = pTask->status.keepTaskStatus; pTask->status.keepTaskStatus = TASK_STATUS__NORMAL; - qDebug("s-task:%s resume from pause", pTask->id.idStr); + int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1); + qInfo("vgId:%d s-task:%s resume from pause, status%s. pause task num:%d", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(status), num); + } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1); + qInfo("vgId:%d s-task:%s sink task.resume from pause, status%s. pause task num:%d", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(status), num); } else { qError("s-task:%s not in pause, failed to resume, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status)); } diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c new file mode 100644 index 0000000000..8a4500dd86 --- /dev/null +++ b/source/libs/stream/src/streamSnapshot.c @@ -0,0 +1,499 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "streamSnapshot.h" +#include "query.h" +#include "rocksdb/c.h" +#include "streamBackendRocksdb.h" +#include "tcommon.h" + +enum SBackendFileType { + ROCKSDB_OPTIONS_TYPE = 1, + ROCKSDB_MAINFEST_TYPE = 2, + ROCKSDB_SST_TYPE = 3, + ROCKSDB_CURRENT_TYPE = 4, + ROCKSDB_CHECKPOINT_META_TYPE = 5, +}; + +typedef struct SBackendFileItem { + char* name; + int8_t type; + int64_t size; +} SBackendFileItem; +typedef struct SBackendFile { + char* pCurrent; + char* pMainfest; + char* pOptions; + SArray* pSst; + char* pCheckpointMeta; + char* path; +} SBanckendFile; +struct SStreamSnapHandle { + void* handle; + SBanckendFile* pBackendFile; + int64_t checkpointId; + int64_t seraial; + int64_t offset; + TdFilePtr fd; + int8_t filetype; + SArray* pFileList; + int32_t currFileIdx; +}; +struct SStreamSnapBlockHdr { + int8_t type; + int8_t flag; + int64_t index; + char name[128]; + int64_t totalSize; + int64_t size; + uint8_t data[]; +}; +struct SStreamSnapReader { + void* pMeta; + int64_t sver; + int64_t ever; + SStreamSnapHandle handle; + int64_t checkpointId; +}; +struct SStreamSnapWriter { + void* pMeta; + int64_t sver; + int64_t ever; + SStreamSnapHandle handle; +}; +const char* ROCKSDB_OPTIONS = "OPTIONS"; +const char* ROCKSDB_MAINFEST = "MANIFEST"; +const char* ROCKSDB_SST = "sst"; +const char* ROCKSDB_CURRENT = "CURRENT"; +const char* ROCKSDB_CHECKPOINT_META = "CHECKPOINT"; +static int64_t kBlockSize = 64 * 1024; + +int32_t streamSnapHandleInit(SStreamSnapHandle* handle, char* path, int64_t chkpId, void* pMeta); +void streamSnapHandleDestroy(SStreamSnapHandle* handle); + +// static void streamBuildFname(char* path, char* file, char* fullname) + 
+#define STREAM_ROCKSDB_BUILD_FULLNAME(path, file, fullname) \ + do { \ + sprintf(fullname, "%s%s%s", path, TD_DIRSEP, file); \ + } while (0) + +int32_t streamGetFileSize(char* path, char* name, int64_t* sz) { + int ret = 0; + + char* fullname = taosMemoryCalloc(1, strlen(path) + 32); + sprintf(fullname, "%s%s%s", path, TD_DIRSEP, name); + + ret = taosStatFile(fullname, sz, NULL, NULL); + taosMemoryFree(fullname); + + return ret; +} + +TdFilePtr streamOpenFile(char* path, char* name, int32_t opt) { + char fullname[256] = {0}; + STREAM_ROCKSDB_BUILD_FULLNAME(path, name, fullname); + return taosOpenFile(fullname, opt); +} + +int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chkpId, void* pMeta) { + // impl later + int len = strlen(path); + char* tdir = taosMemoryCalloc(1, len + 256); + memcpy(tdir, path, len); + + int32_t code = 0; + + int8_t validChkp = 0; + if (chkpId != 0) { + sprintf(tdir, "%s%s%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "stream", TD_DIRSEP, "checkpoints", TD_DIRSEP, + chkpId); + if (taosIsDir(tdir)) { + validChkp = 1; + qInfo("%s start to read snap %s", STREAM_STATE_TRANSFER, tdir); + streamBackendAddInUseChkp(pMeta, chkpId); + } else { + qWarn("%s failed to read from %s, reason: dir not exist,retry to default state dir", STREAM_STATE_TRANSFER, tdir); + } + } + + // no checkpoint specified or not exists invalid checkpoint, do checkpoint at default path and translate it + if (validChkp == 0) { + sprintf(tdir, "%s%s%s%s%s", path, TD_DIRSEP, "stream", TD_DIRSEP, "state"); + char* chkpdir = taosMemoryCalloc(1, len + 256); + sprintf(chkpdir, "%s%s%s", tdir, TD_DIRSEP, "tmp"); + taosMemoryFree(tdir); + + tdir = chkpdir; + qInfo("%s start to trigger checkpoint on %s", STREAM_STATE_TRANSFER, tdir); + + code = streamBackendTriggerChkp(pMeta, tdir); + if (code != 0) { + qError("%s failed to trigger chekckpoint at %s", STREAM_STATE_TRANSFER, tdir); + taosMemoryFree(tdir); + return code; + } + chkpId = 0; + } + + qInfo("%s 
start to read dir: %s", STREAM_STATE_TRANSFER, tdir); + + TdDirPtr pDir = taosOpenDir(tdir); + if (NULL == pDir) { + qError("%s failed to open %s", STREAM_STATE_TRANSFER, tdir); + goto _err; + } + + SBanckendFile* pFile = taosMemoryCalloc(1, sizeof(SBanckendFile)); + pHandle->pBackendFile = pFile; + pHandle->checkpointId = chkpId; + pHandle->seraial = 0; + + pFile->path = tdir; + pFile->pSst = taosArrayInit(16, sizeof(void*)); + + TdDirEntryPtr pDirEntry; + while ((pDirEntry = taosReadDir(pDir)) != NULL) { + char* name = taosGetDirEntryName(pDirEntry); + if (strlen(name) >= strlen(ROCKSDB_CURRENT) && 0 == strncmp(name, ROCKSDB_CURRENT, strlen(ROCKSDB_CURRENT))) { + pFile->pCurrent = taosStrdup(name); + continue; + } + if (strlen(name) >= strlen(ROCKSDB_MAINFEST) && 0 == strncmp(name, ROCKSDB_MAINFEST, strlen(ROCKSDB_MAINFEST))) { + pFile->pMainfest = taosStrdup(name); + continue; + } + if (strlen(name) >= strlen(ROCKSDB_OPTIONS) && 0 == strncmp(name, ROCKSDB_OPTIONS, strlen(ROCKSDB_OPTIONS))) { + pFile->pOptions = taosStrdup(name); + continue; + } + if (strlen(name) >= strlen(ROCKSDB_CHECKPOINT_META) && + 0 == strncmp(name, ROCKSDB_CHECKPOINT_META, strlen(ROCKSDB_CHECKPOINT_META))) { + pFile->pCheckpointMeta = taosStrdup(name); + continue; + } + if (strlen(name) >= strlen(ROCKSDB_SST) && + 0 == strncmp(name + strlen(name) - strlen(ROCKSDB_SST), ROCKSDB_SST, strlen(ROCKSDB_SST))) { + char* sst = taosStrdup(name); + taosArrayPush(pFile->pSst, &sst); + } + } + { + char* buf = taosMemoryCalloc(1, 512); + sprintf(buf, "[current: %s,", pFile->pCurrent); + sprintf(buf + strlen(buf), "MANIFEST: %s,", pFile->pMainfest); + sprintf(buf + strlen(buf), "options: %s,", pFile->pOptions); + + for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) { + char* name = taosArrayGetP(pFile->pSst, i); + sprintf(buf + strlen(buf), "%s,", name); + } + sprintf(buf + strlen(buf) - 1, "]"); + + qInfo("%s get file list: %s", STREAM_STATE_TRANSFER, buf); + taosMemoryFree(buf); + } + + 
taosCloseDir(&pDir); + + if (pFile->pCurrent == NULL) { + qError("%s failed to open %s, reason: no valid file", STREAM_STATE_TRANSFER, tdir); + code = -1; + tdir = NULL; + goto _err; + } + SArray* list = taosArrayInit(64, sizeof(SBackendFileItem)); + + SBackendFileItem item; + // current + item.name = pFile->pCurrent; + item.type = ROCKSDB_CURRENT_TYPE; + streamGetFileSize(pFile->path, item.name, &item.size); + taosArrayPush(list, &item); + + // mainfest + item.name = pFile->pMainfest; + item.type = ROCKSDB_MAINFEST_TYPE; + streamGetFileSize(pFile->path, item.name, &item.size); + taosArrayPush(list, &item); + + // options + item.name = pFile->pOptions; + item.type = ROCKSDB_OPTIONS_TYPE; + streamGetFileSize(pFile->path, item.name, &item.size); + taosArrayPush(list, &item); + // sst + for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) { + char* sst = taosArrayGetP(pFile->pSst, i); + item.name = sst; + item.type = ROCKSDB_SST_TYPE; + streamGetFileSize(pFile->path, item.name, &item.size); + taosArrayPush(list, &item); + } + // meta + item.name = pFile->pCheckpointMeta; + item.type = ROCKSDB_CHECKPOINT_META_TYPE; + if (streamGetFileSize(pFile->path, item.name, &item.size) == 0) { + taosArrayPush(list, &item); + } + + pHandle->pBackendFile = pFile; + + pHandle->currFileIdx = 0; + pHandle->pFileList = list; + pHandle->seraial = 0; + pHandle->offset = 0; + pHandle->handle = pMeta; + return 0; +_err: + streamSnapHandleDestroy(pHandle); + taosMemoryFreeClear(tdir); + + code = -1; + return code; +} + +void streamSnapHandleDestroy(SStreamSnapHandle* handle) { + SBanckendFile* pFile = handle->pBackendFile; + + if (handle->checkpointId == 0) { + // del tmp dir + if (taosIsDir(pFile->path)) { + taosRemoveDir(pFile->path); + } + } else { + streamBackendDelInUseChkp(handle->handle, handle->checkpointId); + } + if (pFile) { + taosMemoryFree(pFile->pCheckpointMeta); + taosMemoryFree(pFile->pCurrent); + taosMemoryFree(pFile->pMainfest); + taosMemoryFree(pFile->pOptions); + 
taosMemoryFree(pFile->path); + for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) { + char* sst = taosArrayGetP(pFile->pSst, i); + taosMemoryFree(sst); + } + taosArrayDestroy(pFile->pSst); + taosMemoryFree(pFile); + } + taosArrayDestroy(handle->pFileList); + taosCloseFile(&handle->fd); + return; +} + +int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t chkpId, char* path, SStreamSnapReader** ppReader) { + // impl later + SStreamSnapReader* pReader = taosMemoryCalloc(1, sizeof(SStreamSnapReader)); + if (pReader == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + if (streamSnapHandleInit(&pReader->handle, (char*)path, chkpId, pMeta) < 0) { + taosMemoryFree(pReader); + return -1; + } + + *ppReader = pReader; + + return 0; +} +int32_t streamSnapReaderClose(SStreamSnapReader* pReader) { + if (pReader == NULL) return 0; + + streamSnapHandleDestroy(&pReader->handle); + taosMemoryFree(pReader); + return 0; +} +int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* size) { + // impl later + int32_t code = 0; + SStreamSnapHandle* pHandle = &pReader->handle; + SBanckendFile* pFile = pHandle->pBackendFile; + + SBackendFileItem* item = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); + + if (pHandle->fd == NULL) { + if (pHandle->currFileIdx >= taosArrayGetSize(pHandle->pFileList)) { + // finish + *ppData = NULL; + *size = 0; + return 0; + } else { + pHandle->fd = streamOpenFile(pFile->path, item->name, TD_FILE_READ); + qDebug("%s open file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER, + item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + } + } + + qDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64 ", file no.%d", STREAM_STATE_TRANSFER, + item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + uint8_t* buf = taosMemoryCalloc(1, sizeof(SStreamSnapBlockHdr) + kBlockSize); + int64_t nread = taosPReadFile(pHandle->fd, buf + 
sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); + if (nread == -1) { + code = TAOS_SYSTEM_ERROR(terrno); + qError("%s snap failed to read snap, file name:%s, type:%d,reason:%s", STREAM_STATE_TRANSFER, item->name, + item->type, tstrerror(code)); + return -1; + } else if (nread > 0 && nread <= kBlockSize) { + // left bytes less than kBlockSize + qDebug("%s read file %s, current offset:%" PRId64 ",size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER, + item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + pHandle->offset += nread; + if (pHandle->offset >= item->size || nread < kBlockSize) { + taosCloseFile(&pHandle->fd); + pHandle->offset = 0; + pHandle->currFileIdx += 1; + } + } else { + qDebug("%s no data read, close file no.%d, move to next file, open and read", STREAM_STATE_TRANSFER, + pHandle->currFileIdx); + taosCloseFile(&pHandle->fd); + pHandle->offset = 0; + pHandle->currFileIdx += 1; + + if (pHandle->currFileIdx >= taosArrayGetSize(pHandle->pFileList)) { + // finish + *ppData = NULL; + *size = 0; + return 0; + } + item = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); + pHandle->fd = streamOpenFile(pFile->path, item->name, TD_FILE_READ); + + nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); + pHandle->offset += nread; + + qDebug("%s open file and read file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", + STREAM_STATE_TRANSFER, item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + } + + SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)buf; + pHdr->size = nread; + pHdr->type = item->type; + pHdr->totalSize = item->size; + + memcpy(pHdr->name, item->name, strlen(item->name)); + pHandle->seraial += nread; + + *ppData = buf; + *size = sizeof(SStreamSnapBlockHdr) + nread; + return 0; +} +// SMetaSnapWriter ======================================== +int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path, 
SStreamSnapWriter** ppWriter) { + // impl later + SStreamSnapWriter* pWriter = taosMemoryCalloc(1, sizeof(SStreamSnapWriter)); + if (pWriter == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + SStreamSnapHandle* pHandle = &pWriter->handle; + + SBanckendFile* pFile = taosMemoryCalloc(1, sizeof(SBanckendFile)); + pFile->path = taosStrdup(path); + SArray* list = taosArrayInit(64, sizeof(SBackendFileItem)); + + SBackendFileItem item; + item.name = taosStrdup((char*)ROCKSDB_CURRENT); + item.type = ROCKSDB_CURRENT_TYPE; + taosArrayPush(list, &item); + + pHandle->pBackendFile = pFile; + + pHandle->pFileList = list; + pHandle->currFileIdx = 0; + pHandle->offset = 0; + + *ppWriter = pWriter; + return 0; +} + +int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { + int32_t code = 0; + + SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)pData; + SStreamSnapHandle* pHandle = &pWriter->handle; + SBanckendFile* pFile = pHandle->pBackendFile; + SBackendFileItem* pItem = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); + + if (pHandle->fd == NULL) { + pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); + if (pHandle->fd == NULL) { + code = TAOS_SYSTEM_ERROR(terrno); + qError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, pHdr->name, + tstrerror(code)); + } + } + + if (strlen(pHdr->name) == strlen(pItem->name) && strcmp(pHdr->name, pItem->name) == 0) { + int64_t bytes = taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset); + if (bytes != pHdr->size) { + code = TAOS_SYSTEM_ERROR(terrno); + qError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); + return code; + } + pHandle->offset += bytes; + } else { + taosCloseFile(&pHandle->fd); + pHandle->offset = 0; + pHandle->currFileIdx += 1; + + SBackendFileItem item; + item.name = taosStrdup(pHdr->name); + item.type = pHdr->type; + 
taosArrayPush(pHandle->pFileList, &item); + + SBackendFileItem* pItem = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); + pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); + if (pHandle->fd == NULL) { + code = TAOS_SYSTEM_ERROR(terrno); + qError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, pHdr->name, + tstrerror(code)); + } + + taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset); + pHandle->offset += pHdr->size; + } + + // impl later + return 0; +} +int32_t streamSnapWriterClose(SStreamSnapWriter* pWriter, int8_t rollback) { + SStreamSnapHandle* handle = &pWriter->handle; + if (qDebugFlag & DEBUG_DEBUG) { + char* buf = (char*)taosMemoryMalloc(1024); + int n = sprintf(buf, "["); + for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) { + SBackendFileItem* item = taosArrayGet(handle->pFileList, i); + if (i != taosArrayGetSize(handle->pFileList) - 1) { + n += sprintf(buf + n, "%s %" PRId64 ",", item->name, item->size); + } else { + n += sprintf(buf + n, "%s %" PRId64 "]", item->name, item->size); + } + } + qDebug("%s snap get file list, %s", STREAM_STATE_TRANSFER, buf); + taosMemoryFree(buf); + } + + for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) { + SBackendFileItem* item = taosArrayGet(handle->pFileList, i); + taosMemoryFree(item->name); + } + + streamSnapHandleDestroy(handle); + taosMemoryFree(pWriter); + + return 0; +} diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 8694e5cf4c..83aed42fe2 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -108,7 +108,7 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz SStreamTask* pStreamTask = pTask; char statePath[1024]; if (!specPath) { - sprintf(statePath, "%s/%d", path, pStreamTask->id.taskId); + sprintf(statePath, "%s%s%d", path, TD_DIRSEP, 
pStreamTask->id.taskId); } else { memset(statePath, 0, 1024); tstrncpy(statePath, path, 1024); @@ -128,7 +128,6 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz if (uniqueId == NULL) { int code = streamStateOpenBackend(pMeta->streamBackend, pState); if (code == -1) { - taosReleaseRef(streamBackendId, pState->streamBackendRid); taosThreadMutexUnlock(&pMeta->backendMutex); taosMemoryFree(pState); return NULL; @@ -139,8 +138,9 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz int64_t id = *(int64_t*)uniqueId; pState->pTdbState->backendCfWrapperId = id; pState->pTdbState->pBackendCfWrapper = taosAcquireRef(streamBackendCfWrapperId, id); - - taosAcquireRef(streamBackendId, pState->streamBackendRid); + // already exist stream task for + qInfo("already exist stream-state for %s", pState->pTdbState->idstr); + // taosAcquireRef(streamBackendId, pState->streamBackendRid); } taosThreadMutexUnlock(&pMeta->backendMutex); @@ -149,6 +149,8 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT); pState->parNameMap = tSimpleHashInit(1024, hashFn); + qInfo("succ to open state %p on backend %p 0x%" PRIx64 "-%d", pState, pMeta->streamBackend, pState->streamId, + pState->taskId); return pState; #else @@ -424,10 +426,15 @@ int32_t streamStateSaveInfo(SStreamState* pState, void* pKey, int32_t keyLen, vo code = streamStatePutBatch(pState, "default", batch, pKey, pVal, vLen, 0); if (code != 0) { + streamStateDestroyBatch(batch); return code; } code = streamStatePutBatch_rocksdb(pState, batch); streamStateDestroyBatch(batch); + // code = streamDefaultPut_rocksdb(pState, pKey, pVal, vLen); + // char* Val = NULL; + // int32_t len = 0; + // code = streamDefaultGet_rocksdb(pState, pKey, (void**)&Val, &len); return code; #else return 0; @@ -729,7 +736,8 @@ void streamStateFreeVal(void* val) { int32_t 
streamStateSessionPut(SStreamState* pState, const SSessionKey* key, const void* value, int32_t vLen) { #ifdef USE_ROCKSDB - qDebug("===stream===save skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey,key->win.ekey, key->groupId); + qDebug("===stream===save skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey, + key->groupId); return streamStateSessionPut_rocksdb(pState, key, value, vLen); #else SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; @@ -763,7 +771,8 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) { #ifdef USE_ROCKSDB - qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey,key->win.ekey, key->groupId); + qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey, + key->groupId); return streamStateSessionDel_rocksdb(pState, key); #else SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 232ca132ab..01dcb435c0 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -13,11 +13,12 @@ * along with this program. If not, see . 
*/ -#include "streamInt.h" #include "executor.h" +#include "streamInt.h" +#include "tmisce.h" #include "tstream.h" -#include "wal.h" #include "ttimer.h" +#include "wal.h" static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) { int32_t childId = taosArrayGetSize(pArray); @@ -59,6 +60,7 @@ int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo) if (tEncodeI32(pEncoder, pInfo->childId) < 0) return -1; /*if (tEncodeI64(pEncoder, pInfo->processedVer) < 0) return -1;*/ if (tEncodeSEpSet(pEncoder, &pInfo->epSet) < 0) return -1; + if (tEncodeI64(pEncoder, pInfo->stage) < 0) return -1; return 0; } @@ -68,6 +70,7 @@ int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo) { if (tDecodeI32(pDecoder, &pInfo->childId) < 0) return -1; /*if (tDecodeI64(pDecoder, &pInfo->processedVer) < 0) return -1;*/ if (tDecodeSEpSet(pDecoder, &pInfo->epSet) < 0) return -1; + if (tDecodeI64(pDecoder, &pInfo->stage) < 0) return -1; return 0; } @@ -87,9 +90,10 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tEncodeI32(pEncoder, pTask->info.selfChildId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->info.nodeId) < 0) return -1; if (tEncodeSEpSet(pEncoder, &pTask->info.epSet) < 0) return -1; + if (tEncodeSEpSet(pEncoder, &pTask->info.mnodeEpset) < 0) return -1; - if (tEncodeI64(pEncoder, pTask->chkInfo.id) < 0) return -1; - if (tEncodeI64(pEncoder, pTask->chkInfo.version) < 0) return -1; + if (tEncodeI64(pEncoder, pTask->chkInfo.checkpointId) < 0) return -1; + if (tEncodeI64(pEncoder, pTask->chkInfo.checkpointVer) < 0) return -1; if (tEncodeI8(pEncoder, pTask->info.fillHistory) < 0) return -1; if (tEncodeI64(pEncoder, pTask->historyTaskId.streamId)) return -1; @@ -102,10 +106,10 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tEncodeI64(pEncoder, pTask->dataRange.window.skey)) return -1; if (tEncodeI64(pEncoder, pTask->dataRange.window.ekey)) return -1; - int32_t epSz = 
taosArrayGetSize(pTask->pUpstreamEpInfoList); + int32_t epSz = taosArrayGetSize(pTask->pUpstreamInfoList); if (tEncodeI32(pEncoder, epSz) < 0) return -1; for (int32_t i = 0; i < epSz; i++) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i); + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); if (tEncodeStreamEpInfo(pEncoder, pInfo) < 0) return -1; } @@ -135,6 +139,41 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { return pEncoder->pos; } +int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo) { + int64_t ver; + int64_t skip64; + int8_t skip8; + int32_t skip32; + int16_t skip16; + SEpSet epSet; + + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &ver) < 0) return -1; + + if (ver != SSTREAM_TASK_VER) return -1; + + if (tDecodeI64(pDecoder, &skip64) < 0) return -1; + if (tDecodeI32(pDecoder, &skip32) < 0) return -1; + if (tDecodeI32(pDecoder, &skip32) < 0) return -1; + if (tDecodeI8(pDecoder, &skip8) < 0) return -1; + if (tDecodeI8(pDecoder, &skip8) < 0) return -1; + if (tDecodeI16(pDecoder, &skip16) < 0) return -1; + + if (tDecodeI8(pDecoder, &skip8) < 0) return -1; + if (tDecodeI8(pDecoder, &skip8) < 0) return -1; + + if (tDecodeI32(pDecoder, &skip32) < 0) return -1; + if (tDecodeI32(pDecoder, &skip32) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &epSet) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &epSet) < 0) return -1; + + if (tDecodeI64(pDecoder, &pChkpInfo->checkpointId) < 0) return -1; + if (tDecodeI64(pDecoder, &pChkpInfo->checkpointVer) < 0) return -1; + + tEndDecode(pDecoder); + return 0; +} + int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pTask->ver) < 0) return -1; @@ -153,9 +192,10 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeI32(pDecoder, &pTask->info.selfChildId) < 0) return -1; if (tDecodeI32(pDecoder, 
&pTask->info.nodeId) < 0) return -1; if (tDecodeSEpSet(pDecoder, &pTask->info.epSet) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &pTask->info.mnodeEpset) < 0) return -1; - if (tDecodeI64(pDecoder, &pTask->chkInfo.id) < 0) return -1; - if (tDecodeI64(pDecoder, &pTask->chkInfo.version) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->chkInfo.checkpointId) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->chkInfo.checkpointVer) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->info.fillHistory) < 0) return -1; if (tDecodeI64(pDecoder, &pTask->historyTaskId.streamId)) return -1; @@ -171,7 +211,7 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { int32_t epSz = -1; if (tDecodeI32(pDecoder, &epSz) < 0) return -1; - pTask->pUpstreamEpInfoList = taosArrayInit(epSz, POINTER_BYTES); + pTask->pUpstreamInfoList = taosArrayInit(epSz, POINTER_BYTES); for (int32_t i = 0; i < epSz; i++) { SStreamChildEpInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamChildEpInfo)); if (pInfo == NULL) return -1; @@ -179,7 +219,7 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { taosMemoryFreeClear(pInfo); return -1; } - taosArrayPush(pTask->pUpstreamEpInfoList, &pInfo); + taosArrayPush(pTask->pUpstreamInfoList, &pInfo); } if (pTask->info.taskLevel != TASK_LEVEL__SINK) { @@ -215,11 +255,18 @@ static void freeItem(void* p) { rpcFreeCont(pInfo->msg.pCont); } +static void freeUpstreamItem(void* p) { + SStreamChildEpInfo** pInfo = p; + taosMemoryFree(*pInfo); +} + void tFreeStreamTask(SStreamTask* pTask) { - qDebug("free s-task:0x%x, %p", pTask->id.taskId, pTask); + int32_t taskId = pTask->id.taskId; + + qDebug("free s-task:0x%x, %p, state:%p", taskId, pTask, pTask->pState); // remove the ref by timer - while(pTask->status.timerActive > 0) { + while (pTask->status.timerActive > 0) { qDebug("s-task:%s wait for task stop timer activities", pTask->id.idStr); taosMsleep(10); } @@ -256,21 +303,22 @@ void tFreeStreamTask(SStreamTask* pTask) { 
walCloseReader(pTask->exec.pWalReader); } - taosArrayDestroyP(pTask->pUpstreamEpInfoList, taosMemoryFree); if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) { tDeleteSchemaWrapper(pTask->tbSink.pSchemaWrapper); taosMemoryFree(pTask->tbSink.pTSchema); tSimpleHashCleanup(pTask->tbSink.pTblInfo); } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { taosArrayDestroy(pTask->shuffleDispatcher.dbInfo.pVgroupInfos); - taosArrayDestroy(pTask->checkReqIds); - pTask->checkReqIds = NULL; + pTask->checkReqIds = taosArrayDestroy(pTask->checkReqIds); } if (pTask->pState) { + qDebug("s-task:0x%x start to free task state", taskId); streamStateClose(pTask->pState, status == TASK_STATUS__DROPPING); } + pTask->pReadyMsgList = taosArrayDestroy(pTask->pReadyMsgList); + taosThreadMutexDestroy(&pTask->lock); if (pTask->msgInfo.pData != NULL) { destroyStreamDataBlock(pTask->msgInfo.pData); pTask->msgInfo.pData = NULL; @@ -289,6 +337,203 @@ void tFreeStreamTask(SStreamTask* pTask) { pTask->pRspMsgList = NULL; } + if (pTask->pUpstreamInfoList != NULL) { + taosArrayDestroyEx(pTask->pUpstreamInfoList, freeUpstreamItem); + pTask->pUpstreamInfoList = NULL; + } + taosThreadMutexDestroy(&pTask->lock); taosMemoryFree(pTask); + + qDebug("s-task:0x%x free task completed", taskId); } + +int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, int64_t ver) { + pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId); + pTask->refCnt = 1; + pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; + pTask->status.timerActive = 0; + pTask->inputQueue = streamQueueOpen(512 << 10); + pTask->outputInfo.queue = streamQueueOpen(512 << 10); + + if (pTask->inputQueue == NULL || pTask->outputInfo.queue == NULL) { + qError("s-task:%s failed to prepare the input/output queue, initialize task failed", pTask->id.idStr); + return -1; + } + + pTask->tsInfo.init = taosGetTimestampMs(); + pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; + pTask->outputInfo.status 
= TASK_OUTPUT_STATUS__NORMAL; + pTask->pMeta = pMeta; + + pTask->chkInfo.currentVer = ver; + pTask->dataRange.range.maxVer = ver; + pTask->dataRange.range.minVer = ver; + pTask->pMsgCb = pMsgCb; + + taosThreadMutexInit(&pTask->lock, NULL); + streamTaskOpenAllUpstreamInput(pTask); + + return TSDB_CODE_SUCCESS; +} + +int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask) { + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + return 0; + } else { + int32_t type = pTask->outputInfo.type; + if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__TABLE) { + return 1; + } else { + SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + return taosArrayGetSize(vgInfo); + } + } +} + +static SStreamChildEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) { + SStreamChildEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamChildEpInfo)); + if (pEpInfo == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + pEpInfo->childId = pTask->info.selfChildId; + pEpInfo->epSet = pTask->info.epSet; + pEpInfo->nodeId = pTask->info.nodeId; + pEpInfo->taskId = pTask->id.taskId; + pEpInfo->stage = -1; + + return pEpInfo; +} + +int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask) { + SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pUpstreamTask); + if (pEpInfo == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + if (pTask->pUpstreamInfoList == NULL) { + pTask->pUpstreamInfoList = taosArrayInit(4, POINTER_BYTES); + } + + taosArrayPush(pTask->pUpstreamInfoList, &pEpInfo); + return TSDB_CODE_SUCCESS; +} + +void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet) { + char buf[512] = {0}; + EPSET_TO_STR(pEpSet, buf); + + int32_t numOfUpstream = taosArrayGetSize(pTask->pUpstreamInfoList); + for (int32_t i = 0; i < numOfUpstream; ++i) { + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); + if (pInfo->nodeId == nodeId) { + epsetAssign(&pInfo->epSet, pEpSet); + 
qDebug("s-task:0x%x update the upstreamInfo, nodeId:%d newEpset:%s", pTask->id.taskId, nodeId, buf); + break; + } + } +} + +void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDownstreamTask) { + STaskDispatcherFixedEp* pDispatcher = &pTask->fixedEpDispatcher; + pDispatcher->taskId = pDownstreamTask->id.taskId; + pDispatcher->nodeId = pDownstreamTask->info.nodeId; + pDispatcher->epSet = pDownstreamTask->info.epSet; + + pTask->outputInfo.type = TASK_OUTPUT__FIXED_DISPATCH; + pTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH; +} + +void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet) { + char buf[512] = {0}; + EPSET_TO_STR(pEpSet, buf); + + int8_t type = pTask->outputInfo.type; + if (type == TASK_OUTPUT__SHUFFLE_DISPATCH) { + SArray* pVgs = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + + int32_t numOfVgroups = taosArrayGetSize(pVgs); + for (int32_t i = 0; i < numOfVgroups; i++) { + SVgroupInfo* pVgInfo = taosArrayGet(pVgs, i); + + if (pVgInfo->vgId == nodeId) { + epsetAssign(&pVgInfo->epSet, pEpSet); + qDebug("s-task:0x%x update the dispatch info, nodeId:%d newEpset:%s", pTask->id.taskId, nodeId, buf); + break; + } + } + } else if (type == TASK_OUTPUT__FIXED_DISPATCH) { + STaskDispatcherFixedEp* pDispatcher = &pTask->fixedEpDispatcher; + if (pDispatcher->nodeId == nodeId) { + epsetAssign(&pDispatcher->epSet, pEpSet); + qDebug("s-task:0x%x update the dispatch info, nodeId:%d newEpSet:%s", pTask->id.taskId, nodeId, buf); + } + } else { + // do nothing + } +} + +int32_t streamTaskStop(SStreamTask* pTask) { + SStreamMeta* pMeta = pTask->pMeta; + int64_t st = taosGetTimestampMs(); + const char* id = pTask->id.idStr; + + pTask->status.taskStatus = TASK_STATUS__STOP; + qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS); + + while (/*pTask->status.schedStatus != TASK_SCHED_STATUS__INACTIVE */!streamTaskIsIdle(pTask)) { + qDebug("s-task:%s level:%d wait for task to be idle, check again in 100ms", id, 
pTask->info.taskLevel); + taosMsleep(100); + } + + int64_t el = taosGetTimestampMs() - st; + qDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", pMeta->vgId, pTask->id.idStr, el); + return 0; +} + +int32_t doUpdateTaskEpset(SStreamTask* pTask, int32_t nodeId, SEpSet* pEpSet) { + char buf[512] = {0}; + + if (pTask->info.nodeId == nodeId) { // execution task should be moved away + epsetAssign(&pTask->info.epSet, pEpSet); + EPSET_TO_STR(pEpSet, buf) + qDebug("s-task:0x%x (vgId:%d) self node epset is updated %s", pTask->id.taskId, nodeId, buf); + } + + // check for the dispath info and the upstream task info + int32_t level = pTask->info.taskLevel; + if (level == TASK_LEVEL__SOURCE) { + streamTaskUpdateDownstreamInfo(pTask, nodeId, pEpSet); + } else if (level == TASK_LEVEL__AGG) { + streamTaskUpdateUpstreamInfo(pTask, nodeId, pEpSet); + streamTaskUpdateDownstreamInfo(pTask, nodeId, pEpSet); + } else { // TASK_LEVEL__SINK + streamTaskUpdateUpstreamInfo(pTask, nodeId, pEpSet); + } + + return 0; +} + +int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList) { + for (int32_t i = 0; i < taosArrayGetSize(pNodeList); ++i) { + SNodeUpdateInfo* pInfo = taosArrayGet(pNodeList, i); + doUpdateTaskEpset(pTask, pInfo->nodeId, &pInfo->newEp); + } + return 0; +} + +void streamTaskResetUpstreamStageInfo(SStreamTask* pTask) { + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + return; + } + + int32_t size = taosArrayGetSize(pTask->pUpstreamInfoList); + for (int32_t i = 0; i < size; ++i) { + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); + pInfo->stage = -1; + } + + qDebug("s-task:%s reset all upstream tasks stage info", pTask->id.idStr); +} \ No newline at end of file diff --git a/source/libs/stream/src/streamUpdate.c b/source/libs/stream/src/streamUpdate.c index 7a8de91d77..f9ab672c4b 100644 --- a/source/libs/stream/src/streamUpdate.c +++ b/source/libs/stream/src/streamUpdate.c @@ -89,11 +89,11 @@ static int64_t adjustWatermark(int64_t 
adjInterval, int64_t originInt, int64_t w return watermark; } -SUpdateInfo *updateInfoInitP(SInterval *pInterval, int64_t watermark) { - return updateInfoInit(pInterval->interval, pInterval->precision, watermark); +SUpdateInfo *updateInfoInitP(SInterval *pInterval, int64_t watermark, bool igUp) { + return updateInfoInit(pInterval->interval, pInterval->precision, watermark, igUp); } -SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t watermark) { +SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t watermark, bool igUp) { SUpdateInfo *pInfo = taosMemoryCalloc(1, sizeof(SUpdateInfo)); if (pInfo == NULL) { return NULL; @@ -104,30 +104,33 @@ SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t waterma pInfo->interval = adjustInterval(interval, precision); pInfo->watermark = adjustWatermark(pInfo->interval, interval, watermark); - uint64_t bfSize = (uint64_t)(pInfo->watermark / pInfo->interval); + uint64_t bfSize = 0; + if (!igUp) { + bfSize = (uint64_t)(pInfo->watermark / pInfo->interval); - pInfo->pTsSBFs = taosArrayInit(bfSize, sizeof(void *)); - if (pInfo->pTsSBFs == NULL) { - updateInfoDestroy(pInfo); - return NULL; + pInfo->pTsSBFs = taosArrayInit(bfSize, sizeof(void *)); + if (pInfo->pTsSBFs == NULL) { + updateInfoDestroy(pInfo); + return NULL; + } + windowSBfAdd(pInfo, bfSize); + + pInfo->pTsBuckets = taosArrayInit(DEFAULT_BUCKET_SIZE, sizeof(TSKEY)); + if (pInfo->pTsBuckets == NULL) { + updateInfoDestroy(pInfo); + return NULL; + } + + TSKEY dumy = 0; + for (uint64_t i = 0; i < DEFAULT_BUCKET_SIZE; ++i) { + taosArrayPush(pInfo->pTsBuckets, &dumy); + } + pInfo->numBuckets = DEFAULT_BUCKET_SIZE; + pInfo->pCloseWinSBF = NULL; + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT); + pInfo->pMap = taosHashInit(DEFAULT_MAP_CAPACITY, hashFn, true, HASH_NO_LOCK); } pInfo->numSBFs = bfSize; - windowSBfAdd(pInfo, bfSize); - - pInfo->pTsBuckets = taosArrayInit(DEFAULT_BUCKET_SIZE, 
sizeof(TSKEY)); - if (pInfo->pTsBuckets == NULL) { - updateInfoDestroy(pInfo); - return NULL; - } - - TSKEY dumy = 0; - for (uint64_t i = 0; i < DEFAULT_BUCKET_SIZE; ++i) { - taosArrayPush(pInfo->pTsBuckets, &dumy); - } - pInfo->numBuckets = DEFAULT_BUCKET_SIZE; - pInfo->pCloseWinSBF = NULL; - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT); - pInfo->pMap = taosHashInit(DEFAULT_MAP_CAPACITY, hashFn, true, HASH_NO_LOCK); pInfo->maxDataVersion = 0; return pInfo; } diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index dd857141c1..bca9dcabda 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -49,7 +49,8 @@ struct SStreamFileState { typedef SRowBuffPos SRowBuffInfo; SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, - GetTsFun fp, void* pFile, TSKEY delMark, const char* idstr) { + GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId, + int64_t checkpointId) { if (memSize <= 0) { memSize = DEFAULT_MAX_STREAM_BUFFER_SIZE; } @@ -83,9 +84,9 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_ pFileState->deleteMark = delMark; pFileState->flushMark = INT64_MIN; pFileState->maxTs = INT64_MIN; - pFileState->id = taosStrdup(idstr); + pFileState->id = taosStrdup(taskId); - recoverSnapshot(pFileState); + recoverSnapshot(pFileState, checkpointId); return pFileState; _error: @@ -385,7 +386,7 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, 0, buf); // todo handle failure memset(buf, 0, len); -// qDebug("===stream===put %" PRId64 " to disc, res %d", sKey.key.ts, code); + // qDebug("===stream===put %" PRId64 " to disc, res %d", sKey.key.ts, code); } taosMemoryFree(buf); @@ -396,8 +397,8 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, streamStateClearBatch(batch); int64_t elapsed = 
taosGetTimestampMs() - st; - qDebug("%s flush to disk in batch model completed, rows:%d, batch size:%d, elapsed time:%"PRId64"ms", pFileState->id, numOfElems, - BATCH_LIMIT, elapsed); + qDebug("%s flush to disk in batch model completed, rows:%d, batch size:%d, elapsed time:%" PRId64 "ms", + pFileState->id, numOfElems, BATCH_LIMIT, elapsed); if (flushState) { const char* taskKey = "streamFileState"; @@ -479,7 +480,7 @@ int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark) { return code; } -int32_t recoverSnapshot(SStreamFileState* pFileState) { +int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) { int32_t code = TSDB_CODE_SUCCESS; if (pFileState->maxTs != INT64_MIN) { int64_t mark = (INT64_MIN + pFileState->deleteMark >= pFileState->maxTs) @@ -487,8 +488,6 @@ int32_t recoverSnapshot(SStreamFileState* pFileState) { : pFileState->maxTs - pFileState->deleteMark; deleteExpiredCheckPoint(pFileState, mark); } - void* pStVal = NULL; - int32_t len = 0; SWinKey key = {.groupId = 0, .ts = 0}; SStreamStateCur* pCur = streamStateSeekToLast_rocksdb(pFileState->pFileStore, &key); @@ -508,9 +507,12 @@ int32_t recoverSnapshot(SStreamFileState* pFileState) { destroyRowBuffPos(pNewPos); SListNode* pNode = tdListPopTail(pFileState->usedBuffs); taosMemoryFreeClear(pNode); + taosMemoryFreeClear(pVal); break; } + ASSERT(pVLen == pFileState->rowSize); memcpy(pNewPos->pRowBuff, pVal, pVLen); + taosMemoryFreeClear(pVal); code = tSimpleHashPut(pFileState->rowBuffMap, pNewPos->pKey, pFileState->keyLen, &pNewPos, POINTER_BYTES); if (code != TSDB_CODE_SUCCESS) { destroyRowBuffPos(pNewPos); diff --git a/source/libs/stream/test/tstreamUpdateTest.cpp b/source/libs/stream/test/tstreamUpdateTest.cpp index 0e84d6b8bd..f63939ac9e 100644 --- a/source/libs/stream/test/tstreamUpdateTest.cpp +++ b/source/libs/stream/test/tstreamUpdateTest.cpp @@ -12,7 +12,7 @@ class StreamStateEnv : public ::testing::Test { protected: virtual void SetUp() { streamMetaInit(); - backend 
= streamBackendInit(path); + backend = streamBackendInit(path, 0); } virtual void TearDown() { streamMetaCleanup(); diff --git a/source/libs/tdb/test/tdbPageRecycleTest.cpp b/source/libs/tdb/test/tdbPageRecycleTest.cpp index 4d7b314917..d740bd0f94 100644 --- a/source/libs/tdb/test/tdbPageRecycleTest.cpp +++ b/source/libs/tdb/test/tdbPageRecycleTest.cpp @@ -804,7 +804,7 @@ TEST(TdbPageRecycleTest, recycly_delete_interior_ofp_nocommit) { // sprintf(&key[count - 2], "%c", i); key[count - 2] = '0' + i; - ret = tdbTbInsert(pDb, key, count, NULL, NULL, txn); + ret = tdbTbInsert(pDb, key, count, NULL, 0, txn); GTEST_ASSERT_EQ(ret, 0); } } diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index a6b7a20f76..17ef6ce530 100644 --- a/source/libs/transport/inc/transComm.h +++ b/source/libs/transport/inc/transComm.h @@ -262,7 +262,6 @@ bool transAsyncPoolIsEmpty(SAsyncPool* pool); #define ASYNC_CHECK_HANDLE(exh1, id) \ do { \ if (id > 0) { \ - tTrace("handle step1"); \ SExHandle* exh2 = transAcquireExHandle(transGetRefMgt(), id); \ if (exh2 == NULL || id != exh2->refId) { \ tTrace("handle %p except, may already freed, ignore msg, ref1:%" PRIu64 ", ref2:%" PRIu64, exh1, \ diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index cfdc5b5e8b..b02c8aad26 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -12,7 +12,10 @@ * along with this program. If not, see . */ +// clang-format off #include "transComm.h" +#include "tmisce.h" +// clang-format on typedef struct { int32_t numOfConn; @@ -308,19 +311,6 @@ static void cliWalkCb(uv_handle_t* handle, void* arg); } \ } while (0) -#define EPSET_DEBUG_STR(epSet, tbuf) \ - do { \ - int len = snprintf(tbuf, sizeof(tbuf), "epset:{"); \ - for (int i = 0; i < (epSet)->numOfEps; i++) { \ - if (i == (epSet)->numOfEps - 1) { \ - len += snprintf(tbuf + len, sizeof(tbuf) - len, "%d. 
%s:%d", i, (epSet)->eps[i].fqdn, (epSet)->eps[i].port); \ - } else { \ - len += snprintf(tbuf + len, sizeof(tbuf) - len, "%d. %s:%d, ", i, (epSet)->eps[i].fqdn, (epSet)->eps[i].port); \ - } \ - } \ - len += snprintf(tbuf + len, sizeof(tbuf) - len, "}, inUse:%d", (epSet)->inUse); \ - } while (0); - static void* cliWorkThread(void* arg); static void cliReleaseUnfinishedMsg(SCliConn* conn) { @@ -1268,7 +1258,7 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { SCliThrd* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; - if (status == -1) status = ENETUNREACH; + if (status == -1) status = UV_EADDRNOTAVAIL; if (pConn->pBatch == NULL) { SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0); @@ -2167,7 +2157,7 @@ static void cliSchedMsgToNextNode(SCliMsg* pMsg, SCliThrd* pThrd) { if (rpcDebugFlag & DEBUG_DEBUG) { STraceId* trace = &pMsg->msg.info.traceId; char tbuf[256] = {0}; - EPSET_DEBUG_STR(&pCtx->epSet, tbuf); + EPSET_TO_STR(&pCtx->epSet, tbuf); tGDebug("%s retry on next node,use:%s, step: %d,timeout:%" PRId64 "", transLabel(pThrd->pTransInst), tbuf, pCtx->retryStep, pCtx->retryNextInterval); } @@ -2396,7 +2386,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { if (hasEpSet) { if (rpcDebugFlag & DEBUG_TRACE) { char tbuf[256] = {0}; - EPSET_DEBUG_STR(&pCtx->epSet, tbuf); + EPSET_TO_STR(&pCtx->epSet, tbuf); tGTrace("%s conn %p extract epset from msg", CONN_GET_INST_LABEL(pConn), pConn); } } diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index 40610d7651..a53830723c 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -196,8 +196,6 @@ static bool uvHandleReq(SSvrConn* pConn) { tError("%s conn %p recv invalid packet, failed to decompress", transLabel(pTransInst), pConn); return false; } - tDebug("head version: %d 2", pHead->version); - pHead->code = htonl(pHead->code); pHead->msgLen = htonl(pHead->msgLen); @@ -727,7 +725,6 @@ void 
uvOnAcceptCb(uv_stream_t* stream, int status) { } } void uvOnConnectionCb(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) { - tTrace("connection coming"); if (nread < 0) { if (nread != UV_EOF) { tError("read error %s", uv_err_name(nread)); diff --git a/source/util/src/tarray.c b/source/util/src/tarray.c index f5e15e7436..8e7c0f9584 100644 --- a/source/util/src/tarray.c +++ b/source/util/src/tarray.c @@ -319,7 +319,7 @@ SArray* taosArrayDup(const SArray* pSrc, __array_item_dup_fn_t fn) { if (NULL == pSrc) { return NULL; } - + if (pSrc->size == 0) { // empty array list return taosArrayInit(8, pSrc->elemSize); } @@ -360,6 +360,23 @@ void taosArrayClearEx(SArray* pArray, void (*fp)(void*)) { pArray->size = 0; } +void taosArrayClearP(SArray* pArray, void (*fp)(void*)) { + // if (pArray == NULL) return; + // if (fp == NULL) { + // pArray->size = 0; + // return; + // } + + // for (int32_t i = 0; i < pArray->size; ++i) { + // fp(TARRAY_GET_ELEM(pArray, i)); + // } + if (pArray) { + for (int32_t i = 0; i < pArray->size; i++) { + fp(*(void**)TARRAY_GET_ELEM(pArray, i)); + } + } + taosArrayClear(pArray); +} void* taosArrayDestroy(SArray* pArray) { if (pArray) { diff --git a/source/util/src/tref.c b/source/util/src/tref.c index e70e12b37b..1bd3099b2d 100644 --- a/source/util/src/tref.c +++ b/source/util/src/tref.c @@ -181,7 +181,8 @@ int64_t taosAddRef(int32_t rsetId, void *p) { if (pSet->nodeList[hash]) pSet->nodeList[hash]->prev = pNode; pSet->nodeList[hash] = pNode; - uTrace("rsetId:%d p:%p rid:%" PRId64 " is added, count:%d", rsetId, p, rid, pSet->count); + uTrace("rsetId:%d p:%p rid:%" PRId64 " is added, count:%d, remain count:%d", rsetId, p, rid, pSet->count, + pNode->count); taosUnlockList(pSet->lockedBy + hash); @@ -235,7 +236,7 @@ void *taosAcquireRef(int32_t rsetId, int64_t rid) { if (pNode->removed == 0) { pNode->count++; p = pNode->p; - uTrace("rsetId:%d p:%p rid:%" PRId64 " is acquired", rsetId, pNode->p, rid); + uTrace("rsetId:%d p:%p rid:%" PRId64 " is 
acquired, remain count:%d", rsetId, pNode->p, rid, pNode->count); } else { terrno = TSDB_CODE_REF_NOT_EXIST; uTrace("rsetId:%d p:%p rid:%" PRId64 " is already removed, failed to acquire", rsetId, pNode->p, rid); diff --git a/stream b/stream deleted file mode 100644 index 3eafb580a3..0000000000 --- a/stream +++ /dev/null @@ -1,1137 +0,0 @@ -3986:03/29 15:26:32.540895 00099488 QRY streamStateSetNumber, seq: 1 -3987:03/29 15:26:32.540937 00099488 QRY streamStateSetNumber, seq: 2 -4437:03/29 15:26:32.601303 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] failed to read from default, err: not exist -4438:03/29 15:26:32.601553 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -4439:03/29 15:26:32.601569 00099504 QRY streamStateReleaseBuf -4447:03/29 15:26:32.601725 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -4448:03/29 15:26:32.601752 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -4452:03/29 15:26:32.601817 00099504 QRY streamStateReleaseBuf -4460:03/29 15:26:32.601919 00099504 QRY streamStateGetCur_rocksdb -4461:03/29 15:26:32.601965 00099504 QRY streamStateCurPrev_rocksdb -4462:03/29 15:26:32.602051 00099504 QRY streamStateGetKVByCur_rocksdb -4464:03/29 15:26:32.602085 00099504 QRY streamStateFreeCur -4956:03/29 15:26:32.815478 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to del from default -5010:03/29 15:26:32.816584 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -5018:03/29 15:26:32.816701 00099504 QRY streamStateGetCur_rocksdb -5019:03/29 15:26:32.816760 00099504 QRY streamStateFreeCur -5020:03/29 15:26:32.816785 00099504 QRY streamStateCurPrev_rocksdb -5540:03/29 15:26:33.041742 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] failed to read from default, err: not exist -5541:03/29 
15:26:33.041831 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -5542:03/29 15:26:33.041853 00099504 QRY streamStateReleaseBuf -5549:03/29 15:26:33.041979 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to del from default -5595:03/29 15:26:33.043127 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] failed to read from default, err: not exist -5596:03/29 15:26:33.043186 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -5597:03/29 15:26:33.043206 00099504 QRY streamStateReleaseBuf -5626:03/29 15:26:33.044578 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -5627:03/29 15:26:33.044611 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -5631:03/29 15:26:33.044676 00099504 QRY streamStateReleaseBuf -5639:03/29 15:26:33.044769 00099504 QRY streamStateGetCur_rocksdb -5640:03/29 15:26:33.044817 00099504 QRY streamStateCurPrev_rocksdb -5641:03/29 15:26:33.044842 00099504 QRY streamStateGetKVByCur_rocksdb -5643:03/29 15:26:33.044940 00099504 QRY streamStateFreeCur -6298:03/29 15:26:33.270625 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -6303:03/29 15:26:33.270831 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -6304:03/29 15:26:33.271088 00099504 QRY streamStateReleaseBuf -6352:03/29 15:26:33.272286 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to del from default -6402:03/29 15:26:33.274181 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] failed to read from default, err: not exist -6403:03/29 15:26:33.274262 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -6404:03/29 
15:26:33.274284 00099504 QRY streamStateReleaseBuf -6427:03/29 15:26:33.274651 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -6428:03/29 15:26:33.274676 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -6432:03/29 15:26:33.274719 00099504 QRY streamStateReleaseBuf -6440:03/29 15:26:33.275020 00099504 QRY streamStateGetCur_rocksdb -6441:03/29 15:26:33.275072 00099504 QRY streamStateCurPrev_rocksdb -6442:03/29 15:26:33.275088 00099504 QRY streamStateGetKVByCur_rocksdb -6444:03/29 15:26:33.275106 00099504 QRY streamStateFreeCur -6754:03/29 15:26:33.280321 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -6755:03/29 15:26:33.280420 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -6756:03/29 15:26:33.280440 00099504 QRY streamStateReleaseBuf -6764:03/29 15:26:33.280565 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -6765:03/29 15:26:33.280586 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -6769:03/29 15:26:33.280618 00099504 QRY streamStateReleaseBuf -6777:03/29 15:26:33.280705 00099504 QRY streamStateGetCur_rocksdb -6778:03/29 15:26:33.280760 00099504 QRY streamStateCurPrev_rocksdb -6779:03/29 15:26:33.280786 00099504 QRY streamStateGetKVByCur_rocksdb -6781:03/29 15:26:33.280889 00099504 QRY streamStateFreeCur -6999:03/29 15:26:33.286714 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -7000:03/29 15:26:33.286811 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -7001:03/29 15:26:33.286831 00099504 QRY streamStateReleaseBuf -7009:03/29 15:26:33.287090 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -7010:03/29 
15:26:33.287123 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -7014:03/29 15:26:33.287196 00099504 QRY streamStateReleaseBuf -7022:03/29 15:26:33.287315 00099504 QRY streamStateGetCur_rocksdb -7023:03/29 15:26:33.287370 00099504 QRY streamStateCurPrev_rocksdb -7024:03/29 15:26:33.287395 00099504 QRY streamStateGetKVByCur_rocksdb -7026:03/29 15:26:33.287437 00099504 QRY streamStateFreeCur -7324:03/29 15:26:33.498096 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to del from default -7374:03/29 15:26:33.499363 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] failed to read from default, err: not exist -7375:03/29 15:26:33.499439 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -7376:03/29 15:26:33.499462 00099504 QRY streamStateReleaseBuf -7408:03/29 15:26:33.500618 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -7409:03/29 15:26:33.500674 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -7413:03/29 15:26:33.500778 00099504 QRY streamStateReleaseBuf -7421:03/29 15:26:33.501007 00099504 QRY streamStateGetCur_rocksdb -7422:03/29 15:26:33.501071 00099504 QRY streamStateCurPrev_rocksdb -7423:03/29 15:26:33.501105 00099504 QRY streamStateGetKVByCur_rocksdb -7425:03/29 15:26:33.501158 00099504 QRY streamStateFreeCur -7920:03/29 15:26:33.722388 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] failed to read from default, err: not exist -7921:03/29 15:26:33.722476 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to write to default, valLen:135 -7922:03/29 15:26:33.722492 00099504 QRY streamStateReleaseBuf -7930:03/29 15:26:33.722638 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to read from default -7931:03/29 15:26:33.722659 00099504 QRY 
streamState str: [groupId:0] failed to read from parname, err: not exist -7935:03/29 15:26:33.722727 00099504 QRY streamStateReleaseBuf -7943:03/29 15:26:33.722841 00099504 QRY streamStateGetCur_rocksdb -7944:03/29 15:26:33.722962 00099504 QRY streamStateCurPrev_rocksdb -7945:03/29 15:26:33.722998 00099504 QRY streamStateGetKVByCur_rocksdb -7947:03/29 15:26:33.723024 00099504 QRY streamStateFreeCur -8177:03/29 15:26:33.728574 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to read from default -8178:03/29 15:26:33.728652 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to write to default, valLen:135 -8179:03/29 15:26:33.728663 00099504 QRY streamStateReleaseBuf -8197:03/29 15:26:33.729022 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to read from default -8198:03/29 15:26:33.729077 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -8202:03/29 15:26:33.729177 00099504 QRY streamStateReleaseBuf -8211:03/29 15:26:33.729312 00099504 QRY streamStateGetCur_rocksdb -8213:03/29 15:26:33.729372 00099504 QRY streamStateCurPrev_rocksdb -8214:03/29 15:26:33.729427 00099504 QRY streamStateGetKVByCur_rocksdb -8216:03/29 15:26:33.729456 00099504 QRY streamStateFreeCur -8460:03/29 15:26:33.735166 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to read from default -8461:03/29 15:26:33.735260 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to write to default, valLen:135 -8462:03/29 15:26:33.735282 00099504 QRY streamStateReleaseBuf -8470:03/29 15:26:33.735474 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to read from default -8471:03/29 15:26:33.735516 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -8475:03/29 15:26:33.735586 00099504 QRY streamStateReleaseBuf -8483:03/29 15:26:33.735713 00099504 QRY 
streamStateGetCur_rocksdb -8484:03/29 15:26:33.735783 00099504 QRY streamStateCurPrev_rocksdb -8485:03/29 15:26:33.735812 00099504 QRY streamStateGetKVByCur_rocksdb -8487:03/29 15:26:33.735845 00099504 QRY streamStateFreeCur -8717:03/29 15:26:33.741222 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to read from default -8718:03/29 15:26:33.741315 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to write to default, valLen:135 -8719:03/29 15:26:33.741337 00099504 QRY streamStateReleaseBuf -8727:03/29 15:26:33.741534 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to read from default -8728:03/29 15:26:33.741576 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -8732:03/29 15:26:33.741669 00099504 QRY streamStateReleaseBuf -8740:03/29 15:26:33.741779 00099504 QRY streamStateGetCur_rocksdb -8741:03/29 15:26:33.741912 00099504 QRY streamStateCurPrev_rocksdb -8742:03/29 15:26:33.741938 00099504 QRY streamStateGetKVByCur_rocksdb -8744:03/29 15:26:33.741968 00099504 QRY streamStateFreeCur -9392:03/29 15:26:34.177492 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to del from default -9446:03/29 15:26:34.178584 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -9454:03/29 15:26:34.178719 00099504 QRY streamStateGetCur_rocksdb -9455:03/29 15:26:34.178777 00099504 QRY streamStateCurPrev_rocksdb -9456:03/29 15:26:34.178803 00099504 QRY streamStateGetKVByCur_rocksdb -9458:03/29 15:26:34.178844 00099504 QRY streamStateFreeCur -10007:03/29 15:26:34.396214 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -10008:03/29 15:26:34.396312 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -10009:03/29 15:26:34.396334 00099504 QRY streamStateReleaseBuf -10016:03/29 15:26:34.396441 
00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to del from default -10063:03/29 15:26:34.397335 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] failed to read from default, err: not exist -10064:03/29 15:26:34.397396 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -10065:03/29 15:26:34.397413 00099504 QRY streamStateReleaseBuf -10099:03/29 15:26:34.398077 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -10104:03/29 15:26:34.398102 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -10109:03/29 15:26:34.398252 00099504 QRY streamStateReleaseBuf -10121:03/29 15:26:34.398444 00099504 QRY streamStateGetCur_rocksdb -10125:03/29 15:26:34.398501 00099504 QRY streamStateCurPrev_rocksdb -10126:03/29 15:26:34.398614 00099504 QRY streamStateGetKVByCur_rocksdb -10135:03/29 15:26:34.398654 00099504 QRY streamStateFreeCur -10306:03/29 15:26:34.402709 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -10307:03/29 15:26:34.402803 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -10308:03/29 15:26:34.402825 00099504 QRY streamStateReleaseBuf -10315:03/29 15:26:34.402978 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to del from default -10385:03/29 15:26:34.404520 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] failed to read from default, err: not exist -10390:03/29 15:26:34.404599 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -10392:03/29 15:26:34.404623 00099504 QRY streamStateReleaseBuf -10447:03/29 15:26:34.405786 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -10448:03/29 
15:26:34.405806 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -10452:03/29 15:26:34.405862 00099504 QRY streamStateReleaseBuf -10460:03/29 15:26:34.405976 00099504 QRY streamStateGetCur_rocksdb -10461:03/29 15:26:34.406033 00099504 QRY streamStateCurPrev_rocksdb -10462:03/29 15:26:34.406059 00099504 QRY streamStateGetKVByCur_rocksdb -10464:03/29 15:26:34.406085 00099504 QRY streamStateFreeCur -10613:03/29 15:26:34.409146 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -10615:03/29 15:26:34.409218 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -10616:03/29 15:26:34.409266 00099504 QRY streamStateReleaseBuf -10626:03/29 15:26:34.409404 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to del from default -10708:03/29 15:26:34.410801 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] failed to read from default, err: not exist -10710:03/29 15:26:34.410940 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -10712:03/29 15:26:34.410959 00099504 QRY streamStateReleaseBuf -10754:03/29 15:26:34.411727 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -10755:03/29 15:26:34.411750 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -10759:03/29 15:26:34.411828 00099504 QRY streamStateReleaseBuf -10767:03/29 15:26:34.411929 00099504 QRY streamStateGetCur_rocksdb -10768:03/29 15:26:34.411976 00099504 QRY streamStateCurPrev_rocksdb -10769:03/29 15:26:34.412065 00099504 QRY streamStateGetKVByCur_rocksdb -10771:03/29 15:26:34.412093 00099504 QRY streamStateFreeCur -10919:03/29 15:26:34.415336 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -10924:03/29 15:26:34.415401 00099504 
QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -10925:03/29 15:26:34.415530 00099504 QRY streamStateReleaseBuf -10939:03/29 15:26:34.415814 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to del from default -11037:03/29 15:26:34.417653 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] failed to read from default, err: not exist -11038:03/29 15:26:34.417730 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -11039:03/29 15:26:34.417749 00099504 QRY streamStateReleaseBuf -11062:03/29 15:26:34.418366 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -11063:03/29 15:26:34.418387 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -11067:03/29 15:26:34.418436 00099504 QRY streamStateReleaseBuf -11075:03/29 15:26:34.418564 00099504 QRY streamStateGetCur_rocksdb -11076:03/29 15:26:34.418619 00099504 QRY streamStateCurPrev_rocksdb -11077:03/29 15:26:34.418642 00099504 QRY streamStateGetKVByCur_rocksdb -11079:03/29 15:26:34.418671 00099504 QRY streamStateFreeCur -11266:03/29 15:26:34.421834 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] failed to read from default, err: not exist -11271:03/29 15:26:34.421905 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to write to default, valLen:135 -11273:03/29 15:26:34.421927 00099504 QRY streamStateReleaseBuf -11287:03/29 15:26:34.422121 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to del from default -11342:03/29 15:26:34.423145 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] failed to read from default, err: not exist -11343:03/29 15:26:34.423200 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to write to default, valLen:135 
-11344:03/29 15:26:34.423217 00099504 QRY streamStateReleaseBuf -11367:03/29 15:26:34.423626 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to read from default -11368:03/29 15:26:34.423647 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -11372:03/29 15:26:34.423700 00099504 QRY streamStateReleaseBuf -11380:03/29 15:26:34.423816 00099504 QRY streamStateGetCur_rocksdb -11381:03/29 15:26:34.423856 00099504 QRY streamStateCurPrev_rocksdb -11382:03/29 15:26:34.423935 00099504 QRY streamStateGetKVByCur_rocksdb -11384:03/29 15:26:34.423968 00099504 QRY streamStateFreeCur -11678:03/29 15:26:34.428786 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to read from default -11680:03/29 15:26:34.428835 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to write to default, valLen:135 -11681:03/29 15:26:34.428879 00099504 QRY streamStateReleaseBuf -11693:03/29 15:26:34.429033 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to del from default -11744:03/29 15:26:34.429959 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] failed to read from default, err: not exist -11745:03/29 15:26:34.430023 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to write to default, valLen:135 -11746:03/29 15:26:34.430042 00099504 QRY streamStateReleaseBuf -11769:03/29 15:26:34.430502 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to read from default -11770:03/29 15:26:34.430533 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -11774:03/29 15:26:34.430602 00099504 QRY streamStateReleaseBuf -11782:03/29 15:26:34.430710 00099504 QRY streamStateGetCur_rocksdb -11783:03/29 15:26:34.430752 00099504 QRY streamStateCurPrev_rocksdb -11784:03/29 15:26:34.430773 00099504 QRY streamStateGetKVByCur_rocksdb -11786:03/29 
15:26:34.430787 00099504 QRY streamStateFreeCur -11999:03/29 15:26:34.434262 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to read from default -12000:03/29 15:26:34.434332 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to write to default, valLen:135 -12001:03/29 15:26:34.434348 00099504 QRY streamStateReleaseBuf -12008:03/29 15:26:34.434438 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to del from default -12061:03/29 15:26:34.435131 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] failed to read from default, err: not exist -12062:03/29 15:26:34.435269 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to write to default, valLen:135 -12063:03/29 15:26:34.435281 00099504 QRY streamStateReleaseBuf -12098:03/29 15:26:34.435777 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to read from default -12099:03/29 15:26:34.435795 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -12108:03/29 15:26:34.435970 00099504 QRY streamStateReleaseBuf -12126:03/29 15:26:34.436125 00099504 QRY streamStateGetCur_rocksdb -12131:03/29 15:26:34.436229 00099504 QRY streamStateCurPrev_rocksdb -12132:03/29 15:26:34.436312 00099504 QRY streamStateGetKVByCur_rocksdb -12134:03/29 15:26:34.436333 00099504 QRY streamStateFreeCur -12305:03/29 15:26:34.439031 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 2] failed to read from default, err: not exist -12306:03/29 15:26:34.439071 00099504 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to write to default, valLen:135 -12307:03/29 15:26:34.439085 00099504 QRY streamStateReleaseBuf -12315:03/29 15:26:34.439182 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to read from default -12316:03/29 15:26:34.439203 00099504 QRY streamState str: [groupId:0] 
failed to read from parname, err: not exist -12320:03/29 15:26:34.439259 00099504 QRY streamStateReleaseBuf -12328:03/29 15:26:34.439338 00099504 QRY streamStateGetCur_rocksdb -12329:03/29 15:26:34.439364 00099504 QRY streamStateCurPrev_rocksdb -12330:03/29 15:26:34.439371 00099504 QRY streamStateGetKVByCur_rocksdb -12332:03/29 15:26:34.439397 00099504 QRY streamStateFreeCur -12538:03/29 15:26:34.442517 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to read from default -12539:03/29 15:26:34.442565 00099504 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to write to default, valLen:135 -12540:03/29 15:26:34.442581 00099504 QRY streamStateReleaseBuf -12548:03/29 15:26:34.442693 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to read from default -12549:03/29 15:26:34.442715 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -12553:03/29 15:26:34.442767 00099504 QRY streamStateReleaseBuf -12561:03/29 15:26:34.442840 00099504 QRY streamStateGetCur_rocksdb -12562:03/29 15:26:34.442871 00099504 QRY streamStateCurPrev_rocksdb -12563:03/29 15:26:34.442885 00099504 QRY streamStateGetKVByCur_rocksdb -12565:03/29 15:26:34.442911 00099504 QRY streamStateFreeCur -12794:03/29 15:26:34.446874 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to read from default -12795:03/29 15:26:34.446925 00099504 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to write to default, valLen:135 -12796:03/29 15:26:34.446938 00099504 QRY streamStateReleaseBuf -12804:03/29 15:26:34.447053 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to read from default -12805:03/29 15:26:34.447122 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -12809:03/29 15:26:34.447183 00099504 QRY streamStateReleaseBuf -12817:03/29 15:26:34.447263 00099504 QRY 
streamStateGetCur_rocksdb -12818:03/29 15:26:34.447295 00099504 QRY streamStateCurPrev_rocksdb -12819:03/29 15:26:34.447311 00099504 QRY streamStateGetKVByCur_rocksdb -12821:03/29 15:26:34.447336 00099504 QRY streamStateFreeCur -13002:03/29 15:26:34.449990 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to read from default -13028:03/29 15:26:34.450040 00099504 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to write to default, valLen:135 -13029:03/29 15:26:34.450506 00099504 QRY streamStateReleaseBuf -13037:03/29 15:26:34.450627 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to read from default -13038:03/29 15:26:34.450642 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -13042:03/29 15:26:34.450681 00099504 QRY streamStateReleaseBuf -13050:03/29 15:26:34.450772 00099504 QRY streamStateGetCur_rocksdb -13051:03/29 15:26:34.450806 00099504 QRY streamStateCurPrev_rocksdb -13052:03/29 15:26:34.450825 00099504 QRY streamStateGetKVByCur_rocksdb -13054:03/29 15:26:34.450845 00099504 QRY streamStateFreeCur -13375:03/29 15:26:34.456264 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to del from default -13376:03/29 15:26:34.456301 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 2] succ to del from default -13377:03/29 15:26:34.456339 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to del from default -13425:03/29 15:26:34.456949 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] failed to read from default, err: not exist -13426:03/29 15:26:34.456986 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -13427:03/29 15:26:34.456999 00099504 QRY streamStateReleaseBuf -13428:03/29 15:26:34.457026 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 2] failed to read 
from default, err: not exist -13429:03/29 15:26:34.457072 00099504 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to write to default, valLen:135 -13430:03/29 15:26:34.457089 00099504 QRY streamStateReleaseBuf -13452:03/29 15:26:34.457437 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -13460:03/29 15:26:34.457548 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -13461:03/29 15:26:34.457569 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -13465:03/29 15:26:34.457632 00099504 QRY streamStateReleaseBuf -13466:03/29 15:26:34.457659 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 2] succ to read from default -13467:03/29 15:26:34.457680 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -13471:03/29 15:26:34.457742 00099504 QRY streamStateReleaseBuf -13480:03/29 15:26:34.457838 00099504 QRY streamStateGetCur_rocksdb -13481:03/29 15:26:34.457875 00099504 QRY streamStateCurPrev_rocksdb -13482:03/29 15:26:34.457894 00099504 QRY streamStateGetKVByCur_rocksdb -13484:03/29 15:26:34.457925 00099504 QRY streamStateFreeCur -24389:03/29 15:26:35.766200 00099571 QRY streamStateSetNumber, seq: 3 -24390:03/29 15:26:35.766226 00099571 QRY streamStateSetNumber, seq: 4 -24406:03/29 15:26:35.768111 00099577 QRY streamStateSetNumber, seq: 5 -24407:03/29 15:26:35.768140 00099577 QRY streamStateSetNumber, seq: 6 -24423:03/29 15:26:35.768472 00099565 QRY streamStateSetNumber, seq: 7 -24424:03/29 15:26:35.768494 00099565 QRY streamStateSetNumber, seq: 8 -24427:03/29 15:26:35.769698 00099560 QRY streamStateSetNumber, seq: 9 -24428:03/29 15:26:35.769809 00099560 QRY streamStateSetNumber, seq: 10 -24429:03/29 15:26:35.769968 00099560 QRY streamStateSetNumber, seq: 11 -24430:03/29 15:26:35.770071 00099560 QRY streamStateSetNumber, seq: 12 -24431:03/29 15:26:35.770183 00099560 QRY 
streamStateSetNumber, seq: 13 -24432:03/29 15:26:35.770289 00099560 QRY streamStateSetNumber, seq: 14 -24433:03/29 15:26:35.770401 00099560 QRY streamStateSetNumber, seq: 15 -24434:03/29 15:26:35.770510 00099560 QRY streamStateSetNumber, seq: 16 -24435:03/29 15:26:35.770621 00099560 QRY streamStateSetNumber, seq: 17 -24698:03/29 15:26:35.877143 00099560 QRY streamStateSetNumber, seq: 18 -24699:03/29 15:26:35.877173 00099560 QRY streamStateSetNumber, seq: 19 -25022:03/29 15:26:35.935568 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 19] failed to read from default, err: not exist -25023:03/29 15:26:35.936534 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 19] succ to write to default, valLen:135 -25024:03/29 15:26:35.936554 00099504 QRY streamStateReleaseBuf -25031:03/29 15:26:35.936693 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 19] succ to read from default -25032:03/29 15:26:35.936721 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -25036:03/29 15:26:35.936786 00099504 QRY streamStateReleaseBuf -25045:03/29 15:26:35.936910 00099504 QRY streamStateClear_rocksdb seq:19 -25131:03/29 15:26:35.940598 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] failed to read from default, err: not exist -25150:03/29 15:26:35.941187 00099569 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -25151:03/29 15:26:35.941206 00099569 QRY streamStateReleaseBuf -25152:03/29 15:26:35.941228 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] failed to read from default, err: not exist -25153:03/29 15:26:35.941272 00099569 QRY streamState str:[groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to write to default, valLen:159 -25154:03/29 15:26:35.941289 00099569 QRY streamStateReleaseBuf -25163:03/29 15:26:35.941439 00099569 QRY streamState str: 
[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -25164:03/29 15:26:35.941454 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -25168:03/29 15:26:35.941496 00099569 QRY streamStateReleaseBuf -25177:03/29 15:26:35.941606 00099569 QRY streamStateGetCur_rocksdb -25178:03/29 15:26:35.941647 00099569 QRY streamStateCurPrev_rocksdb -25179:03/29 15:26:35.941662 00099569 QRY streamStateGetKVByCur_rocksdb -25181:03/29 15:26:35.941683 00099569 QRY streamStateFreeCur -25454:03/29 15:26:35.946084 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] failed to read from default, err: not exist -25604:03/29 15:26:35.948604 00099593 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to write to default, valLen:135 -25605:03/29 15:26:35.948628 00099593 QRY streamStateReleaseBuf -25612:03/29 15:26:35.948791 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to read from default -25613:03/29 15:26:35.948825 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -25617:03/29 15:26:35.948957 00099593 QRY streamStateReleaseBuf -25626:03/29 15:26:35.949089 00099593 QRY streamStateClear_rocksdb seq:8 -25719:03/29 15:26:35.951052 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -25725:03/29 15:26:35.951164 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -25726:03/29 15:26:35.951176 00099575 QRY streamStateReleaseBuf -25727:03/29 15:26:35.951218 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] failed to read from default, err: not exist -25729:03/29 15:26:35.951260 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to write to default, valLen:159 -25731:03/29 15:26:35.951276 00099575 QRY streamStateReleaseBuf -25740:03/29 15:26:35.951424 00099575 QRY streamState str: 
[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -25741:03/29 15:26:35.951448 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -25745:03/29 15:26:35.951498 00099575 QRY streamStateReleaseBuf -25754:03/29 15:26:35.951646 00099575 QRY streamStateGetCur_rocksdb -25755:03/29 15:26:35.951691 00099575 QRY streamStateCurPrev_rocksdb -25756:03/29 15:26:35.951715 00099575 QRY streamStateGetKVByCur_rocksdb -25758:03/29 15:26:35.951746 00099575 QRY streamStateFreeCur -26929:03/29 15:26:36.385048 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 19] succ to del from default -26935:03/29 15:26:36.385099 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -27075:03/29 15:26:36.389048 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to del from default -27076:03/29 15:26:36.389266 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to del from default -27077:03/29 15:26:36.389305 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] failed to read from default, err: not exist -27078:03/29 15:26:36.389330 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to read from default -27079:03/29 15:26:36.389362 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] failed to read from default, err: not exist -27080:03/29 15:26:36.389386 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to read from default -27081:03/29 15:26:36.389410 00099569 QRY streamStateReleaseBuf -27082:03/29 15:26:36.389448 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 15] failed to read from default, err: not exist -27083:03/29 15:26:36.389481 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 17] failed to read from default, err: not exist -27087:03/29 15:26:36.389770 00099569 QRY streamState 
str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -27088:03/29 15:26:36.389978 00099569 QRY streamStateReleaseBuf -27109:03/29 15:26:36.390413 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -27115:03/29 15:26:36.390538 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -27121:03/29 15:26:36.390772 00099569 QRY streamStateReleaseBuf -27130:03/29 15:26:36.391003 00099569 QRY streamStateGetCur_rocksdb -27131:03/29 15:26:36.391083 00099569 QRY streamStateCurPrev_rocksdb -27132:03/29 15:26:36.391119 00099569 QRY streamStateGetKVByCur_rocksdb -27134:03/29 15:26:36.391162 00099569 QRY streamStateFreeCur -27712:03/29 15:26:36.613687 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] failed to read from default, err: not exist -27713:03/29 15:26:36.613804 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to write to default, valLen:135 -27714:03/29 15:26:36.613888 00099593 QRY streamStateReleaseBuf -27721:03/29 15:26:36.614032 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to read from default -27722:03/29 15:26:36.614063 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -27726:03/29 15:26:36.614139 00099593 QRY streamStateReleaseBuf -27735:03/29 15:26:36.614284 00099593 QRY streamStateClear_rocksdb seq:19 -27919:03/29 15:26:36.617683 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] failed to read from default, err: not exist -27925:03/29 15:26:36.617772 00099569 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -27927:03/29 15:26:36.617797 00099569 QRY streamStateReleaseBuf -27932:03/29 15:26:36.617832 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] failed to read from default, err: not exist -27934:03/29 15:26:36.617940 
00099569 QRY streamState str:[groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to write to default, valLen:159 -27935:03/29 15:26:36.617987 00099569 QRY streamStateReleaseBuf -27948:03/29 15:26:36.618231 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -27949:03/29 15:26:36.618278 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -27958:03/29 15:26:36.618525 00099569 QRY streamStateReleaseBuf -27982:03/29 15:26:36.618888 00099569 QRY streamStateGetCur_rocksdb -27994:03/29 15:26:36.619064 00099569 QRY streamStateCurPrev_rocksdb -27997:03/29 15:26:36.619671 00099569 QRY streamStateGetKVByCur_rocksdb -27999:03/29 15:26:36.619743 00099569 QRY streamStateFreeCur -28078:03/29 15:26:36.621690 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] failed to read from default, err: not exist -28082:03/29 15:26:36.621782 00099575 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to write to default, valLen:135 -28083:03/29 15:26:36.621794 00099575 QRY streamStateReleaseBuf -28132:03/29 15:26:36.622671 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to read from default -28200:03/29 15:26:36.623624 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -28204:03/29 15:26:36.625551 00099575 QRY streamStateReleaseBuf -28232:03/29 15:26:36.626171 00099575 QRY streamStateClear_rocksdb seq:19 -28386:03/29 15:26:36.629674 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -28387:03/29 15:26:36.629751 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -28388:03/29 15:26:36.629773 00099504 QRY streamStateReleaseBuf -28389:03/29 15:26:36.629807 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to read from default -28390:03/29 15:26:36.629932 00099504 QRY 
streamState str:[groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to write to default, valLen:159 -28391:03/29 15:26:36.629955 00099504 QRY streamStateReleaseBuf -28400:03/29 15:26:36.630174 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -28401:03/29 15:26:36.630206 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -28405:03/29 15:26:36.630291 00099504 QRY streamStateReleaseBuf -28414:03/29 15:26:36.630450 00099504 QRY streamStateGetCur_rocksdb -28415:03/29 15:26:36.630505 00099504 QRY streamStateCurPrev_rocksdb -28416:03/29 15:26:36.630539 00099504 QRY streamStateGetKVByCur_rocksdb -28418:03/29 15:26:36.630581 00099504 QRY streamStateFreeCur -28729:03/29 15:26:36.637120 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] failed to read from default, err: not exist -28730:03/29 15:26:36.637380 00099575 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to write to default, valLen:135 -28731:03/29 15:26:36.637416 00099575 QRY streamStateReleaseBuf -28738:03/29 15:26:36.637723 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to read from default -28739:03/29 15:26:36.637749 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -28743:03/29 15:26:36.637863 00099575 QRY streamStateReleaseBuf -28752:03/29 15:26:36.638110 00099575 QRY streamStateClear_rocksdb seq:19 -28941:03/29 15:26:36.643526 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -28951:03/29 15:26:36.643634 00099569 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -28954:03/29 15:26:36.643786 00099569 QRY streamStateReleaseBuf -28955:03/29 15:26:36.643949 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to read from default -28959:03/29 15:26:36.643996 00099569 QRY streamState 
str:[groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to write to default, valLen:159 -28961:03/29 15:26:36.644018 00099569 QRY streamStateReleaseBuf -28979:03/29 15:26:36.644396 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -28980:03/29 15:26:36.644412 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -29003:03/29 15:26:36.644735 00099569 QRY streamStateReleaseBuf -29047:03/29 15:26:36.645726 00099569 QRY streamStateGetCur_rocksdb -29049:03/29 15:26:36.645783 00099569 QRY streamStateCurPrev_rocksdb -29051:03/29 15:26:36.645816 00099569 QRY streamStateGetKVByCur_rocksdb -29056:03/29 15:26:36.645908 00099569 QRY streamStateFreeCur -29118:03/29 15:26:36.646825 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] failed to read from default, err: not exist -29132:03/29 15:26:36.647146 00099575 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to write to default, valLen:135 -29143:03/29 15:26:36.647241 00099575 QRY streamStateReleaseBuf -29218:03/29 15:26:36.648091 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to read from default -29220:03/29 15:26:36.648376 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -29245:03/29 15:26:36.648720 00099575 QRY streamStateReleaseBuf -29292:03/29 15:26:36.649340 00099575 QRY streamStateClear_rocksdb seq:19 -29338:03/29 15:26:36.650291 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] failed to read from default, err: not exist -29341:03/29 15:26:36.650366 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to write to default, valLen:135 -29342:03/29 15:26:36.650401 00099504 QRY streamStateReleaseBuf -29387:03/29 15:26:36.651129 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to read from default -29388:03/29 15:26:36.651172 00099504 QRY 
streamState str: [groupId:0] failed to read from parname, err: not exist -29392:03/29 15:26:36.651221 00099504 QRY streamStateReleaseBuf -29424:03/29 15:26:36.652291 00099504 QRY streamStateClear_rocksdb seq:8 -29494:03/29 15:26:36.653157 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -29495:03/29 15:26:36.653218 00099569 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -29496:03/29 15:26:36.653235 00099569 QRY streamStateReleaseBuf -29497:03/29 15:26:36.653261 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to read from default -29498:03/29 15:26:36.653387 00099569 QRY streamState str:[groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to write to default, valLen:159 -29499:03/29 15:26:36.653409 00099569 QRY streamStateReleaseBuf -29508:03/29 15:26:36.653558 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -29509:03/29 15:26:36.653585 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -29513:03/29 15:26:36.653659 00099569 QRY streamStateReleaseBuf -29522:03/29 15:26:36.653787 00099569 QRY streamStateGetCur_rocksdb -29523:03/29 15:26:36.653835 00099569 QRY streamStateCurPrev_rocksdb -29524:03/29 15:26:36.653866 00099569 QRY streamStateGetKVByCur_rocksdb -29526:03/29 15:26:36.653899 00099569 QRY streamStateFreeCur -29929:03/29 15:26:36.660341 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -29943:03/29 15:26:36.661005 00099575 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -29944:03/29 15:26:36.661020 00099575 QRY streamStateReleaseBuf -29956:03/29 15:26:36.661065 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] failed to read from default, err: not exist -29958:03/29 15:26:36.661320 00099575 QRY streamState 
str:[groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to write to default, valLen:159 -29960:03/29 15:26:36.661338 00099575 QRY streamStateReleaseBuf -30026:03/29 15:26:36.662339 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -30027:03/29 15:26:36.662428 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -30031:03/29 15:26:36.662458 00099575 QRY streamStateReleaseBuf -30040:03/29 15:26:36.662523 00099575 QRY streamStateGetCur_rocksdb -30044:03/29 15:26:36.662562 00099575 QRY streamStateCurPrev_rocksdb -30045:03/29 15:26:36.662609 00099575 QRY streamStateGetKVByCur_rocksdb -30047:03/29 15:26:36.662630 00099575 QRY streamStateFreeCur -30061:03/29 15:26:36.662751 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] failed to read from default, err: not exist -30064:03/29 15:26:36.662911 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to write to default, valLen:135 -30065:03/29 15:26:36.662947 00099504 QRY streamStateReleaseBuf -30077:03/29 15:26:36.663118 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to read from default -30079:03/29 15:26:36.663142 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -30083:03/29 15:26:36.663188 00099504 QRY streamStateReleaseBuf -30094:03/29 15:26:36.663331 00099504 QRY streamStateClear_rocksdb seq:8 -30428:03/29 15:26:36.668781 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] failed to read from default, err: not exist -30434:03/29 15:26:36.668875 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to write to default, valLen:135 -30435:03/29 15:26:36.668887 00099504 QRY streamStateReleaseBuf -30438:03/29 15:26:36.668903 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -30443:03/29 15:26:36.668961 00099593 QRY streamState 
str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -30444:03/29 15:26:36.668975 00099593 QRY streamStateReleaseBuf -30446:03/29 15:26:36.668995 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to read from default -30477:03/29 15:26:36.669061 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to write to default, valLen:159 -30482:03/29 15:26:36.669375 00099593 QRY streamStateReleaseBuf -30513:03/29 15:26:36.669729 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -30514:03/29 15:26:36.669879 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -30521:03/29 15:26:36.670104 00099593 QRY streamStateReleaseBuf -30538:03/29 15:26:36.670259 00099593 QRY streamStateGetCur_rocksdb -30542:03/29 15:26:36.670463 00099593 QRY streamStateCurPrev_rocksdb -30543:03/29 15:26:36.670570 00099593 QRY streamStateGetKVByCur_rocksdb -30553:03/29 15:26:36.670608 00099593 QRY streamStateFreeCur -30573:03/29 15:26:36.671023 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to read from default -30587:03/29 15:26:36.671407 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to write to default, valLen:135 -30588:03/29 15:26:36.671475 00099504 QRY streamStateReleaseBuf -30609:03/29 15:26:36.671716 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to read from default -30613:03/29 15:26:36.671877 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -30635:03/29 15:26:36.672365 00099504 QRY streamStateReleaseBuf -30708:03/29 15:26:36.673055 00099504 QRY streamStateClear_rocksdb seq:8 -30830:03/29 15:26:36.676200 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -30839:03/29 15:26:36.676304 00099575 QRY streamState 
str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -30840:03/29 15:26:36.676475 00099575 QRY streamStateReleaseBuf -30841:03/29 15:26:36.676505 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to read from default -30847:03/29 15:26:36.676557 00099575 QRY streamState str:[groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to write to default, valLen:159 -30848:03/29 15:26:36.676641 00099575 QRY streamStateReleaseBuf -30857:03/29 15:26:36.676795 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -30858:03/29 15:26:36.676816 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -30862:03/29 15:26:36.676842 00099575 QRY streamStateReleaseBuf -30875:03/29 15:26:36.677036 00099575 QRY streamStateGetCur_rocksdb -30876:03/29 15:26:36.677079 00099575 QRY streamStateCurPrev_rocksdb -30877:03/29 15:26:36.677110 00099575 QRY streamStateGetKVByCur_rocksdb -30879:03/29 15:26:36.677141 00099575 QRY streamStateFreeCur -31236:03/29 15:26:36.873472 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to del from default -31237:03/29 15:26:36.873518 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -31307:03/29 15:26:36.874993 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] failed to read from default, err: not exist -31308:03/29 15:26:36.875063 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to write to default, valLen:135 -31309:03/29 15:26:36.875081 00099504 QRY streamStateReleaseBuf -31331:03/29 15:26:36.875547 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to read from default -31332:03/29 15:26:36.875580 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -31336:03/29 15:26:36.875665 00099504 QRY streamStateReleaseBuf -31345:03/29 15:26:36.875785 
00099504 QRY streamStateClear_rocksdb seq:8 -31424:03/29 15:26:36.877822 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to del from default -31430:03/29 15:26:36.877932 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to del from default -31436:03/29 15:26:36.878107 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to read from default -31437:03/29 15:26:36.878149 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] failed to read from default, err: not exist -31438:03/29 15:26:36.878214 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to read from default -31439:03/29 15:26:36.878238 00099593 QRY streamStateReleaseBuf -31440:03/29 15:26:36.878261 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] failed to read from default, err: not exist -31442:03/29 15:26:36.878286 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:2,seq: 15] failed to read from default, err: not exist -31445:03/29 15:26:36.878317 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:3,seq: 17] failed to read from default, err: not exist -31446:03/29 15:26:36.878553 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -31447:03/29 15:26:36.878576 00099593 QRY streamStateReleaseBuf -31453:03/29 15:26:36.878672 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -31454:03/29 15:26:36.878734 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -31455:03/29 15:26:36.878756 00099593 QRY streamStateReleaseBuf -31456:03/29 15:26:36.878787 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] failed to read from default, err: not exist -31457:03/29 15:26:36.878851 00099593 QRY streamState 
str:[groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to write to default, valLen:159 -31458:03/29 15:26:36.878966 00099593 QRY streamStateReleaseBuf -31467:03/29 15:26:36.879144 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -31468:03/29 15:26:36.879166 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -31472:03/29 15:26:36.879260 00099593 QRY streamStateReleaseBuf -31490:03/29 15:26:36.879535 00099593 QRY streamStateGetCur_rocksdb -31491:03/29 15:26:36.879594 00099593 QRY streamStateCurPrev_rocksdb -31492:03/29 15:26:36.879635 00099593 QRY streamStateGetKVByCur_rocksdb -31494:03/29 15:26:36.879677 00099593 QRY streamStateFreeCur -32790:03/29 15:26:37.312151 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to del from default -32791:03/29 15:26:37.312547 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -32956:03/29 15:26:37.316405 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to del from default -32958:03/29 15:26:37.316471 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -32990:03/29 15:26:37.316969 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to del from default -33005:03/29 15:26:37.317010 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to del from default -33007:03/29 15:26:37.317297 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] failed to read from default, err: not exist -33008:03/29 15:26:37.317327 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to read from default -33017:03/29 15:26:37.317351 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] failed to read from default, err: not exist -33020:03/29 15:26:37.317576 00099593 QRY streamState str: 
[groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to read from default -33029:03/29 15:26:37.317597 00099593 QRY streamStateReleaseBuf -33031:03/29 15:26:37.317809 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:2,seq: 15] failed to read from default, err: not exist -33032:03/29 15:26:37.317862 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:3,seq: 17] failed to read from default, err: not exist -33043:03/29 15:26:37.317920 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -33044:03/29 15:26:37.318013 00099593 QRY streamStateReleaseBuf -33065:03/29 15:26:37.318457 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -33066:03/29 15:26:37.318535 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -33070:03/29 15:26:37.318574 00099593 QRY streamStateReleaseBuf -33081:03/29 15:26:37.318869 00099593 QRY streamStateGetCur_rocksdb -33083:03/29 15:26:37.318927 00099593 QRY streamStateCurPrev_rocksdb -33084:03/29 15:26:37.318987 00099593 QRY streamStateGetKVByCur_rocksdb -33098:03/29 15:26:37.319222 00099593 QRY streamStateFreeCur -33227:03/29 15:26:37.322547 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to del from default -33233:03/29 15:26:37.322790 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to del from default -33242:03/29 15:26:37.323068 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] failed to read from default, err: not exist -33253:03/29 15:26:37.323488 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] failed to read from default, err: not exist -33255:03/29 15:26:37.324012 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:2,seq: 15] failed to read from default, err: not exist -33266:03/29 15:26:37.324051 00099569 QRY streamState str: 
[groupId:0,ts:1648791220000,opNum:3,seq: 17] failed to read from default, err: not exist -33292:03/29 15:26:37.325135 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -33341:03/29 15:26:37.325781 00099569 QRY streamStateGetCur_rocksdb -33342:03/29 15:26:37.325915 00099569 QRY streamStateCurPrev_rocksdb -33345:03/29 15:26:37.325981 00099569 QRY streamStateGetKVByCur_rocksdb -33347:03/29 15:26:37.326005 00099569 QRY streamStateFreeCur -33994:03/29 15:26:37.536707 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 19] failed to read from default, err: not exist -33995:03/29 15:26:37.536789 00099593 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 19] succ to write to default, valLen:135 -33996:03/29 15:26:37.536808 00099593 QRY streamStateReleaseBuf -34003:03/29 15:26:37.536903 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 19] succ to del from default -34004:03/29 15:26:37.536934 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -34058:03/29 15:26:37.538122 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 19] failed to read from default, err: not exist -34059:03/29 15:26:37.538191 00099593 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 19] succ to write to default, valLen:135 -34060:03/29 15:26:37.538213 00099593 QRY streamStateReleaseBuf -34101:03/29 15:26:37.539349 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 19] succ to read from default -34103:03/29 15:26:37.539451 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -34111:03/29 15:26:37.539595 00099593 QRY streamStateReleaseBuf -34131:03/29 15:26:37.540219 00099593 QRY streamStateClear_rocksdb seq:19 -34235:03/29 15:26:37.543707 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to del from default -34244:03/29 15:26:37.543761 00099569 QRY streamState str: 
[groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to del from default -34247:03/29 15:26:37.544063 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] failed to read from default, err: not exist -34249:03/29 15:26:37.544086 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to read from default -34256:03/29 15:26:37.544113 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] failed to read from default, err: not exist -34258:03/29 15:26:37.544228 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to read from default -34260:03/29 15:26:37.544253 00099569 QRY streamStateReleaseBuf -34262:03/29 15:26:37.544289 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 15] failed to read from default, err: not exist -34264:03/29 15:26:37.544314 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 17] failed to read from default, err: not exist -34267:03/29 15:26:37.544364 00099569 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -34268:03/29 15:26:37.544414 00099569 QRY streamStateReleaseBuf -34274:03/29 15:26:37.544525 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -34276:03/29 15:26:37.544619 00099569 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -34278:03/29 15:26:37.544641 00099569 QRY streamStateReleaseBuf -34282:03/29 15:26:37.544672 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] failed to read from default, err: not exist -34287:03/29 15:26:37.544773 00099569 QRY streamState str:[groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to write to default, valLen:159 -34290:03/29 15:26:37.544959 00099569 QRY streamStateReleaseBuf -34304:03/29 15:26:37.545517 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from 
default -34305:03/29 15:26:37.545543 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -34312:03/29 15:26:37.545784 00099569 QRY streamStateReleaseBuf -34326:03/29 15:26:37.546147 00099569 QRY streamStateGetCur_rocksdb -34328:03/29 15:26:37.546207 00099569 QRY streamStateCurPrev_rocksdb -34331:03/29 15:26:37.546271 00099569 QRY streamStateGetKVByCur_rocksdb -34335:03/29 15:26:37.546314 00099569 QRY streamStateFreeCur -34551:03/29 15:26:37.550566 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 19] failed to read from default, err: not exist -34554:03/29 15:26:37.550759 00099593 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 19] succ to write to default, valLen:135 -34559:03/29 15:26:37.550778 00099593 QRY streamStateReleaseBuf -34570:03/29 15:26:37.550993 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 19] succ to del from default -34575:03/29 15:26:37.551021 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -34690:03/29 15:26:37.552979 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 19] failed to read from default, err: not exist -34691:03/29 15:26:37.553221 00099593 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 19] succ to write to default, valLen:135 -34692:03/29 15:26:37.553241 00099593 QRY streamStateReleaseBuf -34714:03/29 15:26:37.553789 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 19] succ to read from default -34715:03/29 15:26:37.553814 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -34719:03/29 15:26:37.553955 00099593 QRY streamStateReleaseBuf -34728:03/29 15:26:37.554115 00099593 QRY streamStateClear_rocksdb seq:19 -34964:03/29 15:26:37.560528 00099584 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to del from default -34967:03/29 15:26:37.560634 00099584 QRY streamState str: 
[groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to del from default -34969:03/29 15:26:37.560670 00099584 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] failed to read from default, err: not exist -34970:03/29 15:26:37.560727 00099584 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to read from default -34971:03/29 15:26:37.560759 00099584 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] failed to read from default, err: not exist -34976:03/29 15:26:37.560790 00099584 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to read from default -34977:03/29 15:26:37.561047 00099584 QRY streamStateReleaseBuf -34980:03/29 15:26:37.561079 00099584 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 15] failed to read from default, err: not exist -34982:03/29 15:26:37.561152 00099584 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 17] failed to read from default, err: not exist -34983:03/29 15:26:37.561237 00099584 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -34984:03/29 15:26:37.561255 00099584 QRY streamStateReleaseBuf -34997:03/29 15:26:37.561378 00099584 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -35002:03/29 15:26:37.561627 00099584 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -35004:03/29 15:26:37.561693 00099584 QRY streamStateReleaseBuf -35006:03/29 15:26:37.561744 00099584 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] failed to read from default, err: not exist -35010:03/29 15:26:37.561791 00099584 QRY streamState str:[groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to write to default, valLen:159 -35032:03/29 15:26:37.561838 00099584 QRY streamStateReleaseBuf -35053:03/29 15:26:37.562919 00099584 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from 
default -35054:03/29 15:26:37.562944 00099584 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -35060:03/29 15:26:37.563088 00099584 QRY streamStateReleaseBuf -35084:03/29 15:26:37.563823 00099584 QRY streamStateGetCur_rocksdb -35092:03/29 15:26:37.563931 00099584 QRY streamStateCurPrev_rocksdb -35095:03/29 15:26:37.564020 00099584 QRY streamStateGetKVByCur_rocksdb -35097:03/29 15:26:37.564127 00099584 QRY streamStateFreeCur -35221:03/29 15:26:37.565946 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] failed to read from default, err: not exist -35227:03/29 15:26:37.567086 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to write to default, valLen:135 -35235:03/29 15:26:37.567172 00099504 QRY streamStateReleaseBuf -35266:03/29 15:26:37.568306 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to read from default -35274:03/29 15:26:37.568338 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -35292:03/29 15:26:37.568747 00099504 QRY streamStateReleaseBuf -35330:03/29 15:26:37.569446 00099504 QRY streamStateClear_rocksdb seq:19 -35495:03/29 15:26:37.573622 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] failed to read from default, err: not exist -35496:03/29 15:26:37.573751 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -35497:03/29 15:26:37.573772 00099504 QRY streamStateReleaseBuf -35498:03/29 15:26:37.573794 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] failed to read from default, err: not exist -35499:03/29 15:26:37.573845 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to write to default, valLen:159 -35500:03/29 15:26:37.573920 00099504 QRY streamStateReleaseBuf -35509:03/29 15:26:37.574057 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 
9] succ to read from default -35510:03/29 15:26:37.574082 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -35514:03/29 15:26:37.574122 00099504 QRY streamStateReleaseBuf -35524:03/29 15:26:37.574307 00099504 QRY streamStateGetCur_rocksdb -35526:03/29 15:26:37.574365 00099504 QRY streamStateCurPrev_rocksdb -35529:03/29 15:26:37.574548 00099504 QRY streamStateGetKVByCur_rocksdb -35540:03/29 15:26:37.574750 00099504 QRY streamStateFreeCur -35855:03/29 15:26:37.597332 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] failed to read from default, err: not exist -35856:03/29 15:26:37.597444 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to write to default, valLen:135 -35857:03/29 15:26:37.597462 00099575 QRY streamStateReleaseBuf -35864:03/29 15:26:37.597607 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to del from default -35865:03/29 15:26:37.597647 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -35921:03/29 15:26:37.598924 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] failed to read from default, err: not exist -35922:03/29 15:26:37.598993 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to write to default, valLen:135 -35923:03/29 15:26:37.599012 00099575 QRY streamStateReleaseBuf -35953:03/29 15:26:37.599550 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to read from default -35954:03/29 15:26:37.599717 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -35961:03/29 15:26:37.600018 00099575 QRY streamStateReleaseBuf -35977:03/29 15:26:37.600259 00099575 QRY streamStateClear_rocksdb seq:8 -36129:03/29 15:26:37.603242 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to del from default -36133:03/29 15:26:37.603309 00099504 QRY streamState str: 
[groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to del from default -36136:03/29 15:26:37.603364 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to read from default -36148:03/29 15:26:37.603423 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] failed to read from default, err: not exist -36150:03/29 15:26:37.603621 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to read from default -36151:03/29 15:26:37.603666 00099504 QRY streamStateReleaseBuf -36152:03/29 15:26:37.603696 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] failed to read from default, err: not exist -36153:03/29 15:26:37.603714 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 15] failed to read from default, err: not exist -36162:03/29 15:26:37.603731 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 17] failed to read from default, err: not exist -36165:03/29 15:26:37.604030 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -36171:03/29 15:26:37.604047 00099504 QRY streamStateReleaseBuf -36178:03/29 15:26:37.604186 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -36180:03/29 15:26:37.604236 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -36181:03/29 15:26:37.604249 00099504 QRY streamStateReleaseBuf -36182:03/29 15:26:37.604268 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] failed to read from default, err: not exist -36193:03/29 15:26:37.604308 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to write to default, valLen:159 -36194:03/29 15:26:37.604479 00099504 QRY streamStateReleaseBuf -36203:03/29 15:26:37.604668 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from 
default -36204:03/29 15:26:37.604695 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -36208:03/29 15:26:37.604762 00099504 QRY streamStateReleaseBuf -36217:03/29 15:26:37.604946 00099504 QRY streamStateGetCur_rocksdb -36218:03/29 15:26:37.605000 00099504 QRY streamStateCurPrev_rocksdb -36219:03/29 15:26:37.605054 00099504 QRY streamStateGetKVByCur_rocksdb -36221:03/29 15:26:37.605079 00099504 QRY streamStateFreeCur -36590:03/29 15:26:37.612667 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] failed to read from default, err: not exist -36591:03/29 15:26:37.612748 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to write to default, valLen:135 -36592:03/29 15:26:37.612767 00099575 QRY streamStateReleaseBuf -36599:03/29 15:26:37.612867 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to del from default -36600:03/29 15:26:37.612900 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -36657:03/29 15:26:37.613821 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] failed to read from default, err: not exist -36658:03/29 15:26:37.613926 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to write to default, valLen:135 -36659:03/29 15:26:37.613941 00099575 QRY streamStateReleaseBuf -36681:03/29 15:26:37.614371 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to read from default -36682:03/29 15:26:37.614396 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -36686:03/29 15:26:37.614465 00099575 QRY streamStateReleaseBuf -36695:03/29 15:26:37.614603 00099575 QRY streamStateClear_rocksdb seq:8 -36836:03/29 15:26:37.616970 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] failed to read from default, err: not exist -36841:03/29 15:26:37.617055 00099575 QRY streamState 
str:[groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to write to default, valLen:135 -36847:03/29 15:26:37.617218 00099575 QRY streamStateReleaseBuf -36872:03/29 15:26:37.617580 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to read from default -36873:03/29 15:26:37.617594 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to del from default -36874:03/29 15:26:37.617605 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -36878:03/29 15:26:37.617660 00099575 QRY streamStateReleaseBuf -36883:03/29 15:26:37.617638 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to del from default -36884:03/29 15:26:37.617742 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to read from default -36891:03/29 15:26:37.617762 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] failed to read from default, err: not exist -36895:03/29 15:26:37.617885 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to read from default -36896:03/29 15:26:37.617924 00099504 QRY streamStateReleaseBuf -36898:03/29 15:26:37.617938 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] failed to read from default, err: not exist -36899:03/29 15:26:37.617951 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 15] failed to read from default, err: not exist -36902:03/29 15:26:37.617998 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 17] failed to read from default, err: not exist -36908:03/29 15:26:37.618051 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -36909:03/29 15:26:37.617951 00099575 QRY streamStateClear_rocksdb seq:8 -36911:03/29 15:26:37.618063 00099504 QRY streamStateReleaseBuf -36923:03/29 15:26:37.618223 00099504 QRY streamState str: 
[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -36925:03/29 15:26:37.618268 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -36926:03/29 15:26:37.618279 00099504 QRY streamStateReleaseBuf -36927:03/29 15:26:37.618319 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] failed to read from default, err: not exist -36928:03/29 15:26:37.618362 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to write to default, valLen:159 -36929:03/29 15:26:37.618377 00099504 QRY streamStateReleaseBuf -36939:03/29 15:26:37.618467 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -36940:03/29 15:26:37.618509 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -36945:03/29 15:26:37.618592 00099504 QRY streamStateReleaseBuf -36971:03/29 15:26:37.619057 00099504 QRY streamStateGetCur_rocksdb -36972:03/29 15:26:37.619108 00099504 QRY streamStateCurPrev_rocksdb -36973:03/29 15:26:37.619154 00099504 QRY streamStateGetKVByCur_rocksdb -36975:03/29 15:26:37.619179 00099504 QRY streamStateFreeCur -37210:03/29 15:26:37.624330 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -37216:03/29 15:26:37.624434 00099575 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -37217:03/29 15:26:37.624449 00099575 QRY streamStateReleaseBuf -37218:03/29 15:26:37.624474 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] failed to read from default, err: not exist -37220:03/29 15:26:37.624513 00099575 QRY streamState str:[groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to write to default, valLen:159 -37221:03/29 15:26:37.624548 00099575 QRY streamStateReleaseBuf -37230:03/29 15:26:37.624686 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] 
succ to read from default -37231:03/29 15:26:37.624710 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -37235:03/29 15:26:37.624748 00099575 QRY streamStateReleaseBuf -37248:03/29 15:26:37.624926 00099575 QRY streamStateGetCur_rocksdb -37251:03/29 15:26:37.624975 00099575 QRY streamStateCurPrev_rocksdb -37252:03/29 15:26:37.625028 00099575 QRY streamStateGetKVByCur_rocksdb -37254:03/29 15:26:37.625060 00099575 QRY streamStateFreeCur -37627:03/29 15:26:37.823270 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to del from default -37628:03/29 15:26:37.823315 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -37682:03/29 15:26:37.824480 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] failed to read from default, err: not exist -37683:03/29 15:26:37.824646 00099593 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to write to default, valLen:135 -37684:03/29 15:26:37.824676 00099593 QRY streamStateReleaseBuf -37705:03/29 15:26:37.825482 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to read from default -37706:03/29 15:26:37.825552 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -37710:03/29 15:26:37.825677 00099593 QRY streamStateReleaseBuf -37719:03/29 15:26:37.825826 00099593 QRY streamStateClear_rocksdb seq:8 -37818:03/29 15:26:37.830111 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to del from default -37821:03/29 15:26:37.830255 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to del from default -37822:03/29 15:26:37.830299 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to read from default -37823:03/29 15:26:37.830329 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] failed to read from default, err: not exist 
-37824:03/29 15:26:37.830368 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to read from default -37825:03/29 15:26:37.830392 00099504 QRY streamStateReleaseBuf -37826:03/29 15:26:37.830423 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] failed to read from default, err: not exist -37827:03/29 15:26:37.830458 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 15] failed to read from default, err: not exist -37828:03/29 15:26:37.830490 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 17] failed to read from default, err: not exist -37829:03/29 15:26:37.830558 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -37830:03/29 15:26:37.830580 00099504 QRY streamStateReleaseBuf -37836:03/29 15:26:37.830669 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -37837:03/29 15:26:37.830733 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -37838:03/29 15:26:37.830755 00099504 QRY streamStateReleaseBuf -37839:03/29 15:26:37.830787 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] failed to read from default, err: not exist -37840:03/29 15:26:37.830842 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to write to default, valLen:159 -37841:03/29 15:26:37.830864 00099504 QRY streamStateReleaseBuf -37850:03/29 15:26:37.831140 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -37851:03/29 15:26:37.831169 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -37855:03/29 15:26:37.831253 00099504 QRY streamStateReleaseBuf -37864:03/29 15:26:37.831412 00099504 QRY streamStateGetCur_rocksdb -37865:03/29 15:26:37.831473 00099504 QRY streamStateCurPrev_rocksdb -37866:03/29 
15:26:37.831548 00099504 QRY streamStateGetKVByCur_rocksdb -37868:03/29 15:26:37.831590 00099504 QRY streamStateFreeCur -38448:03/29 15:26:38.045401 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] failed to read from default, err: not exist -38449:03/29 15:26:38.045492 00099575 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to write to default, valLen:135 -38450:03/29 15:26:38.045507 00099575 QRY streamStateReleaseBuf -38457:03/29 15:26:38.045670 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to read from default -38458:03/29 15:26:38.045695 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -38462:03/29 15:26:38.045736 00099575 QRY streamStateReleaseBuf -38471:03/29 15:26:38.045894 00099575 QRY streamStateClear_rocksdb seq:19 -38586:03/29 15:26:38.048400 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -38590:03/29 15:26:38.048485 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -38602:03/29 15:26:38.048526 00099593 QRY streamStateReleaseBuf -38606:03/29 15:26:38.048792 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to read from default -38614:03/29 15:26:38.048981 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to write to default, valLen:159 -38616:03/29 15:26:38.049143 00099593 QRY streamStateReleaseBuf -38626:03/29 15:26:38.049379 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -38627:03/29 15:26:38.049402 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -38635:03/29 15:26:38.049480 00099593 QRY streamStateReleaseBuf -38644:03/29 15:26:38.049633 00099593 QRY streamStateGetCur_rocksdb -38647:03/29 15:26:38.049683 00099593 QRY streamStateCurPrev_rocksdb -38648:03/29 
15:26:38.049776 00099593 QRY streamStateGetKVByCur_rocksdb -38651:03/29 15:26:38.049812 00099593 QRY streamStateFreeCur -38954:03/29 15:26:38.056513 00099584 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] failed to read from default, err: not exist -38962:03/29 15:26:38.056672 00099584 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to write to default, valLen:135 -38963:03/29 15:26:38.056816 00099584 QRY streamStateReleaseBuf -38981:03/29 15:26:38.057479 00099584 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to read from default -38982:03/29 15:26:38.057676 00099584 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -38986:03/29 15:26:38.057789 00099584 QRY streamStateReleaseBuf -39011:03/29 15:26:38.058195 00099584 QRY streamStateClear_rocksdb seq:19 -39234:03/29 15:26:38.063323 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -39242:03/29 15:26:38.063607 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -39243:03/29 15:26:38.063622 00099593 QRY streamStateReleaseBuf -39244:03/29 15:26:38.063679 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to read from default -39245:03/29 15:26:38.063721 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] failed to read from default, err: not exist -39246:03/29 15:26:38.063740 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to write to default, valLen:159 -39247:03/29 15:26:38.063758 00099593 QRY streamStateReleaseBuf -39252:03/29 15:26:38.063810 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to write to default, valLen:135 -39254:03/29 15:26:38.063823 00099504 QRY streamStateReleaseBuf -39263:03/29 15:26:38.064107 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to read from 
default -39274:03/29 15:26:38.064139 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -39278:03/29 15:26:38.064452 00099504 QRY streamStateReleaseBuf -39292:03/29 15:26:38.064610 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -39295:03/29 15:26:38.064734 00099504 QRY streamStateClear_rocksdb seq:8 -39304:03/29 15:26:38.064698 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -39308:03/29 15:26:38.065178 00099593 QRY streamStateReleaseBuf -39330:03/29 15:26:38.065629 00099593 QRY streamStateGetCur_rocksdb -39336:03/29 15:26:38.065809 00099593 QRY streamStateCurPrev_rocksdb -39341:03/29 15:26:38.065967 00099593 QRY streamStateGetKVByCur_rocksdb -39358:03/29 15:26:38.066225 00099593 QRY streamStateFreeCur -39530:03/29 15:26:38.069291 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -39532:03/29 15:26:38.069399 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -39533:03/29 15:26:38.069441 00099593 QRY streamStateReleaseBuf -39534:03/29 15:26:38.069469 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to read from default -39543:03/29 15:26:38.069576 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to write to default, valLen:159 -39544:03/29 15:26:38.069618 00099593 QRY streamStateReleaseBuf -39570:03/29 15:26:38.069703 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] failed to read from default, err: not exist -39575:03/29 15:26:38.070400 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to write to default, valLen:135 -39576:03/29 15:26:38.070424 00099504 QRY streamStateReleaseBuf -39594:03/29 15:26:38.070680 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to read from default 
-39595:03/29 15:26:38.070791 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -39599:03/29 15:26:38.070851 00099504 QRY streamStateReleaseBuf -39603:03/29 15:26:38.070998 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -39607:03/29 15:26:38.071113 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -39613:03/29 15:26:38.071207 00099593 QRY streamStateReleaseBuf -39657:03/29 15:26:38.072025 00099504 QRY streamStateClear_rocksdb seq:8 -39683:03/29 15:26:38.072546 00099593 QRY streamStateGetCur_rocksdb -39685:03/29 15:26:38.072616 00099593 QRY streamStateCurPrev_rocksdb -39688:03/29 15:26:38.072721 00099593 QRY streamStateGetKVByCur_rocksdb -39691:03/29 15:26:38.072755 00099593 QRY streamStateFreeCur -39868:03/29 15:26:38.079438 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -39876:03/29 15:26:38.079538 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -39877:03/29 15:26:38.079739 00099593 QRY streamStateReleaseBuf -39879:03/29 15:26:38.079775 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to read from default -39892:03/29 15:26:38.079910 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to write to default, valLen:159 -39893:03/29 15:26:38.080684 00099593 QRY streamStateReleaseBuf -39916:03/29 15:26:38.081473 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -39917:03/29 15:26:38.081567 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -39924:03/29 15:26:38.081650 00099593 QRY streamStateReleaseBuf -39935:03/29 15:26:38.081780 00099593 QRY streamStateGetCur_rocksdb -39937:03/29 15:26:38.081903 00099593 QRY streamStateCurPrev_rocksdb -39941:03/29 15:26:38.081980 00099593 QRY 
streamStateGetKVByCur_rocksdb -39947:03/29 15:26:38.082150 00099593 QRY streamStateFreeCur -40238:03/29 15:26:38.088762 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] failed to read from default, err: not exist -40239:03/29 15:26:38.088910 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to write to default, valLen:135 -40240:03/29 15:26:38.088931 00099575 QRY streamStateReleaseBuf -40249:03/29 15:26:38.089088 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to read from default -40262:03/29 15:26:38.089141 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -40271:03/29 15:26:38.089357 00099575 QRY streamStateReleaseBuf -40313:03/29 15:26:38.090004 00099575 QRY streamStateClear_rocksdb seq:19 -40561:03/29 15:26:38.094136 00099593 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] failed to read from default, err: not exist -40563:03/29 15:26:38.094427 00099593 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -40565:03/29 15:26:38.094445 00099593 QRY streamStateReleaseBuf -40568:03/29 15:26:38.094478 00099593 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] failed to read from default, err: not exist -40573:03/29 15:26:38.094521 00099593 QRY streamState str:[groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to write to default, valLen:159 -40583:03/29 15:26:38.094535 00099593 QRY streamStateReleaseBuf -40636:03/29 15:26:38.095424 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] failed to read from default, err: not exist -40637:03/29 15:26:38.095473 00099593 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -40638:03/29 15:26:38.095496 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -40642:03/29 15:26:38.095576 00099593 QRY streamStateReleaseBuf 
-40643:03/29 15:26:38.095504 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to write to default, valLen:135 -40648:03/29 15:26:38.095597 00099575 QRY streamStateReleaseBuf -40663:03/29 15:26:38.095838 00099593 QRY streamStateGetCur_rocksdb -40666:03/29 15:26:38.095882 00099593 QRY streamStateCurPrev_rocksdb -40668:03/29 15:26:38.095926 00099593 QRY streamStateGetKVByCur_rocksdb -40670:03/29 15:26:38.095882 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to read from default -40671:03/29 15:26:38.095965 00099593 QRY streamStateFreeCur -40672:03/29 15:26:38.095972 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -40678:03/29 15:26:38.096018 00099575 QRY streamStateReleaseBuf -40691:03/29 15:26:38.096133 00099575 QRY streamStateClear_rocksdb seq:19 -40842:03/29 15:26:38.098338 00099593 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -40843:03/29 15:26:38.098396 00099593 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -40844:03/29 15:26:38.098410 00099593 QRY streamStateReleaseBuf -40855:03/29 15:26:38.098439 00099593 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to read from default -40859:03/29 15:26:38.098591 00099593 QRY streamState str:[groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to write to default, valLen:159 -40860:03/29 15:26:38.098608 00099593 QRY streamStateReleaseBuf -40946:03/29 15:26:38.098842 00099593 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -40947:03/29 15:26:38.100078 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -40951:03/29 15:26:38.100132 00099593 QRY streamStateReleaseBuf -40960:03/29 15:26:38.100244 00099593 QRY streamStateGetCur_rocksdb -40961:03/29 15:26:38.100281 00099593 QRY streamStateCurPrev_rocksdb -40962:03/29 
15:26:38.100315 00099593 QRY streamStateGetKVByCur_rocksdb -40964:03/29 15:26:38.100347 00099593 QRY streamStateFreeCur -41182:03/29 15:26:38.103325 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] failed to read from default, err: not exist -41192:03/29 15:26:38.103733 00099504 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to write to default, valLen:135 -41195:03/29 15:26:38.103855 00099504 QRY streamStateReleaseBuf -41215:03/29 15:26:38.104349 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to read from default -41216:03/29 15:26:38.104372 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -41228:03/29 15:26:38.104601 00099504 QRY streamStateReleaseBuf -41285:03/29 15:26:38.105526 00099504 QRY streamStateClear_rocksdb seq:8 -41487:03/29 15:26:38.108582 00099584 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -41490:03/29 15:26:38.108936 00099584 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -41491:03/29 15:26:38.108955 00099584 QRY streamStateReleaseBuf -41493:03/29 15:26:38.108983 00099584 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] failed to read from default, err: not exist -41501:03/29 15:26:38.109032 00099584 QRY streamState str:[groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to write to default, valLen:159 -41502:03/29 15:26:38.109122 00099584 QRY streamStateReleaseBuf -41527:03/29 15:26:38.109355 00099584 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -41528:03/29 15:26:38.109395 00099584 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -41533:03/29 15:26:38.109705 00099584 QRY streamStateReleaseBuf -41588:03/29 15:26:38.110405 00099584 QRY streamStateGetCur_rocksdb -41589:03/29 15:26:38.110567 00099584 QRY streamStateCurPrev_rocksdb -41591:03/29 
15:26:38.110601 00099584 QRY streamStateGetKVByCur_rocksdb -41599:03/29 15:26:38.110694 00099584 QRY streamStateFreeCur -41606:03/29 15:26:38.110744 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] failed to read from default, err: not exist -41613:03/29 15:26:38.110924 00099504 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to write to default, valLen:135 -41614:03/29 15:26:38.111008 00099504 QRY streamStateReleaseBuf -41631:03/29 15:26:38.111153 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to read from default -41632:03/29 15:26:38.111212 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -41637:03/29 15:26:38.111266 00099504 QRY streamStateReleaseBuf -41656:03/29 15:26:38.111573 00099504 QRY streamStateClear_rocksdb seq:8 -41784:03/29 15:26:38.113332 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -41785:03/29 15:26:38.113399 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -41786:03/29 15:26:38.113417 00099575 QRY streamStateReleaseBuf -41787:03/29 15:26:38.113447 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to read from default -41788:03/29 15:26:38.113495 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to write to default, valLen:159 -41789:03/29 15:26:38.113511 00099575 QRY streamStateReleaseBuf -41798:03/29 15:26:38.113713 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -41799:03/29 15:26:38.113738 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -41803:03/29 15:26:38.113803 00099575 QRY streamStateReleaseBuf -41812:03/29 15:26:38.113982 00099575 QRY streamStateGetCur_rocksdb -41813:03/29 15:26:38.114026 00099575 QRY streamStateCurPrev_rocksdb -41814:03/29 15:26:38.114062 
00099575 QRY streamStateGetKVByCur_rocksdb -41816:03/29 15:26:38.114126 00099575 QRY streamStateFreeCur -42853:03/29 15:26:38.329316 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 19] succ to del from default -42857:03/29 15:26:38.329888 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to del from default -42859:03/29 15:26:38.330165 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to del from default -42860:03/29 15:26:38.330208 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -42861:03/29 15:26:38.330257 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -42862:03/29 15:26:38.330275 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -42958:03/29 15:26:38.332359 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to del from default -42964:03/29 15:26:38.332407 00099569 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to del from default -42965:03/29 15:26:38.332473 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -42966:03/29 15:26:38.332488 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -42981:03/29 15:26:38.332676 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] failed to read from default, err: not exist -42984:03/29 15:26:38.332745 00099504 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to write to default, valLen:135 -42986:03/29 15:26:38.332764 00099504 QRY streamStateReleaseBuf -43046:03/29 15:26:38.334165 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to read from default -43048:03/29 15:26:38.334198 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -43053:03/29 15:26:38.334321 00099504 QRY streamStateReleaseBuf 
-43065:03/29 15:26:38.334481 00099569 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] failed to read from default, err: not exist -43067:03/29 15:26:38.334534 00099504 QRY streamStateClear_rocksdb seq:19 -43068:03/29 15:26:38.334552 00099569 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to write to default, valLen:135 -43069:03/29 15:26:38.334574 00099569 QRY streamStateReleaseBuf -43100:03/29 15:26:38.335311 00099569 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to read from default -43101:03/29 15:26:38.335423 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -43109:03/29 15:26:38.335477 00099569 QRY streamStateReleaseBuf -43121:03/29 15:26:38.335782 00099569 QRY streamStateClear_rocksdb seq:8 -43282:03/29 15:26:38.339328 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to del from default -43283:03/29 15:26:38.339399 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to del from default -43289:03/29 15:26:38.339504 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to del from default -43290:03/29 15:26:38.339565 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to del from default -43291:03/29 15:26:38.339625 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to del from default -43292:03/29 15:26:38.339705 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to del from default -43293:03/29 15:26:38.339749 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] failed to read from default, err: not exist -43294:03/29 15:26:38.339784 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to read from default -43295:03/29 15:26:38.339923 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] failed to read from default, err: 
not exist -43296:03/29 15:26:38.339977 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to read from default -43297:03/29 15:26:38.339998 00099575 QRY streamStateReleaseBuf -43298:03/29 15:26:38.340051 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 15] failed to read from default, err: not exist -43299:03/29 15:26:38.340084 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 17] failed to read from default, err: not exist -43300:03/29 15:26:38.340164 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -43301:03/29 15:26:38.340186 00099575 QRY streamStateReleaseBuf -43302:03/29 15:26:38.340222 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] failed to read from default, err: not exist -43303:03/29 15:26:38.340256 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to read from default -43304:03/29 15:26:38.340310 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] failed to read from default, err: not exist -43305:03/29 15:26:38.340345 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to read from default -43306:03/29 15:26:38.340579 00099575 QRY streamStateReleaseBuf -43307:03/29 15:26:38.340617 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:2,seq: 15] failed to read from default, err: not exist -43308:03/29 15:26:38.340649 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:3,seq: 17] failed to read from default, err: not exist -43309:03/29 15:26:38.340721 00099575 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -43310:03/29 15:26:38.340743 00099575 QRY streamStateReleaseBuf -43311:03/29 15:26:38.340781 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] failed to read from default, err: not exist -43312:03/29 
15:26:38.340834 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to read from default -43313:03/29 15:26:38.340874 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] failed to read from default, err: not exist -43314:03/29 15:26:38.340924 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to read from default -43315:03/29 15:26:38.340947 00099575 QRY streamStateReleaseBuf -43316:03/29 15:26:38.341105 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:2,seq: 15] failed to read from default, err: not exist -43317:03/29 15:26:38.341238 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:3,seq: 17] failed to read from default, err: not exist -43318:03/29 15:26:38.341400 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -43319:03/29 15:26:38.341431 00099575 QRY streamStateReleaseBuf -43325:03/29 15:26:38.341593 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -43326:03/29 15:26:38.341676 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -43327:03/29 15:26:38.341699 00099575 QRY streamStateReleaseBuf -43328:03/29 15:26:38.341739 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] failed to read from default, err: not exist -43329:03/29 15:26:38.342152 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to write to default, valLen:159 -43330:03/29 15:26:38.342181 00099575 QRY streamStateReleaseBuf -43337:03/29 15:26:38.342311 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to del from default -43338:03/29 15:26:38.342376 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to del from default -43339:03/29 15:26:38.342435 00099575 QRY streamState str: 
[groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to del from default -43340:03/29 15:26:38.342519 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to del from default -43341:03/29 15:26:38.342561 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] failed to read from default, err: not exist -43342:03/29 15:26:38.342594 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] failed to read from default, err: not exist -43343:03/29 15:26:38.342629 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:2,seq: 15] failed to read from default, err: not exist -43344:03/29 15:26:38.342661 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:3,seq: 17] failed to read from default, err: not exist -43345:03/29 15:26:38.342698 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to read from default -43346:03/29 15:26:38.342738 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] failed to read from default, err: not exist -43347:03/29 15:26:38.342776 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to read from default -43348:03/29 15:26:38.342799 00099575 QRY streamStateReleaseBuf -43349:03/29 15:26:38.342834 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] failed to read from default, err: not exist -43350:03/29 15:26:38.342865 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:2,seq: 15] failed to read from default, err: not exist -43351:03/29 15:26:38.342908 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:3,seq: 17] failed to read from default, err: not exist -43352:03/29 15:26:38.342975 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -43353:03/29 15:26:38.342997 00099575 QRY streamStateReleaseBuf -43359:03/29 15:26:38.343177 00099575 QRY streamState str: 
[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -43360:03/29 15:26:38.343248 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -43361:03/29 15:26:38.343269 00099575 QRY streamStateReleaseBuf -43362:03/29 15:26:38.343305 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] failed to read from default, err: not exist -43363:03/29 15:26:38.343368 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to write to default, valLen:159 -43364:03/29 15:26:38.343389 00099575 QRY streamStateReleaseBuf -43373:03/29 15:26:38.343768 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -43382:03/29 15:26:38.344088 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -43383:03/29 15:26:38.344118 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -43387:03/29 15:26:38.344203 00099575 QRY streamStateReleaseBuf -43388:03/29 15:26:38.344244 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -43389:03/29 15:26:38.344273 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -43393:03/29 15:26:38.344355 00099575 QRY streamStateReleaseBuf -43403:03/29 15:26:38.344525 00099575 QRY streamStateGetCur_rocksdb -43404:03/29 15:26:38.344584 00099575 QRY streamStateCurPrev_rocksdb -43405:03/29 15:26:38.344662 00099575 QRY streamStateGetKVByCur_rocksdb -43407:03/29 15:26:38.344692 00099575 QRY streamStateFreeCur -51853:03/29 15:26:39.501810 00099718 QRY streamStateSetNumber, seq: 20 -51854:03/29 15:26:39.501840 00099718 QRY streamStateSetNumber, seq: 21 -51858:03/29 15:26:39.507708 00099713 QRY streamStateSetNumber, seq: 22 -51859:03/29 15:26:39.507730 00099713 QRY streamStateSetNumber, seq: 23 -51861:03/29 15:26:39.507756 00099713 QRY streamStateSetNumber, seq: 24 
-51862:03/29 15:26:39.507860 00099713 QRY streamStateSetNumber, seq: 25 -51864:03/29 15:26:39.507885 00099713 QRY streamStateSetNumber, seq: 26 -51865:03/29 15:26:39.508056 00099713 QRY streamStateSetNumber, seq: 27 -51866:03/29 15:26:39.508083 00099713 QRY streamStateSetNumber, seq: 28 -51869:03/29 15:26:39.508105 00099713 QRY streamStateSetNumber, seq: 29 -51874:03/29 15:26:39.508152 00099713 QRY streamStateSetNumber, seq: 30 -51883:03/29 15:26:39.508317 00099723 QRY streamStateSetNumber, seq: 31 -51885:03/29 15:26:39.508343 00099723 QRY streamStateSetNumber, seq: 32 -51974:03/29 15:26:39.520642 00099728 QRY streamStateSetNumber, seq: 33 -51975:03/29 15:26:39.520671 00099728 QRY streamStateSetNumber, seq: 34 -52162:03/29 15:26:39.588845 00099713 QRY streamStateSetNumber, seq: 35 -52163:03/29 15:26:39.588940 00099713 QRY streamStateSetNumber, seq: 36 -52499:03/29 15:26:39.649197 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] failed to read from default, err: not exist -52500:03/29 15:26:39.649903 00099593 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to write to default, valLen:135 -52501:03/29 15:26:39.649918 00099593 QRY streamStateReleaseBuf -52516:03/29 15:26:39.650272 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to read from default -52517:03/29 15:26:39.650333 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -52521:03/29 15:26:39.650384 00099593 QRY streamStateReleaseBuf -52530:03/29 15:26:39.650500 00099593 QRY streamStateClear_rocksdb seq:36 -52718:03/29 15:26:39.653130 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] failed to read from default, err: not exist -52788:03/29 15:26:39.654198 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to write to default, valLen:159 -52789:03/29 15:26:39.654217 00099575 QRY streamStateReleaseBuf -52790:03/29 15:26:39.654226 00099584 QRY 
streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] failed to read from default, err: not exist -52791:03/29 15:26:39.654248 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] failed to read from default, err: not exist -52792:03/29 15:26:39.654280 00099584 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to write to default, valLen:135 -52793:03/29 15:26:39.654295 00099584 QRY streamStateReleaseBuf -52794:03/29 15:26:39.654295 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:0,seq: 24] succ to write to default, valLen:159 -52796:03/29 15:26:39.654313 00099575 QRY streamStateReleaseBuf -52806:03/29 15:26:39.654460 00099584 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to read from default -52811:03/29 15:26:39.654483 00099584 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -52815:03/29 15:26:39.654616 00099584 QRY streamStateReleaseBuf -52823:03/29 15:26:39.654626 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to read from default -52825:03/29 15:26:39.654922 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -52829:03/29 15:26:39.655001 00099575 QRY streamStateReleaseBuf -52834:03/29 15:26:39.654938 00099584 QRY streamStateClear_rocksdb seq:36 -52875:03/29 15:26:39.655766 00099575 QRY streamStateGetCur_rocksdb -52880:03/29 15:26:39.655894 00099575 QRY streamStateCurPrev_rocksdb -52881:03/29 15:26:39.655933 00099575 QRY streamStateGetKVByCur_rocksdb -52884:03/29 15:26:39.655957 00099575 QRY streamStateFreeCur -52941:03/29 15:26:39.656606 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to read from default -52950:03/29 15:26:39.656678 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to write to default, valLen:159 -52970:03/29 15:26:39.656771 00099575 QRY streamStateReleaseBuf -52972:03/29 15:26:39.657012 
00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] succ to read from default -52973:03/29 15:26:39.657072 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:0,seq: 24] succ to write to default, valLen:159 -52974:03/29 15:26:39.657088 00099575 QRY streamStateReleaseBuf -52992:03/29 15:26:39.657214 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to read from default -52997:03/29 15:26:39.657436 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -53001:03/29 15:26:39.657517 00099575 QRY streamStateReleaseBuf -53047:03/29 15:26:39.658463 00099575 QRY streamStateGetCur_rocksdb -53048:03/29 15:26:39.658509 00099575 QRY streamStateCurPrev_rocksdb -53049:03/29 15:26:39.658527 00099575 QRY streamStateGetKVByCur_rocksdb -53051:03/29 15:26:39.658549 00099575 QRY streamStateFreeCur -53519:03/29 15:26:39.666194 00099732 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to del from default -53522:03/29 15:26:39.666264 00099732 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -53582:03/29 15:26:39.667215 00099732 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] failed to read from default, err: not exist -53583:03/29 15:26:39.667264 00099732 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to write to default, valLen:135 -53584:03/29 15:26:39.667281 00099732 QRY streamStateReleaseBuf -53605:03/29 15:26:39.667661 00099732 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to read from default -53606:03/29 15:26:39.667684 00099732 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -53610:03/29 15:26:39.667748 00099732 QRY streamStateReleaseBuf -53619:03/29 15:26:39.667875 00099732 QRY streamStateClear_rocksdb seq:36 -53718:03/29 15:26:39.669594 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to del from default 
-53721:03/29 15:26:39.669669 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] succ to del from default -53723:03/29 15:26:39.669696 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] failed to read from default, err: not exist -53725:03/29 15:26:39.669714 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 26] failed to read from default, err: not exist -53736:03/29 15:26:39.669728 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 28] failed to read from default, err: not exist -53743:03/29 15:26:39.669965 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 30] failed to read from default, err: not exist -53763:03/29 15:26:39.670203 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] failed to read from default, err: not exist -53765:03/29 15:26:39.670397 00099569 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to write to default, valLen:159 -53766:03/29 15:26:39.670414 00099569 QRY streamStateReleaseBuf -53767:03/29 15:26:39.670459 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] failed to read from default, err: not exist -53771:03/29 15:26:39.670497 00099569 QRY streamState str:[groupId:0,ts:1648791210000,opNum:0,seq: 24] succ to write to default, valLen:159 -53772:03/29 15:26:39.670570 00099569 QRY streamStateReleaseBuf -53794:03/29 15:26:39.670935 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to read from default -53795:03/29 15:26:39.670954 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -53803:03/29 15:26:39.671070 00099569 QRY streamStateReleaseBuf -53818:03/29 15:26:39.671279 00099569 QRY streamStateGetCur_rocksdb -53819:03/29 15:26:39.671326 00099569 QRY streamStateCurPrev_rocksdb -53820:03/29 15:26:39.671356 00099569 QRY streamStateGetKVByCur_rocksdb -53822:03/29 15:26:39.671388 00099569 QRY 
streamStateFreeCur -55456:03/29 15:26:40.136068 00099717 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to del from default -55457:03/29 15:26:40.136115 00099717 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -55621:03/29 15:26:40.140154 00099722 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to del from default -55624:03/29 15:26:40.140304 00099722 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] succ to del from default -55626:03/29 15:26:40.140337 00099722 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] failed to read from default, err: not exist -55627:03/29 15:26:40.140362 00099722 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 26] failed to read from default, err: not exist -55629:03/29 15:26:40.140394 00099722 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 28] failed to read from default, err: not exist -55636:03/29 15:26:40.140426 00099722 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 30] failed to read from default, err: not exist -55647:03/29 15:26:40.140826 00099722 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -55656:03/29 15:26:40.140968 00099722 QRY streamStateGetCur_rocksdb -55657:03/29 15:26:40.141149 00099722 QRY streamStateFreeCur -55658:03/29 15:26:40.141191 00099722 QRY streamStateCurPrev_rocksdb diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 17d24c12b6..9bdabf030b 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -1028,6 +1028,7 @@ ,,y,script,./test.sh -f tsim/stream/basic2.sim ,,y,script,./test.sh -f tsim/stream/basic3.sim ,,y,script,./test.sh -f tsim/stream/basic4.sim +,,y,script,./test.sh -f tsim/stream/checkpointInterval0.sim ,,y,script,./test.sh -f tsim/stream/checkStreamSTable1.sim ,,y,script,./test.sh -f tsim/stream/checkStreamSTable.sim ,,y,script,./test.sh -f 
tsim/stream/deleteInterval.sim diff --git a/tests/script/log b/tests/script/log new file mode 100644 index 0000000000..276333a1ae --- /dev/null +++ b/tests/script/log @@ -0,0 +1,103 @@ +------------------------------------------------------------------------ +Start TDengine Testing Case ... +BUILD_DIR: /root/yihao/work/TDengine/debug +SIM_DIR : /root/yihao/work/TDengine/sim +CODE_DIR : /root/yihao/work/TDengine/tests/script +CFG_DIR : /root/yihao/work/TDengine/sim/tsim/cfg +ASAN_DIR : /root/yihao/work/TDengine/sim/asan +------------------------------------------------------------------------ +ExcuteCmd: /root/yihao/work/TDengine/debug/build/bin/tsim -c /root/yihao/work/TDengine/sim/tsim/cfg -f tsim/stream/state0.sim +AsanDir: /root/yihao/work/TDengine/sim/asan/tsim.asan +08/18 17:00:43.118420 00438970 SIM simulator is running ... +Executing deploy.sh +SCRIPT_DIR: /root/yihao/work/TDengine/tests/script +------------ start dnode1 +nohup /root/yihao/work/TDengine/debug/build/bin/taosd -c /root/yihao/work/TDengine/sim/dnode1/cfg > /dev/null 2>&1 & +08/18 17:00:43.288417 00438970 SIM script:tsim/stream/state0.sim, sleep 50ms begin +08/18 17:00:43.338486 00438970 SIM script:tsim/stream/state0.sim, sleep 50ms finished +08/18 17:00:43.679227 00438970 SIM script:tsim/stream/state0.sim, =============== create database +08/18 17:00:45.450379 00438970 SIM script:tsim/stream/state0.sim, information_schema 23-08-18 17:00:43.491 NULL +08/18 17:00:45.454189 00438970 SIM script:tsim/stream/state0.sim, create stream streams1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt1 as select _wstart, count(*) c1, count(d) c2 , sum(a) c3 , max(a) c4, min(c) c5, max(id) c from t1 state_window(a); +08/18 17:00:45.610789 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:00:46.610955 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:00:46.611228 00438970 SIM script:tsim/stream/state0.sim, =====rows=0 +08/18 17:00:46.627080 00438970 
SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:00:47.627254 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:00:47.637364 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:00:48.637446 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:00:48.684845 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:00:49.685055 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:00:49.685390 00438970 SIM script:tsim/stream/state0.sim, =====rows=0 +08/18 17:00:49.712001 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:00:50.712137 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:00:50.713185 00438970 SIM script:tsim/stream/state0.sim, loop1 end +08/18 17:00:50.737435 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:00:51.737618 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:00:51.738003 00438970 SIM script:tsim/stream/state0.sim, =====data21=null +08/18 17:00:51.764834 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:00:52.764971 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:00:52.805578 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:00:53.805742 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:00:53.806044 00438970 SIM script:tsim/stream/state0.sim, =====data26=null +08/18 17:00:53.828622 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:00:54.828756 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:00:54.890423 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:00:55.890533 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:00:55.890839 00438970 SIM script:tsim/stream/state0.sim, =====data21=null +08/18 17:00:55.917818 00438970 SIM 
script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:00:56.918008 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:00:56.966614 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:00:57.966809 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:00:57.967085 00438970 SIM script:tsim/stream/state0.sim, ====loop4=rows=0 +08/18 17:00:57.993757 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:00:58.993936 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:00:58.994839 00438970 SIM script:tsim/stream/state0.sim, loop4 end +08/18 17:00:59.678915 00438970 SIM script:tsim/stream/state0.sim, information_schema 23-08-18 17:00:43.491 NULL +08/18 17:00:59.698124 00438970 SIM script:tsim/stream/state0.sim, create stream streams2 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt1 as select _wstart, count(*) c1, count(d) c2 , sum(a) c3 , max(a) c4, min(c) c5, max(id) c from t1 state_window(a); +08/18 17:00:59.862389 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:01:00.862605 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:01:01.584856 00438970 SIM script:tsim/stream/state0.sim, create stream streams3 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt3 as select _wstart, count(*) c1, sum(b) c3 from t1 state_window(a); +08/18 17:01:01.778514 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:01:02.778699 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:01:09.536746 00438970 SIM script:tsim/stream/state0.sim, create stream if not exists streams4 trigger window_close IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt4 as select _wstart AS startts, min(c1),count(c1) from t1 state_window(c1); +08/18 17:01:09.789267 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:01:10.789450 00438970 SIM script:tsim/stream/state0.sim, 
sleep 1000ms finished +08/18 17:01:10.863370 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:01:11.863556 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:01:11.909184 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:01:12.909288 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:01:12.936362 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:01:13.936494 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:01:15.301064 00438970 SIM script:tsim/stream/state0.sim, create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart c1, count(*) c2, max(a) c3 from tb state_window(a); +08/18 17:01:15.466754 00438970 SIM script:tsim/stream/state0.sim, data00:null +08/18 17:01:15.466768 00438970 SIM script:tsim/stream/state0.sim, data01:null +08/18 17:01:15.489373 00438970 SIM script:tsim/stream/state0.sim, data00:null +08/18 17:01:15.489392 00438970 SIM script:tsim/stream/state0.sim, data01:null +08/18 17:01:15.511826 00438970 SIM script:tsim/stream/state0.sim, data00:null +08/18 17:01:15.511841 00438970 SIM script:tsim/stream/state0.sim, data01:null +08/18 17:01:15.522397 00438970 SIM script:tsim/stream/state0.sim, data00:null +08/18 17:01:15.522416 00438970 SIM script:tsim/stream/state0.sim, data01:null +08/18 17:01:15.526939 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin +08/18 17:01:16.527119 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished +08/18 17:01:16.542936 00438970 SIM script:tsim/stream/state0.sim, state0 end +------------ stop dnode1 +try to kill by signal SIGINT +try to kill by signal SIGINT +try to kill by signal SIGINT +try to kill by signal SIGINT +08/18 17:01:20.674086 00438970 SIM script:tsim/stream/state0.sim, return cmd execute with:1 +08/18 17:01:20.674103 00438970 SIM script:tsim/stream/state0.sim, success +08/18 17:01:20.674165 00438970 
SIM script:tsim/stream/state0.sim, background script num:0, stop them +08/18 17:01:20.674521 00438970 SIM ---------------------------------------------------------------------- +08/18 17:01:20.674528 00438970 SIM Simulation Test Done, 1 Passed: + +08/18 17:01:20.674534 00438970 SIM thread is stopped +08/18 17:01:20.674537 00438970 SIM execute result 0 +Execute result: 0 +Killing taosd processes +asan error_num: 0 +asan memory_leak: 0 +asan indirect_leak: 0 +asan runtime error: 0 +asan python error: 0 +no asan errors diff --git a/tests/script/sh/deploy.sh b/tests/script/sh/deploy.sh index 5b1773e664..7da8da09bf 100755 --- a/tests/script/sh/deploy.sh +++ b/tests/script/sh/deploy.sh @@ -118,7 +118,7 @@ echo "statusInterval 1" >> $TAOS_CFG echo "dataDir $DATA_DIR" >> $TAOS_CFG echo "logDir $LOG_DIR" >> $TAOS_CFG echo "debugFlag 0" >> $TAOS_CFG -echo "tmrDebugFlag 143" >> $TAOS_CFG +echo "tmrDebugFlag 131" >> $TAOS_CFG echo "uDebugFlag 143" >> $TAOS_CFG echo "rpcDebugFlag 143" >> $TAOS_CFG echo "jniDebugFlag 143" >> $TAOS_CFG diff --git a/tests/script/tsim/stream/checkpointInterval0.sim b/tests/script/tsim/stream/checkpointInterval0.sim new file mode 100644 index 0000000000..1c212eb2a7 --- /dev/null +++ b/tests/script/tsim/stream/checkpointInterval0.sim @@ -0,0 +1,255 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135 +system sh/exec.sh -n dnode1 -s start +sleep 50 +sql connect + +print step 1 + +print =============== create database +sql create database test vgroups 1; + +sql use test; + + +sql create table t1(ts timestamp, a int, b int , c int, d double); +sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from t1 interval(10s); +sql create stream streams1 trigger window_close IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt1 as select _wstart, count(*) c1, sum(a) from t1 interval(10s); +sql insert into t1 values(1648791213000,1,2,3,1.0); +sql insert into t1 
values(1648791213001,2,2,3,1.1); + +$loop_count = 0 + +loop0: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 1 then + print =====rows=$rows expect 1 + goto loop0 +endi + +# row 0 +if $data01 != 2 then + print =====data01=$data01 + goto loop0 +endi + +if $data02 != 3 then + print =====data02=$data02 + goto loop0 +endi + +$loop_count = 0 + +loop01: +sleep 1000 + +sql select * from streamt1; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 0 then + print =====rows=$rows expect 1 + goto loop01 +endi + +print waiting for checkpoint generation 1 ...... + +sleep 25000 + +print restart taosd 01 ...... + +system sh/stop_dnodes.sh + +system sh/exec.sh -n dnode1 -s start + +sql insert into t1 values(1648791213002,3,2,3,1.1); + +$loop_count = 0 + +loop1: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 1 then + print =====rows=$rows expect 1 + goto loop1 +endi + +# row 0 +if $data01 != 3 then + print =====data01=$data01 + goto loop1 +endi + +if $data02 != 6 then + print =====data02=$data02 + goto loop1 +endi + +sql insert into t1 values(1648791223003,4,2,3,1.1); + +$loop_count = 0 + +loop2: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 2 then + print =====rows=$rows expect 2 + goto loop2 +endi + +# row 0 +if $data01 != 3 then + print =====data01=$data01 + goto loop2 +endi + +if $data02 != 6 then + print =====data02=$data02 + goto loop2 +endi + +# row 1 +if $data11 != 1 then + print =====data11=$data11 + goto loop2 +endi + +if $data12 != 4 then + print =====data12=$data12 + goto loop2 +endi + + +$loop_count = 0 + +loop3: +sleep 1000 + +print select * from streamt1; +sql select * from streamt1; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if 
$rows != 1 then + print =====rows=$rows expect 2 + goto loop3 +endi + +# row 0 +if $data01 != 3 then + print =====data01=$data01 + goto loop3 +endi + +if $data02 != 6 then + print =====data02=$data02 + goto loop3 +endi + +print step 2 + +print restart taosd 02 ...... + +system sh/stop_dnodes.sh + +system sh/exec.sh -n dnode1 -s start + +sql insert into t1 values(1648791223004,5,2,3,1.1); + +loop4: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 2 then + print =====rows=$rows expect 2 + goto loop4 +endi + +# row 0 +if $data01 != 3 then + print =====data01=$data01 + goto loop4 +endi + +if $data02 != 6 then + print =====data02=$data02 + goto loop4 +endi + +# row 1 +if $data11 != 2 then + print =====data11=$data11 + goto loop4 +endi + +if $data12 != 9 then + print =====data12=$data12 + goto loop4 +endi + +$loop_count = 0 + +loop5: +sleep 1000 + +print select * from streamt1; +sql select * from streamt1; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 1 then + print =====rows=$rows expect 2 + goto loop5 +endi + +# row 0 +if $data01 != 3 then + print =====data01=$data01 + goto loop5 +endi + +if $data02 != 6 then + print =====data02=$data02 + goto loop5 +endi + +print end--------------------------------- + +system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file diff --git a/tests/script/tsim/stream/checkpointInterval1.sim b/tests/script/tsim/stream/checkpointInterval1.sim new file mode 100644 index 0000000000..21825e7f48 --- /dev/null +++ b/tests/script/tsim/stream/checkpointInterval1.sim @@ -0,0 +1,104 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135 +system sh/exec.sh -n dnode1 -s start +sleep 50 +sql connect + +print step 1 + +sql create database test vgroups 4; + +sql use test; + +sql create stable st(ts timestamp,a int,b int,c int, d double) tags(ta int,tb int,tc int); +sql create table 
t1 using st tags(1,1,1); +sql create table t2 using st tags(2,2,2); +sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from st interval(10s); + +sql insert into t1 values(1648791213000,1,2,3,1.0); + +sql insert into t2 values(1648791213001,2,2,3,1.1); + +$loop_count = 0 + +loop0: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 1 then + print =====rows=$rows expect 1 + goto loop0 +endi + +# row 0 +if $data01 != 2 then + print =====data01=$data01 + goto loop0 +endi + +if $data02 != 3 then + print =====data02=$data02 + goto loop0 +endi + +print waiting for checkpoint generation 1 ...... + +sleep 25000 + +print restart taosd + +system sh/stop_dnodes.sh + +system sh/exec.sh -n dnode1 -s start + +sql insert into t1 values(1648791213002,3,2,3,1.1); +sql insert into t2 values(1648791223003,4,2,3,1.1); + +$loop_count = 0 + +loop1: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 2 then + print =====rows=$rows expect 2 + goto loop1 +endi + +# row 0 +if $data01 != 3 then + print =====data01=$data01 + goto loop1 +endi + +if $data02 != 6 then + print =====data02=$data02 + goto loop1 +endi + +# row 1 +if $data11 != 1 then + print =====data11=$data11 + goto loop1 +endi + +if $data12 != 4 then + print =====data12=$data12 + goto loop1 +endi + +print end--------------------------------- + +system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file diff --git a/tests/script/tsim/stream/checkpointSession0.sim b/tests/script/tsim/stream/checkpointSession0.sim new file mode 100644 index 0000000000..1d503806c5 --- /dev/null +++ b/tests/script/tsim/stream/checkpointSession0.sim @@ -0,0 +1,178 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135 +system sh/exec.sh -n dnode1 -s start +sleep 50 +sql 
connect + +print step 1 + +print =============== create database +sql create database test vgroups 1; + +sql use test; + + +sql create table t1(ts timestamp, a int, b int , c int, d double); +sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from t1 session(ts, 10s); +sql insert into t1 values(1648791213000,1,2,3,1.0); +sql insert into t1 values(1648791213001,2,2,3,1.1); + +$loop_count = 0 + +loop0: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 1 then + print =====rows=$rows expect 1 + goto loop0 +endi + +# row 0 +if $data01 != 2 then + print =====data01=$data01 + goto loop0 +endi + +if $data02 != 3 then + print =====data02=$data02 + goto loop0 +endi + +print waiting for checkpoint generation 1 ...... + +sleep 25000 + +print restart taosd 01 ...... + +system sh/stop_dnodes.sh + +system sh/exec.sh -n dnode1 -s start + +sql insert into t1 values(1648791213002,3,2,3,1.1); + +$loop_count = 0 + +loop1: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 1 then + print =====rows=$rows expect 1 + goto loop1 +endi + +# row 0 +if $data01 != 3 then + print =====data01=$data01 + goto loop1 +endi + +if $data02 != 6 then + print =====data02=$data02 + goto loop1 +endi + +sql insert into t1 values(1648791233003,4,2,3,1.1); + +$loop_count = 0 + +loop2: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 2 then + print =====rows=$rows expect 2 + goto loop2 +endi + +# row 0 +if $data01 != 3 then + print =====data01=$data01 + goto loop2 +endi + +if $data02 != 6 then + print =====data02=$data02 + goto loop2 +endi + +# row 1 +if $data11 != 1 then + print =====data11=$data11 + goto loop2 +endi + +if $data12 != 4 then + print =====data12=$data12 + goto loop2 
+endi + +print step 2 + +print restart taosd 02 ...... + +system sh/stop_dnodes.sh + +system sh/exec.sh -n dnode1 -s start + +sql insert into t1 values(1648791233004,5,2,3,1.1); + +loop20: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 2 then + print =====rows=$rows expect 2 + goto loop20 +endi + +# row 0 +if $data01 != 3 then + print =====data01=$data01 + goto loop20 +endi + +if $data02 != 6 then + print =====data02=$data02 + goto loop20 +endi + +# row 1 +if $data11 != 2 then + print =====data11=$data11 + goto loop20 +endi + +if $data12 != 9 then + print =====data12=$data12 + goto loop20 +endi + +print end--------------------------------- + +system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file diff --git a/tests/script/tsim/stream/checkpointSession1.sim b/tests/script/tsim/stream/checkpointSession1.sim new file mode 100644 index 0000000000..5c9625aabb --- /dev/null +++ b/tests/script/tsim/stream/checkpointSession1.sim @@ -0,0 +1,104 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135 +system sh/exec.sh -n dnode1 -s start +sleep 50 +sql connect + +print step 1 + +sql create database test vgroups 4; + +sql use test; + +sql create stable st(ts timestamp,a int,b int,c int, d double) tags(ta int,tb int,tc int); +sql create table t1 using st tags(1,1,1); +sql create table t2 using st tags(2,2,2); +sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from st session(ts, 10s); + +sql insert into t1 values(1648791213000,1,2,3,1.0); + +sql insert into t2 values(1648791213001,2,2,3,1.1); + +$loop_count = 0 + +loop0: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 1 then + print =====rows=$rows expect 1 + goto loop0 +endi + +# row 0 +if $data01 != 2 then + print 
=====data01=$data01 + goto loop0 +endi + +if $data02 != 3 then + print =====data02=$data02 + goto loop0 +endi + +print waiting for checkpoint generation 1 ...... + +sleep 25000 + +print restart taosd + +system sh/stop_dnodes.sh + +system sh/exec.sh -n dnode1 -s start + +sql insert into t1 values(1648791213002,3,2,3,1.1); +sql insert into t2 values(1648791233003,4,2,3,1.1); + +$loop_count = 0 + +loop1: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 2 then + print =====rows=$rows expect 2 + goto loop1 +endi + +# row 0 +if $data01 != 3 then + print =====data01=$data01 + goto loop1 +endi + +if $data02 != 6 then + print =====data02=$data02 + goto loop1 +endi + +# row 1 +if $data11 != 1 then + print =====data11=$data11 + goto loop1 +endi + +if $data12 != 4 then + print =====data12=$data12 + goto loop1 +endi + +print end--------------------------------- + +system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file diff --git a/tests/script/tsim/stream/checkpointState0.sim b/tests/script/tsim/stream/checkpointState0.sim new file mode 100644 index 0000000000..3836721212 --- /dev/null +++ b/tests/script/tsim/stream/checkpointState0.sim @@ -0,0 +1,178 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135 +system sh/exec.sh -n dnode1 -s start +sleep 50 +sql connect + +print step 1 + +print =============== create database +sql create database test vgroups 1; + +sql use test; + + +sql create table t1(ts timestamp, a int, b int , c int, d double); +sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from t1 state_window(b); +sql insert into t1 values(1648791213000,1,2,3,1.0); +sql insert into t1 values(1648791213001,2,2,3,1.1); + +$loop_count = 0 + +loop0: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + 
+if $rows != 1 then + print =====rows=$rows expect 1 + goto loop0 +endi + +# row 0 +if $data01 != 2 then + print =====data01=$data01 + goto loop0 +endi + +if $data02 != 3 then + print =====data02=$data02 + goto loop0 +endi + +print waiting for checkpoint generation 1 ...... + +sleep 25000 + +print restart taosd 01 ...... + +system sh/stop_dnodes.sh + +system sh/exec.sh -n dnode1 -s start + +sql insert into t1 values(1648791213002,3,2,3,1.1); + +$loop_count = 0 + +loop1: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 1 then + print =====rows=$rows expect 1 + goto loop1 +endi + +# row 0 +if $data01 != 3 then + print =====data01=$data01 + goto loop1 +endi + +if $data02 != 6 then + print =====data02=$data02 + goto loop1 +endi + +sql insert into t1 values(1648791233003,4,3,3,1.1); + +$loop_count = 0 + +loop2: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 2 then + print =====rows=$rows expect 2 + goto loop2 +endi + +# row 0 +if $data01 != 3 then + print =====data01=$data01 + goto loop2 +endi + +if $data02 != 6 then + print =====data02=$data02 + goto loop2 +endi + +# row 1 +if $data11 != 1 then + print =====data11=$data11 + goto loop2 +endi + +if $data12 != 4 then + print =====data12=$data12 + goto loop2 +endi + +print step 2 + +print restart taosd 02 ...... 
+ +system sh/stop_dnodes.sh + +system sh/exec.sh -n dnode1 -s start + +sql insert into t1 values(1648791233004,5,3,3,1.1); + +loop20: +sleep 1000 + +sql select * from streamt; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 2 then + print =====rows=$rows expect 2 + goto loop20 +endi + +# row 0 +if $data01 != 3 then + print =====data01=$data01 + goto loop20 +endi + +if $data02 != 6 then + print =====data02=$data02 + goto loop20 +endi + +# row 1 +if $data11 != 2 then + print =====data11=$data11 + goto loop20 +endi + +if $data12 != 9 then + print =====data12=$data12 + goto loop20 +endi + +print end--------------------------------- + +system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file diff --git a/tests/script/win-test-file b/tests/script/win-test-file index 4d578a93cd..4ff4b52f7e 100644 --- a/tests/script/win-test-file +++ b/tests/script/win-test-file @@ -237,6 +237,52 @@ ./test.sh -f tsim/table/table.sim ./test.sh -f tsim/table/tinyint.sim ./test.sh -f tsim/table/vgroup.sim +./test.sh -f tsim/stream/basic0.sim -g +./test.sh -f tsim/stream/basic1.sim +./test.sh -f tsim/stream/basic2.sim +./test.sh -f tsim/stream/basic3.sim +./test.sh -f tsim/stream/basic4.sim +./test.sh -f tsim/stream/checkpointInterval0.sim +./test.sh -f tsim/stream/checkStreamSTable1.sim +./test.sh -f tsim/stream/checkStreamSTable.sim +./test.sh -f tsim/stream/deleteInterval.sim +./test.sh -f tsim/stream/deleteSession.sim +./test.sh -f tsim/stream/deleteState.sim +./test.sh -f tsim/stream/distributeInterval0.sim +./test.sh -f tsim/stream/distributeIntervalRetrive0.sim +./test.sh -f tsim/stream/distributeSession0.sim +./test.sh -f tsim/stream/drop_stream.sim +./test.sh -f tsim/stream/fillHistoryBasic1.sim +./test.sh -f tsim/stream/fillHistoryBasic2.sim +./test.sh -f tsim/stream/fillHistoryBasic3.sim +./test.sh -f tsim/stream/fillIntervalDelete0.sim +./test.sh -f tsim/stream/fillIntervalDelete1.sim +./test.sh -f 
tsim/stream/fillIntervalLinear.sim +./test.sh -f tsim/stream/fillIntervalPartitionBy.sim +./test.sh -f tsim/stream/fillIntervalPrevNext1.sim +./test.sh -f tsim/stream/fillIntervalPrevNext.sim +./test.sh -f tsim/stream/fillIntervalRange.sim +./test.sh -f tsim/stream/fillIntervalValue.sim +./test.sh -f tsim/stream/ignoreCheckUpdate.sim +./test.sh -f tsim/stream/ignoreExpiredData.sim +./test.sh -f tsim/stream/partitionby1.sim +./test.sh -f tsim/stream/partitionbyColumnInterval.sim +./test.sh -f tsim/stream/partitionbyColumnSession.sim +./test.sh -f tsim/stream/partitionbyColumnState.sim +./test.sh -f tsim/stream/partitionby.sim +./test.sh -f tsim/stream/pauseAndResume.sim +./test.sh -f tsim/stream/schedSnode.sim +./test.sh -f tsim/stream/session0.sim +./test.sh -f tsim/stream/session1.sim +./test.sh -f tsim/stream/sliding.sim +./test.sh -f tsim/stream/state0.sim +./test.sh -f tsim/stream/state1.sim +./test.sh -f tsim/stream/triggerInterval0.sim +./test.sh -f tsim/stream/triggerSession0.sim +./test.sh -f tsim/stream/udTableAndTag0.sim +./test.sh -f tsim/stream/udTableAndTag1.sim +./test.sh -f tsim/stream/udTableAndTag2.sim +./test.sh -f tsim/stream/windowClose.sim ./test.sh -f tsim/trans/lossdata1.sim ./test.sh -f tsim/tmq/basic1.sim ./test.sh -f tsim/tmq/basic2.sim diff --git a/tests/system-test/6-cluster/rollup.json b/tests/system-test/6-cluster/rollup.json index 02669acb93..a7bbc89fd7 100644 --- a/tests/system-test/6-cluster/rollup.json +++ b/tests/system-test/6-cluster/rollup.json @@ -1,6 +1,6 @@ { "filetype": "insert", - "cfgdir": "/home/chr/TDengine/debug/../sim/dnode1/cfg/", + "cfgdir": "/home/lisa/Documents/workspace/tdengine/debug/../sim/dnode1/cfg/", "host": "localhost", "port": 6030, "rest_port": 6041, diff --git a/tests/system-test/6-cluster/rollup_db.json b/tests/system-test/6-cluster/rollup_db.json index fedc47024c..d9ccd08ba5 100644 --- a/tests/system-test/6-cluster/rollup_db.json +++ b/tests/system-test/6-cluster/rollup_db.json @@ -1,6 +1,6 @@ { 
"filetype": "insert", - "cfgdir": "/home/chr/TDengine/debug/../sim/dnode1/cfg/", + "cfgdir": "/home/lisa/Documents/workspace/tdengine/debug/../sim/dnode1/cfg/", "host": "localhost", "port": 6030, "rest_port": 6041,