Merge pull request #21725 from taosdata/enh/triggerCheckPoint2

Enh/trigger check point2
This commit is contained in:
Haojun Liao 2023-08-30 22:54:31 +08:00 committed by GitHub
commit 4a9b48ef98
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
88 changed files with 8157 additions and 3792 deletions

View File

@ -191,6 +191,7 @@ extern int64_t tsWalFsyncDataSizeLimit;
extern int32_t tsTransPullupInterval;
extern int32_t tsMqRebalanceInterval;
extern int32_t tsStreamCheckpointTickInterval;
extern int32_t tsStreamNodeCheckInterval;
extern int32_t tsTtlUnit;
extern int32_t tsTtlPushIntervalSec;
extern int32_t tsTtlBatchDropNum;
@ -203,7 +204,6 @@ extern int32_t tsRpcRetryInterval;
extern bool tsDisableStream;
extern int64_t tsStreamBufferSize;
extern int64_t tsCheckpointInterval;
extern bool tsFilterScalarMode;
extern int32_t tsKeepTimeOffset;
extern int32_t tsMaxStreamBackendCache;

View File

@ -28,6 +28,22 @@ typedef struct SCorEpSet {
} SCorEpSet;
#define GET_ACTIVE_EP(_eps) (&((_eps)->eps[(_eps)->inUse]))
#define EPSET_TO_STR(_eps, tbuf) \
do { \
int len = snprintf((tbuf), sizeof(tbuf), "epset:{"); \
for (int _i = 0; _i < (_eps)->numOfEps; _i++) { \
if (_i == (_eps)->numOfEps - 1) { \
len += \
snprintf((tbuf) + len, sizeof(tbuf) - len, "%d. %s:%d", _i, (_eps)->eps[_i].fqdn, (_eps)->eps[_i].port); \
} else { \
len += \
snprintf((tbuf) + len, sizeof(tbuf) - len, "%d. %s:%d, ", _i, (_eps)->eps[_i].fqdn, (_eps)->eps[_i].port); \
} \
} \
len += snprintf((tbuf) + len, sizeof(tbuf) - len, "}, inUse:%d", (_eps)->inUse); \
} while (0);
int32_t taosGetFqdnPortFromEp(const char* ep, SEp* pEp);
void addEpIntoEpSet(SEpSet* pEpSet, const char* fqdn, uint16_t port);

View File

@ -157,6 +157,7 @@ enum { // WARN: new msg should be appended to segment tail
TD_DEF_MSG_TYPE(TDMT_MND_TRANS_TIMER, "trans-tmr", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_TTL_TIMER, "ttl-tmr", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_GRANT_HB_TIMER, "grant-hb-tmr", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_NODECHECK_TIMER, "node-check-tmr", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_KILL_TRANS, "kill-trans", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_KILL_QUERY, "kill-query", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_KILL_CONN, "kill-conn", NULL, NULL)
@ -175,13 +176,16 @@ enum { // WARN: new msg should be appended to segment tail
TD_DEF_MSG_TYPE(TDMT_MND_SERVER_VERSION, "server-version", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_UPTIME_TIMER, "uptime-timer", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, "lost-consumer-clear", NULL, NULL)
// TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_TIMER, "stream-checkpoint-tmr", NULL, NULL)
// TD_DEF_MSG_TYPE(TDMT_MND_STREAM_BEGIN_CHECKPOINT, "stream-begin-checkpoint", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_STREAM_HEARTBEAT, "stream-heartbeat", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_MAX_MSG, "mnd-max", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_BALANCE_VGROUP_LEADER, "balance-vgroup-leader", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_RESTORE_DNODE, "restore-dnode", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_PAUSE_STREAM, "pause-stream", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_RESUME_STREAM, "resume-stream", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_TIMER, "stream-checkpoint-tmr", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_STREAM_BEGIN_CHECKPOINT, "stream-begin-checkpoint", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_STREAM_NODECHANGE_CHECK, "stream-nodechange-check", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_TRIM_DB_TIMER, "trim-db-tmr", NULL, NULL)
TD_NEW_MSG_SEG(TDMT_VND_MSG)
@ -255,15 +259,13 @@ enum { // WARN: new msg should be appended to segment tail
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DISPATCH, "stream-task-dispatch", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_UNUSED1, "stream-unused1", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY, "stream-scan-history", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY_FINISH, "stream-scan-history-finish", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECK, "stream-task-check", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT, "stream-checkpoint", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT_READY, "stream-checkpoint-ready", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_REPORT_CHECKPOINT, "stream-report-checkpoint", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RESTORE_CHECKPOINT, "stream-restore-checkpoint", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_MAX_MSG, "stream-max", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_PAUSE, "stream-task-pause", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RESUME, "stream-task-resume", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_STOP, "stream-task-stop", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_MAX_MSG, "stream-max", NULL, NULL)
TD_NEW_MSG_SEG(TDMT_MON_MSG)
TD_DEF_MSG_TYPE(TDMT_MON_MAX_MSG, "monitor-max", NULL, NULL)
@ -300,9 +302,12 @@ enum { // WARN: new msg should be appended to segment tail
TD_DEF_MSG_TYPE(TDMT_SYNC_FORCE_FOLLOWER, "sync-force-become-follower", NULL, NULL)
TD_NEW_MSG_SEG(TDMT_VND_STREAM_MSG)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL)
// TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_SCAN_HISTORY, "vnode-stream-scan-history", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_SCAN_HISTORY_FINISH, "vnode-stream-scan-history-finish", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_CHECK_POINT_SOURCE, "vnode-stream-checkpoint-source", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TASK_UPDATE, "vnode-stream-update", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TASK_CHECK, "vnode-stream-task-check", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_MAX_MSG, "vnd-stream-max", NULL, NULL)
TD_NEW_MSG_SEG(TDMT_VND_TMQ_MSG)

View File

@ -41,23 +41,21 @@ typedef struct {
} SLocalFetch;
typedef struct {
void* tqReader;
void* config;
void* tqReader; // todo remove it
void* vnode;
void* mnd;
SMsgCb* pMsgCb;
int64_t version;
bool initMetaReader;
uint64_t checkpointId;
bool initTableReader;
bool initTqReader;
int32_t numOfVgroups;
void* sContext; // SSnapContext*
void* pStateBackend;
struct SStorageAPI api;
int8_t fillHistory;
STimeWindow winRange;
struct SStorageAPI api;
} SReadHandle;
// in queue mode, data streams are seperated by msg
@ -97,9 +95,6 @@ void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId);
int32_t qSetStreamOpOpen(qTaskInfo_t tinfo);
// todo refactor
void qGetCheckpointVersion(qTaskInfo_t tinfo, int64_t* dataVer, int64_t* ckId);
/**
* Set multiple input data blocks for the stream scan.
* @param tinfo

View File

@ -379,7 +379,7 @@ typedef struct SStateStore {
state_key_cmpr_fn fn, void** pVal, int32_t* pVLen);
int32_t (*streamStateSessionGetKeyByRange)(SStreamState* pState, const SSessionKey* range, SSessionKey* curKey);
SUpdateInfo* (*updateInfoInit)(int64_t interval, int32_t precision, int64_t watermark);
SUpdateInfo* (*updateInfoInit)(int64_t interval, int32_t precision, int64_t watermark, bool igUp);
TSKEY (*updateInfoFillBlockData)(SUpdateInfo* pInfo, SSDataBlock* pBlock, int32_t primaryTsCol);
bool (*updateInfoIsUpdated)(SUpdateInfo* pInfo, uint64_t tableId, TSKEY ts);
bool (*updateInfoIsTableInserted)(SUpdateInfo* pInfo, int64_t tbUid);
@ -387,7 +387,7 @@ typedef struct SStateStore {
void (*windowSBfDelete)(SUpdateInfo *pInfo, uint64_t count);
void (*windowSBfAdd)(SUpdateInfo *pInfo, uint64_t count);
SUpdateInfo* (*updateInfoInitP)(SInterval* pInterval, int64_t watermark);
SUpdateInfo* (*updateInfoInitP)(SInterval* pInterval, int64_t watermark, bool igUp);
void (*updateInfoAddCloseWindowSBF)(SUpdateInfo* pInfo);
void (*updateInfoDestoryColseWinSBF)(SUpdateInfo* pInfo);
int32_t (*updateInfoSerialize)(void* buf, int32_t bufLen, const SUpdateInfo* pInfo);
@ -398,7 +398,8 @@ typedef struct SStateStore {
SStreamStateCur* (*streamStateSessionSeekKeyCurrentNext)(SStreamState* pState, const SSessionKey* key);
struct SStreamFileState* (*streamFileStateInit)(int64_t memSize, uint32_t keySize, uint32_t rowSize,
uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark, const char*id);
uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark,
const char* id, int64_t ckId);
void (*streamFileStateDestroy)(struct SStreamFileState* pFileState);
void (*streamFileStateClear)(struct SStreamFileState* pFileState);

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _STREAM_BACKEDN_SNAPSHOT_H_
#define _STREAM_BACKEDN_SNAPSHOT_H_
#include "tcommon.h"
#define STREAM_STATE_TRANSFER "stream-state-transfer"
typedef struct SStreamSnapReader SStreamSnapReader;
typedef struct SStreamSnapWriter SStreamSnapWriter;
typedef struct SStreamSnapHandle SStreamSnapHandle;
typedef struct SStreamSnapBlockHdr SStreamSnapBlockHdr;
int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t ever, char* path, SStreamSnapReader** ppReader);
int32_t streamSnapReaderClose(SStreamSnapReader* pReader);
int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* size);
// SMetaSnapWriter ========================================
int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path, SStreamSnapWriter** ppWriter);
int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nData);
int32_t streamSnapWriterClose(SStreamSnapWriter* ppWriter, int8_t rollback);
#endif

View File

@ -20,6 +20,7 @@
#include "tmsg.h"
#include "tmsgcb.h"
#include "tqueue.h"
#include "ttimer.h"
#ifdef __cplusplus
extern "C" {
@ -48,6 +49,8 @@ enum {
TASK_STATUS__SCAN_HISTORY, // stream task scan history data by using tsdbread in the stream scanner
TASK_STATUS__HALT, // pause, but not be manipulated by user command
TASK_STATUS__PAUSE, // pause
TASK_STATUS__CK, // stream task is in checkpoint status, no data are allowed to put into inputQ anymore
TASK_STATUS__CK_READY,
};
enum {
@ -61,15 +64,12 @@ enum {
enum {
TASK_INPUT_STATUS__NORMAL = 1,
TASK_INPUT_STATUS__BLOCKED,
TASK_INPUT_STATUS__RECOVER,
TASK_INPUT_STATUS__STOP,
TASK_INPUT_STATUS__FAILED,
};
enum {
TASK_OUTPUT_STATUS__NORMAL = 1,
TASK_OUTPUT_STATUS__WAIT,
TASK_OUTPUT_STATUS__BLOCKED,
};
enum {
@ -97,11 +97,16 @@ enum {
STREAM_QUEUE__PROCESSING,
};
enum {
STREAM_META_WILL_STOP = 1,
STREAM_META_OK_TO_STOP = 2,
};
typedef struct {
int8_t type;
} SStreamQueueItem;
typedef void FTbSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data);
typedef void FTbSink(SStreamTask* pTask, void* vnode, void* data);
typedef int32_t FTaskExpand(void* ahandle, SStreamTask* pTask, int64_t ver);
typedef struct {
@ -120,13 +125,12 @@ typedef struct {
typedef struct {
int8_t type;
int64_t nodeId; // nodeId, from SStreamMeta
int32_t srcVgId;
int32_t srcTaskId;
int32_t childId;
int64_t sourceVer;
int64_t reqId;
SArray* blocks; // SArray<SSDataBlock>
} SStreamDataBlock;
@ -136,10 +140,6 @@ typedef struct {
SSDataBlock* pBlock;
} SStreamRefDataBlock;
typedef struct {
int8_t type;
} SStreamCheckpoint;
typedef struct {
int8_t type;
SSDataBlock* pBlock;
@ -189,6 +189,7 @@ int32_t streamInit();
void streamCleanUp();
SStreamQueue* streamQueueOpen(int64_t cap);
void streamQueueCleanup(SStreamQueue* pQueue);
void streamQueueClose(SStreamQueue* pQueue, int32_t taskId);
static FORCE_INLINE void streamQueueProcessSuccess(SStreamQueue* queue) {
@ -252,19 +253,25 @@ typedef struct SStreamChildEpInfo {
int32_t nodeId;
int32_t childId;
int32_t taskId;
int8_t dataAllowed;
SEpSet epSet;
bool dataAllowed; // denote if the data from this upstream task is allowed to put into inputQ, not serialize it
int64_t stage; // upstream task stage value, to denote if the upstream node has restart/replica changed/transfer
} SStreamChildEpInfo;
typedef struct SStreamId {
typedef struct SStreamTaskKey {
int64_t streamId;
int64_t taskId;
} SStreamTaskKey;
typedef struct SStreamTaskId {
int64_t streamId;
int32_t taskId;
const char* idStr;
} SStreamId;
} SStreamTaskId;
typedef struct SCheckpointInfo {
int64_t id;
int64_t version; // offset in WAL
int64_t checkpointId;
int64_t checkpointVer; // latest checkpointId version
int64_t currentVer; // current offset in WAL, not serialize it
} SCheckpointInfo;
@ -273,7 +280,6 @@ typedef struct SStreamStatus {
int8_t downstreamReady; // downstream tasks are all ready now, if this flag is set
int8_t schedStatus;
int8_t keepTaskStatus;
bool transferState;
bool appendTranstateBlock; // has append the transfer state data block already, todo: remove it
int8_t timerActive; // timer is active
int8_t pauseAllowed; // allowed task status to be set to be paused
@ -287,6 +293,7 @@ typedef struct SHistDataRange {
typedef struct SSTaskBasicInfo {
int32_t nodeId; // vgroup id or snode id
SEpSet epSet;
SEpSet mnodeEpset; // mnode epset for send heartbeat
int32_t selfChildId;
int32_t totalLevel;
int8_t taskLevel;
@ -314,7 +321,7 @@ typedef struct {
struct SStreamTask {
int64_t ver;
SStreamId id;
SStreamTaskId id;
SSTaskBasicInfo info;
STaskOutputInfo outputInfo;
SDispatchMsgInfo msgInfo;
@ -322,12 +329,15 @@ struct SStreamTask {
SCheckpointInfo chkInfo;
STaskExec exec;
SHistDataRange dataRange;
SStreamId historyTaskId;
SStreamId streamTaskId;
SArray* pUpstreamEpInfoList; // SArray<SStreamChildEpInfo*>, // children info
SStreamTaskId historyTaskId;
SStreamTaskId streamTaskId;
int32_t nextCheckId;
SArray* checkpointInfo; // SArray<SStreamCheckpointInfo>
STaskTimestamp tsInfo;
SArray* pReadyMsgList; // SArray<SStreamChkptReadyInfo*>
TdThreadMutex lock; // secure the operation of set task status and puting data into inputQ
SArray* pUpstreamInfoList;
// output
union {
STaskDispatcherFixedEp fixedEpDispatcher;
@ -348,7 +358,6 @@ struct SStreamTask {
SMsgCb* pMsgCb; // msg handle
SStreamState* pState; // state backend
SArray* pRspMsgList;
TdThreadMutex lock;
// the followings attributes don't be serialized
int32_t notReadyTasks;
@ -358,11 +367,18 @@ struct SStreamTask {
int32_t refCnt;
int64_t checkpointingId;
int32_t checkpointAlignCnt;
int32_t checkpointNotReadyTasks;
int32_t transferStateAlignCnt;
struct SStreamMeta* pMeta;
SSHashObj* pNameMap;
};
typedef struct SMetaHbInfo {
tmr_h hbTmr;
int32_t stopFlag;
int32_t tickCounter;
} SMetaHbInfo;
// meta
typedef struct SStreamMeta {
char* path;
@ -375,12 +391,25 @@ typedef struct SStreamMeta {
TXN* txn;
FTaskExpand* expandFunc;
int32_t vgId;
int64_t stage;
SRWLatch lock;
int32_t walScanCounter;
void* streamBackend;
int64_t streamBackendRid;
SHashObj* pTaskBackendUnique;
TdThreadMutex backendMutex;
SMetaHbInfo hbInfo;
int32_t closedTask;
int32_t totalTasks; // this value should be increased when a new task is added into the meta
int32_t chkptNotReadyTasks;
int64_t rid;
int64_t chkpId;
SArray* chkpSaved;
SArray* chkpInUse;
int32_t chkpCap;
SRWLatch chkpDirLock;
int32_t pauseTaskNum;
} SStreamMeta;
int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo);
@ -391,6 +420,10 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHisto
int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask);
int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask);
void tFreeStreamTask(SStreamTask* pTask);
int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, int64_t ver);
int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo);
int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem);
bool tInputQueueIsFull(const SStreamTask* pTask);
@ -401,8 +434,9 @@ typedef struct {
} SStreamTaskRunReq;
typedef struct {
int64_t streamId;
int32_t type;
int64_t stage; // nodeId from upstream task
int64_t streamId;
int32_t taskId;
int32_t srcVgId;
int32_t upstreamTaskId;
@ -443,6 +477,7 @@ typedef struct {
typedef struct {
int64_t reqId;
int64_t stage;
int64_t streamId;
int32_t upstreamNodeId;
int32_t upstreamTaskId;
@ -459,6 +494,7 @@ typedef struct {
int32_t downstreamNodeId;
int32_t downstreamTaskId;
int32_t childId;
int32_t oldStage;
int8_t status;
} SStreamTaskCheckRsp;
@ -485,6 +521,8 @@ typedef struct {
int64_t checkpointId;
int32_t taskId;
int32_t nodeId;
SEpSet mgmtEps;
int32_t mnodeId;
int64_t expireTime;
} SStreamCheckpointSourceReq;
@ -493,14 +531,16 @@ typedef struct {
int64_t checkpointId;
int32_t taskId;
int32_t nodeId;
int32_t mnodeId;
int64_t expireTime;
int8_t success;
} SStreamCheckpointSourceRsp;
int32_t tEncodeSStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq);
int32_t tDecodeSStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq);
int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq);
int32_t tDecodeStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq);
int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp);
int32_t tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp);
int32_t tEncodeStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp);
int32_t tDecodeStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp);
typedef struct {
SMsgHead msgHead;
@ -511,28 +551,25 @@ typedef struct {
int32_t upstreamTaskId;
int32_t upstreamNodeId;
int32_t childId;
int64_t expireTime;
int8_t taskLevel;
} SStreamCheckpointReq;
} SStreamCheckpointReadyMsg;
typedef struct {
SMsgHead msgHead;
int32_t tEncodeStreamCheckpointReadyMsg(SEncoder* pEncoder, const SStreamCheckpointReadyMsg* pRsp);
int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointReadyMsg* pRsp);
typedef struct STaskStatusEntry {
int64_t streamId;
int64_t checkpointId;
int32_t downstreamTaskId;
int32_t downstreamNodeId;
int32_t upstreamTaskId;
int32_t upstreamNodeId;
int32_t childId;
int64_t expireTime;
int8_t taskLevel;
} SStreamCheckpointRsp;
int32_t taskId;
int32_t status;
} STaskStatusEntry;
int32_t tEncodeSStreamCheckpointReq(SEncoder* pEncoder, const SStreamCheckpointReq* pReq);
int32_t tDecodeSStreamCheckpointReq(SDecoder* pDecoder, SStreamCheckpointReq* pReq);
typedef struct SStreamHbMsg {
int32_t vgId;
int32_t numOfTasks;
SArray* pTaskStatus; // SArray<SStreamTaskStatusEntry>
} SStreamHbMsg;
int32_t tEncodeSStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRsp* pRsp);
int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRsp);
int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pRsp);
int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pRsp);
typedef struct {
int64_t streamId;
@ -545,6 +582,29 @@ typedef struct {
int32_t tEncodeCompleteHistoryDataMsg(SEncoder* pEncoder, const SStreamCompleteHistoryMsg* pReq);
int32_t tDecodeCompleteHistoryDataMsg(SDecoder* pDecoder, SStreamCompleteHistoryMsg* pReq);
typedef struct SNodeUpdateInfo {
int32_t nodeId;
SEpSet prevEp;
SEpSet newEp;
} SNodeUpdateInfo;
typedef struct SStreamTaskNodeUpdateMsg {
int64_t streamId;
int32_t taskId;
SArray* pNodeList; // SArray<SNodeUpdateInfo>
} SStreamTaskNodeUpdateMsg;
int32_t tEncodeStreamTaskUpdateMsg(SEncoder* pEncoder, const SStreamTaskNodeUpdateMsg* pMsg);
int32_t tDecodeStreamTaskUpdateMsg(SDecoder* pDecoder, SStreamTaskNodeUpdateMsg* pMsg);
typedef struct SStreamTaskNodeUpdateRsp {
int64_t streamId;
int32_t taskId;
} SStreamTaskNodeUpdateRsp;
int32_t tEncodeStreamTaskUpdateRsp(SEncoder* pEncoder, const SStreamTaskNodeUpdateRsp* pMsg);
int32_t tDecodeStreamTaskUpdateRsp(SDecoder* pDecoder, SStreamTaskNodeUpdateRsp* pMsg);
typedef struct {
int64_t streamId;
int32_t downstreamTaskId;
@ -564,16 +624,11 @@ int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq)
int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp);
int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp);
int32_t tEncodeSStreamTaskScanHistoryReq(SEncoder* pEncoder, const SStreamRecoverDownstreamReq* pReq);
int32_t tDecodeSStreamTaskScanHistoryReq(SDecoder* pDecoder, SStreamRecoverDownstreamReq* pReq);
int32_t tEncodeSStreamTaskRecoverRsp(SEncoder* pEncoder, const SStreamRecoverDownstreamRsp* pRsp);
int32_t tDecodeSStreamTaskRecoverRsp(SDecoder* pDecoder, SStreamRecoverDownstreamRsp* pRsp);
int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq);
int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq);
int32_t tDecodeStreamRetrieveReq(SDecoder* pDecoder, SStreamRetrieveReq* pReq);
void tDeleteStreamRetrieveReq(SStreamRetrieveReq* pReq);
void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq);
int32_t streamSetupScheduleTrigger(SStreamTask* pTask);
@ -581,10 +636,9 @@ int32_t streamSetupScheduleTrigger(SStreamTask* pTask);
int32_t streamProcessRunReq(SStreamTask* pTask);
int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pMsg, bool exec);
int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code);
void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId);
void streamTaskOpenAllUpstreamInput(SStreamTask* pTask);
int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pMsg);
SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId);
void streamTaskInputFail(SStreamTask* pTask);
int32_t streamTryExec(SStreamTask* pTask);
@ -594,16 +648,19 @@ bool streamTaskShouldStop(const SStreamStatus* pStatus);
bool streamTaskShouldPause(const SStreamStatus* pStatus);
bool streamTaskIsIdle(const SStreamTask* pTask);
SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId);
int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize);
void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen);
char* createStreamTaskIdStr(int64_t streamId, int32_t taskId);
// recover and fill history
void streamTaskCheckDownstreamTasks(SStreamTask* pTask);
int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask);
int32_t streamTaskLaunchScanHistory(SStreamTask* pTask);
int32_t streamTaskCheckStatus(SStreamTask* pTask);
int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage);
int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList);
void streamTaskResetUpstreamStageInfo(SStreamTask* pTask);
int32_t streamTaskStop(SStreamTask* pTask);
int32_t streamSendCheckRsp(const SStreamMeta* pMeta, const SStreamTaskCheckReq* pReq, SStreamTaskCheckRsp* pRsp,
SRpcHandleInfo* pRpcInfo, int32_t taskId);
int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp);
@ -611,17 +668,26 @@ int32_t streamLaunchFillHistoryTask(SStreamTask* pTask);
int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask);
int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated);
bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer);
int32_t streamTaskGetInputQItems(const SStreamTask* pTask);
// common
int32_t streamRestoreParam(SStreamTask* pTask);
int32_t streamSetStatusNormal(SStreamTask* pTask);
const char* streamGetTaskStatusStr(int32_t status);
void streamTaskPause(SStreamTask* pTask);
void streamTaskResume(SStreamTask* pTask);
void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta);
void streamTaskResume(SStreamTask* pTask, SStreamMeta* pMeta);
void streamTaskHalt(SStreamTask* pTask);
void streamTaskResumeFromHalt(SStreamTask* pTask);
void streamTaskDisablePause(SStreamTask* pTask);
void streamTaskEnablePause(SStreamTask* pTask);
int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask);
void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet);
void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet);
void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDownstreamTask);
int32_t streamTaskReleaseState(SStreamTask* pTask);
int32_t streamTaskReloadState(SStreamTask* pTask);
void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId);
void streamTaskOpenAllUpstreamInput(SStreamTask* pTask);
// source level
int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow);
@ -640,31 +706,32 @@ int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask);
// stream task meta
void streamMetaInit();
void streamMetaCleanup();
SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId);
SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage);
void streamMetaClose(SStreamMeta* streamMeta);
// save to b-tree meta store
int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask);
int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId);
int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); // save to stream meta store
int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int64_t* pKey);
int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask, bool* pAdded);
int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId);
int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta); // todo remove it
int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta);
int32_t streamMetaGetNumOfStreamTasks(SStreamMeta* pMeta);
SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId);
void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask);
int32_t streamMetaBegin(SStreamMeta* pMeta);
int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId);
int32_t streamMetaCommit(SStreamMeta* pMeta);
int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver);
int32_t streamLoadTasks(SStreamMeta* pMeta);
void streamMetaNotifyClose(SStreamMeta* pMeta);
// checkpoint
int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq);
int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq);
int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp);
int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq);
int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask);
int32_t streamTaskReleaseState(SStreamTask* pTask);
int32_t streamTaskReloadState(SStreamTask* pTask);
int32_t streamAlignTransferState(SStreamTask* pTask);
int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SStreamTask* pTask,
int8_t isSucceed);
int32_t buildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SRpcMsg* pMsg,
int8_t isSucceed);
#ifdef __cplusplus
}
#endif

View File

@ -31,7 +31,8 @@ typedef struct SStreamFileState SStreamFileState;
typedef SList SStreamSnapshot;
SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize,
GetTsFun fp, void* pFile, TSKEY delMark, const char* id);
GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId,
int64_t checkpointId);
void streamFileStateDestroy(SStreamFileState* pFileState);
void streamFileStateClear(SStreamFileState* pFileState);
bool needClearDiskBuff(SStreamFileState* pFileState);
@ -44,7 +45,7 @@ bool hasRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen);
SStreamSnapshot* getSnapshot(SStreamFileState* pFileState);
int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState);
int32_t recoverSnapshot(SStreamFileState* pFileState);
int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId);
int32_t getSnapshotIdList(SStreamFileState* pFileState, SArray* list);
int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark);

View File

@ -43,8 +43,8 @@ typedef struct SUpdateKey {
// uint64_t maxDataVersion;
//} SUpdateInfo;
SUpdateInfo *updateInfoInitP(SInterval *pInterval, int64_t watermark);
SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t watermark);
SUpdateInfo *updateInfoInitP(SInterval *pInterval, int64_t watermark, bool igUp);
SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t watermark, bool igUp);
TSKEY updateInfoFillBlockData(SUpdateInfo *pInfo, SSDataBlock *pBlock, int32_t primaryTsCol);
bool updateInfoIsUpdated(SUpdateInfo *pInfo, uint64_t tableId, TSKEY ts);
bool updateInfoIsTableInserted(SUpdateInfo *pInfo, int64_t tbUid);

View File

@ -200,8 +200,11 @@ void taosArrayClear(SArray* pArray);
* @param pArray
* @param fp
*/
void taosArrayClearEx(SArray* pArray, void (*fp)(void*));
void taosArrayClearP(SArray* pArray, void (*fp)(void*));
void* taosArrayDestroy(SArray* pArray);
void taosArrayDestroyP(SArray* pArray, FDelete fp);

View File

@ -89,7 +89,7 @@ typedef struct {
RET = -1; \
} \
tEncoderClear(&coder); \
} while (0)
} while (0);
static void* tEncoderMalloc(SEncoder* pCoder, int32_t size);
static void* tDecoderMalloc(SDecoder* pCoder, int32_t size);

View File

@ -85,8 +85,6 @@ typedef uint16_t VarDataLenT; // maxVarDataLen: 65535
#define varDataVal(v) ((char *)(v) + VARSTR_HEADER_SIZE)
#define varDataTLen(v) (sizeof(VarDataLenT) + varDataLen(v))
#define NCHAR_WIDTH_TO_BYTES(n) ((n)*TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE)
typedef int32_t VarDataOffsetT;
typedef struct tstr {

View File

@ -826,6 +826,25 @@ TEST(clientCase, projection_query_tables) {
}
taos_free_result(pRes);
int64_t start = 1685959190000;
int32_t code = -1;
for(int32_t i = 0; i < 1000000; ++i) {
char t[512] = {0};
sprintf(t, "insert into t1 values(%ld, %ld)", start + i, i);
while(1) {
void* p = taos_query(pConn, t);
code = taos_errno(p);
taos_free_result(p);
if (code != 0) {
printf("insert data error, retry\n");
} else {
break;
}
}
}
for (int32_t i = 0; i < 1; ++i) {
printf("create table :%d\n", i);
createNewTable(pConn, i);
@ -901,12 +920,39 @@ TEST(clientCase, agg_query_tables) {
}
taos_free_result(pRes);
pRes = taos_query(pConn, "show table distributed tup");
if (taos_errno(pRes) != 0) {
printf("failed to select from table, reason:%s\n", taos_errstr(pRes));
int64_t st = 1685959293000;
for (int32_t i = 0; i < 10000000; ++i) {
char s[256] = {0};
while (1) {
sprintf(s, "insert into t1 values(%ld, %d)", st + i, i);
pRes = taos_query(pConn, s);
int32_t ret = taos_errno(pRes);
taos_free_result(pRes);
ASSERT_TRUE(false);
if (ret == 0) {
break;
}
}
while (1) {
sprintf(s, "insert into t2 values(%ld, %d)", st + i, i);
pRes = taos_query(pConn, s);
int32_t ret = taos_errno(pRes);
taos_free_result(pRes);
if (ret == 0) {
break;
}
}
}
// pRes = taos_query(pConn, "show table distributed tup");
// if (taos_errno(pRes) != 0) {
// printf("failed to select from table, reason:%s\n", taos_errstr(pRes));
// taos_free_result(pRes);
// ASSERT_TRUE(false);
// }
printResult(pRes);
taos_free_result(pRes);

View File

@ -240,7 +240,8 @@ int32_t tsTtlBatchDropNum = 10000; // number of tables dropped per batch
// internal
int32_t tsTransPullupInterval = 2;
int32_t tsMqRebalanceInterval = 2;
int32_t tsStreamCheckpointTickInterval = 1;
int32_t tsStreamCheckpointTickInterval = 20;
int32_t tsStreamNodeCheckInterval = 10;
int32_t tsTtlUnit = 86400;
int32_t tsTtlPushIntervalSec = 10;
int32_t tsTrimVDbIntervalSec = 60 * 60; // interval of trimming db in all vgroups
@ -250,7 +251,6 @@ char tsUdfdResFuncs[512] = ""; // udfd resident funcs that teardown when udf
char tsUdfdLdLibPath[512] = "";
bool tsDisableStream = false;
int64_t tsStreamBufferSize = 128 * 1024 * 1024;
int64_t tsCheckpointInterval = 3 * 60 * 60 * 1000;
bool tsFilterScalarMode = false;
int32_t tsKeepTimeOffset = 0; // latency of data migration
int tsResolveFQDNRetryTime = 100; // seconds
@ -263,6 +263,8 @@ char tsS3BucketName[TSDB_FQDN_LEN] = "<bucketname>";
char tsS3AppId[TSDB_FQDN_LEN] = "<appid>";
int8_t tsS3Enabled = false;
int32_t tsCheckpointInterval = 20;
#ifndef _STORAGE
int32_t taosSetTfsCfg(SConfig *pCfg) {
SConfigItem *pItem = cfgGetItem(pCfg, "dataDir");
@ -1057,7 +1059,6 @@ static int32_t taosSetServerCfg(SConfig *pCfg) {
tsDisableStream = cfgGetItem(pCfg, "disableStream")->bval;
tsStreamBufferSize = cfgGetItem(pCfg, "streamBufferSize")->i64;
tsCheckpointInterval = cfgGetItem(pCfg, "checkpointInterval")->i64;
tsFilterScalarMode = cfgGetItem(pCfg, "filterScalarMode")->bval;
tsKeepTimeOffset = cfgGetItem(pCfg, "keepTimeOffset")->i32;

View File

@ -33,9 +33,11 @@ int32_t mmProcessCreateReq(const SMgmtInputOpt *pInput, SRpcMsg *pMsg) {
return -1;
}
SMnodeOpt option = {.deploy = true, .numOfReplicas = createReq.replica,
SMnodeOpt option = {.deploy = true,
.numOfReplicas = createReq.replica,
.numOfTotalReplicas = createReq.replica + createReq.learnerReplica,
.selfIndex = -1, .lastIndex = createReq.lastIndex};
.selfIndex = -1,
.lastIndex = createReq.lastIndex};
memcpy(option.replicas, createReq.replicas, sizeof(createReq.replicas));
for (int32_t i = 0; i < createReq.replica; ++i) {
@ -204,6 +206,10 @@ SArray *mmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;

View File

@ -76,10 +76,12 @@ SArray *smGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER;
code = 0;
_OVER:

View File

@ -783,20 +783,24 @@ SArray *vmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_VND_CREATE_INDEX, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_INDEX, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
// if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;

View File

@ -574,7 +574,7 @@ void* tDecodeSMqConsumerObj(const void* buf, SMqConsumerObj* pConsumer
typedef struct {
int32_t vgId;
// char* qmsg; // SubPlanToString
// char* qmsg; // SubPlanToString
SEpSet epSet;
} SMqVgEp;
@ -589,8 +589,8 @@ typedef struct {
SArray* offsetRows; // SArray<OffsetRows*>
} SMqConsumerEp;
//SMqConsumerEp* tCloneSMqConsumerEp(const SMqConsumerEp* pEp);
//void tDeleteSMqConsumerEp(void* pEp);
// SMqConsumerEp* tCloneSMqConsumerEp(const SMqConsumerEp* pEp);
// void tDeleteSMqConsumerEp(void* pEp);
int32_t tEncodeSMqConsumerEp(void** buf, const SMqConsumerEp* pEp);
void* tDecodeSMqConsumerEp(const void* buf, SMqConsumerEp* pEp, int8_t sver);
@ -706,18 +706,21 @@ typedef struct {
int64_t currentTick; // do not serialize
int64_t deleteMark;
int8_t igCheckUpdate;
// 3.0.5.
int64_t checkpointId;
} SStreamObj;
int32_t tEncodeSStreamObj(SEncoder* pEncoder, const SStreamObj* pObj);
int32_t tDecodeSStreamObj(SDecoder* pDecoder, SStreamObj* pObj, int32_t sver);
void tFreeStreamObj(SStreamObj* pObj);
//typedef struct {
// typedef struct {
// char streamName[TSDB_STREAM_FNAME_LEN];
// int64_t uid;
// int64_t streamUid;
// SArray* childInfo; // SArray<SStreamChildEpInfo>
//} SStreamCheckpointObj;
// } SStreamCheckpointObj;
#ifdef __cplusplus
}

View File

@ -33,6 +33,7 @@ SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw);
int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb);
int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream);
// for sma
// TODO refactor
int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream);

View File

@ -77,10 +77,13 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) {
if (tEncodeSSchemaWrapper(pEncoder, &pObj->outputSchema) < 0) return -1;
// 3.0.20
// 3.0.20 ver =2
if (tEncodeI64(pEncoder, pObj->checkpointFreq) < 0) return -1;
if (tEncodeI8(pEncoder, pObj->igCheckUpdate) < 0) return -1;
// 3.0.50 ver = 3
if (tEncodeI64(pEncoder, pObj->checkpointId) < 0) return -1;
tEndEncode(pEncoder);
return pEncoder->pos;
}
@ -151,6 +154,9 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) {
if (tDecodeI8(pDecoder, &pObj->igCheckUpdate) < 0) return -1;
}
}
if (sver >= 3) {
if (tDecodeI64(pDecoder, &pObj->checkpointId) < 0) return -1;
}
tEndDecode(pDecoder);
return 0;
}

View File

@ -128,29 +128,31 @@ static void mndPullupTrimDb(SMnode *pMnode) {
}
static void mndCalMqRebalance(SMnode *pMnode) {
mTrace("calc mq rebalance");
int32_t contLen = 0;
void *pReq = mndBuildTimerMsg(&contLen);
if (pReq != NULL) {
SRpcMsg rpcMsg = { .msgType = TDMT_MND_TMQ_TIMER, .pCont = pReq, .contLen = contLen };
SRpcMsg rpcMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pReq, .contLen = contLen};
tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg);
}
}
#if 0
static void mndStreamCheckpointTick(SMnode *pMnode, int64_t sec) {
int32_t contLen = 0;
void *pReq = mndBuildCheckpointTickMsg(&contLen, sec);
if (pReq != NULL) {
SRpcMsg rpcMsg = {
.msgType = TDMT_MND_STREAM_CHECKPOINT_TIMER,
.pCont = pReq,
.contLen = contLen,
};
SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CHECKPOINT_TIMER, .pCont = pReq, .contLen = contLen};
tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg);
}
}
static void mndStreamCheckNode(SMnode* pMnode) {
int32_t contLen = 0;
void *pReq = mndBuildTimerMsg(&contLen);
if (pReq != NULL) {
SRpcMsg rpcMsg = {.msgType = TDMT_MND_NODECHECK_TIMER, .pCont = pReq, .contLen = contLen};
tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg);
}
}
#endif
static void mndPullupTelem(SMnode *pMnode) {
mTrace("pullup telem msg");
@ -279,11 +281,13 @@ static void *mndThreadFp(void *param) {
mndCalMqRebalance(pMnode);
}
#if 0
if (sec % tsStreamCheckpointTickInterval == 0) {
mndStreamCheckpointTick(pMnode, sec);
}
#endif
if (sec % tsStreamNodeCheckInterval == 0) {
mndStreamCheckNode(pMnode);
}
if (sec % tsTelemInterval == (TMIN(60, (tsTelemInterval - 1)))) {
mndPullupTelem(pMnode);
@ -599,7 +603,7 @@ int32_t mndIsCatchUp(SMnode *pMnode) {
return syncIsCatchUp(rid);
}
ESyncRole mndGetRole(SMnode *pMnode){
ESyncRole mndGetRole(SMnode *pMnode) {
int64_t rid = pMnode->syncMgmt.sync;
return syncGetRole(rid);
}

View File

@ -14,6 +14,8 @@
*/
#include "mndScheduler.h"
#include "tmisce.h"
#include "mndMnode.h"
#include "mndDb.h"
#include "mndSnode.h"
#include "mndVgroup.h"
@ -25,10 +27,8 @@
#define SINK_NODE_LEVEL (0)
extern bool tsDeployOnSnode;
static int32_t setTaskUpstreamEpInfo(const SStreamTask* pTask, SStreamTask* pDownstream);
static int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId,
SVgObj* pVgroup, int32_t fillHistory);
static void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask);
SVgObj* pVgroup, SEpSet* pEpset, int32_t fillHistory);
int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType,
int64_t watermark, int64_t deleteMark) {
@ -141,7 +141,7 @@ int32_t mndAddDispatcherForInternalTask(SMnode* pMnode, SStreamObj* pStream, SAr
}
} else {
SStreamTask* pOneSinkTask = taosArrayGetP(pSinkNodeList, 0);
setFixedDownstreamEpInfo(pTask, pOneSinkTask);
streamTaskSetFixedDownstreamInfo(pTask, pOneSinkTask);
}
return 0;
@ -207,7 +207,8 @@ SVgObj* mndSchedFetchOneVg(SMnode* pMnode, int64_t dbUid) {
}
// create sink node for each vgroup.
int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStreamObj* pStream, int32_t fillHistory) {
int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStreamObj* pStream, SEpSet* pEpset,
int32_t fillHistory) {
SSdb* pSdb = pMnode->pSdb;
void* pIter = NULL;
@ -223,7 +224,7 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStrea
continue;
}
mndAddSinkTaskToStream(pStream, pTaskList, pMnode, pVgroup->vgId, pVgroup, fillHistory);
mndAddSinkTaskToStream(pStream, pTaskList, pMnode, pVgroup->vgId, pVgroup, pEpset, fillHistory);
sdbRelease(pSdb, pVgroup);
}
@ -231,7 +232,7 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStrea
}
int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup,
int32_t fillHistory) {
SEpSet* pEpset, int32_t fillHistory) {
int64_t uid = (fillHistory == 0)? pStream->uid:pStream->hTaskUid;
SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SINK, fillHistory, 0, pTaskList);
if (pTask == NULL) {
@ -239,6 +240,8 @@ int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* p
return -1;
}
epsetAssign(&(pTask)->info.mnodeEpset, pEpset);
pTask->info.nodeId = vgId;
pTask->info.epSet = mndGetVgroupEpset(pMnode, pVgroup);
mndSetSinkTaskInfo(pStream, pTask);
@ -246,13 +249,15 @@ int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* p
}
static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList, SArray* pSinkTaskList,
SStreamObj* pStream, SSubplan* plan, uint64_t uid, int8_t fillHistory,
bool hasExtraSink, int64_t firstWindowSkey) {
SStreamObj* pStream, SSubplan* plan, uint64_t uid, SEpSet* pEpset,
int8_t fillHistory, bool hasExtraSink, int64_t firstWindowSkey) {
SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, pStream->conf.triggerParam, pTaskList);
if (pTask == NULL) {
return terrno;
}
epsetAssign(&pTask->info.mnodeEpset, pEpset);
// todo set the correct ts, which should be last key of queried table.
STimeWindow* pWindow = &pTask->dataRange.window;
@ -273,51 +278,12 @@ static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTas
for(int32_t i = 0; i < taosArrayGetSize(pSinkTaskList); ++i) {
SStreamTask* pSinkTask = taosArrayGetP(pSinkTaskList, i);
setTaskUpstreamEpInfo(pTask, pSinkTask);
streamTaskSetUpstreamInfo(pSinkTask, pTask);
}
return TSDB_CODE_SUCCESS;
}
static SStreamChildEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) {
SStreamChildEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamChildEpInfo));
if (pEpInfo == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
pEpInfo->childId = pTask->info.selfChildId;
pEpInfo->epSet = pTask->info.epSet;
pEpInfo->nodeId = pTask->info.nodeId;
pEpInfo->taskId = pTask->id.taskId;
return pEpInfo;
}
void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask) {
STaskDispatcherFixedEp* pDispatcher = &pDstTask->fixedEpDispatcher;
pDispatcher->taskId = pTask->id.taskId;
pDispatcher->nodeId = pTask->info.nodeId;
pDispatcher->epSet = pTask->info.epSet;
pDstTask->outputInfo.type = TASK_OUTPUT__FIXED_DISPATCH;
pDstTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH;
}
int32_t setTaskUpstreamEpInfo(const SStreamTask* pTask, SStreamTask* pDownstream) {
SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pTask);
if (pEpInfo == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
if (pDownstream->pUpstreamEpInfoList == NULL) {
pDownstream->pUpstreamEpInfoList = taosArrayInit(4, POINTER_BYTES);
}
taosArrayPush(pDownstream->pUpstreamEpInfoList, &pEpInfo);
return TSDB_CODE_SUCCESS;
}
static SArray* addNewTaskList(SArray* pTasksList) {
SArray* pTaskList = taosArrayInit(0, POINTER_BYTES);
taosArrayPush(pTasksList, &pTaskList);
@ -342,7 +308,7 @@ static void setHTasksId(SArray* pTaskList, const SArray* pHTaskList) {
}
static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* pPlan, SStreamObj* pStream,
bool hasExtraSink, int64_t nextWindowSkey) {
SEpSet* pEpset, bool hasExtraSink, int64_t nextWindowSkey) {
// create exec stream task, since only one level, the exec task is also the source task
SArray* pTaskList = addNewTaskList(pStream->tasks);
SSdb* pSdb = pMnode->pSdb;
@ -379,8 +345,8 @@ static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan*
// new stream task
SArray** pSinkTaskList = taosArrayGet(pStream->tasks, SINK_NODE_LEVEL);
int32_t code = addSourceStreamTask(pMnode, pVgroup, pTaskList, *pSinkTaskList, pStream, plan, pStream->uid, 0,
hasExtraSink, nextWindowSkey);
int32_t code = addSourceStreamTask(pMnode, pVgroup, pTaskList, *pSinkTaskList, pStream, plan, pStream->uid, pEpset,
0, hasExtraSink, nextWindowSkey);
if (code != TSDB_CODE_SUCCESS) {
sdbRelease(pSdb, pVgroup);
return -1;
@ -389,7 +355,7 @@ static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan*
if (pStream->conf.fillHistory) {
SArray** pHSinkTaskList = taosArrayGet(pStream->pHTasksList, SINK_NODE_LEVEL);
code = addSourceStreamTask(pMnode, pVgroup, pHTaskList, *pHSinkTaskList, pStream, plan, pStream->hTaskUid,
1, hasExtraSink, nextWindowSkey);
pEpset, 1, hasExtraSink, nextWindowSkey);
}
sdbRelease(pSdb, pVgroup);
@ -406,13 +372,16 @@ static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan*
}
static int32_t doAddSourceTask(SArray* pTaskList, int8_t fillHistory, int64_t uid, SStreamTask* pDownstreamTask,
SMnode* pMnode, SSubplan* pPlan, SVgObj* pVgroup, int64_t nextWindowSkey) {
SMnode* pMnode, SSubplan* pPlan, SVgObj* pVgroup, SEpSet* pEpset,
int64_t nextWindowSkey) {
SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, 0, pTaskList);
if (pTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
epsetAssign(&(pTask)->info.mnodeEpset, pEpset);
// todo set the correct ts, which should be last key of queried table.
STimeWindow* pWindow = &pTask->dataRange.window;
pWindow->skey = INT64_MIN;
@ -422,22 +391,24 @@ static int32_t doAddSourceTask(SArray* pTaskList, int8_t fillHistory, int64_t ui
pWindow->skey, pWindow->ekey);
// all the source tasks dispatch result to a single agg node.
setFixedDownstreamEpInfo(pTask, pDownstreamTask);
streamTaskSetFixedDownstreamInfo(pTask, pDownstreamTask);
if (mndAssignStreamTaskToVgroup(pMnode, pTask, pPlan, pVgroup) < 0) {
return -1;
}
return setTaskUpstreamEpInfo(pTask, pDownstreamTask);
return streamTaskSetUpstreamInfo(pDownstreamTask, pTask);
}
static int32_t doAddAggTask(uint64_t uid, SArray* pTaskList, SArray* pSinkNodeList, SMnode* pMnode, SStreamObj* pStream,
int32_t fillHistory, SStreamTask** pAggTask) {
SEpSet* pEpset, int32_t fillHistory, SStreamTask** pAggTask) {
*pAggTask = tNewStreamTask(uid, TASK_LEVEL__AGG, fillHistory, pStream->conf.triggerParam, pTaskList);
if (*pAggTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
epsetAssign(&(*pAggTask)->info.mnodeEpset, pEpset);
// dispatch
if (mndAddDispatcherForInternalTask(pMnode, pStream, pSinkNodeList, *pAggTask) < 0) {
return -1;
@ -446,8 +417,8 @@ static int32_t doAddAggTask(uint64_t uid, SArray* pTaskList, SArray* pSinkNodeLi
return 0;
}
static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, SStreamTask** pAggTask,
SStreamTask** pHAggTask) {
static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, SEpSet* pEpset,
SStreamTask** pAggTask, SStreamTask** pHAggTask) {
SArray* pAggTaskList = addNewTaskList(pStream->tasks);
SSdb* pSdb = pMnode->pSdb;
@ -461,7 +432,7 @@ static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan
*pAggTask = NULL;
SArray* pSinkNodeList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL);
int32_t code = doAddAggTask(pStream->uid, pAggTaskList, pSinkNodeList, pMnode, pStream, 0, pAggTask);
int32_t code = doAddAggTask(pStream->uid, pAggTaskList, pSinkNodeList, pMnode, pStream, pEpset, 0, pAggTask);
if (code != TSDB_CODE_SUCCESS) {
return -1;
}
@ -489,7 +460,7 @@ static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan
SArray* pHSinkNodeList = taosArrayGetP(pStream->pHTasksList, SINK_NODE_LEVEL);
*pHAggTask = NULL;
code = doAddAggTask(pStream->hTaskUid, pHAggTaskList, pHSinkNodeList, pMnode, pStream, pStream->conf.fillHistory,
code = doAddAggTask(pStream->hTaskUid, pHAggTaskList, pHSinkNodeList, pMnode, pStream, pEpset, pStream->conf.fillHistory,
pHAggTask);
if (code != TSDB_CODE_SUCCESS) {
if (pSnode != NULL) {
@ -519,7 +490,8 @@ static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan
}
static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPlan, SStreamObj* pStream,
SStreamTask* pDownstreamTask, SStreamTask* pHDownstreamTask, int64_t nextWindowSkey) {
SStreamTask* pDownstreamTask, SStreamTask* pHDownstreamTask,
SEpSet* pEpset, int64_t nextWindowSkey) {
SArray* pSourceTaskList = addNewTaskList(pStream->tasks);
SArray* pHSourceTaskList = NULL;
@ -549,7 +521,7 @@ static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPl
}
int32_t code =
doAddSourceTask(pSourceTaskList, 0, pStream->uid, pDownstreamTask, pMnode, plan, pVgroup, nextWindowSkey);
doAddSourceTask(pSourceTaskList, 0, pStream->uid, pDownstreamTask, pMnode, plan, pVgroup, pEpset, nextWindowSkey);
if (code != TSDB_CODE_SUCCESS) {
sdbRelease(pSdb, pVgroup);
terrno = code;
@ -558,7 +530,7 @@ static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPl
if (pStream->conf.fillHistory) {
code = doAddSourceTask(pHSourceTaskList, 1, pStream->hTaskUid, pHDownstreamTask, pMnode, plan, pVgroup,
nextWindowSkey);
pEpset, nextWindowSkey);
if (code != TSDB_CODE_SUCCESS) {
sdbRelease(pSdb, pVgroup);
return code;
@ -576,16 +548,16 @@ static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPl
}
static int32_t addSinkTasks(SArray* pTasksList, SMnode* pMnode, SStreamObj* pStream, SArray** pCreatedTaskList,
int32_t fillHistory) {
SEpSet* pEpset, int32_t fillHistory) {
SArray* pSinkTaskList = addNewTaskList(pTasksList);
if (pStream->fixedSinkVgId == 0) {
if (mndAddShuffleSinkTasksToStream(pMnode, pSinkTaskList, pStream, fillHistory) < 0) {
if (mndAddShuffleSinkTasksToStream(pMnode, pSinkTaskList, pStream, pEpset, fillHistory) < 0) {
// TODO free
return -1;
}
} else {
if (mndAddSinkTaskToStream(pStream, pSinkTaskList, pMnode, pStream->fixedSinkVgId, &pStream->fixedSinkVg,
fillHistory) < 0) {
pEpset, fillHistory) < 0) {
// TODO free
return -1;
}
@ -599,11 +571,11 @@ static void setSinkTaskUpstreamInfo(SArray* pTasksList, const SStreamTask* pUpst
SArray* pSinkTaskList = taosArrayGetP(pTasksList, SINK_NODE_LEVEL);
for(int32_t i = 0; i < taosArrayGetSize(pSinkTaskList); ++i) {
SStreamTask* pSinkTask = taosArrayGetP(pSinkTaskList, i);
setTaskUpstreamEpInfo(pUpstreamTask, pSinkTask);
streamTaskSetUpstreamInfo(pSinkTask, pUpstreamTask);
}
}
static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, int64_t nextWindowSkey) {
static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, int64_t nextWindowSkey, SEpSet* pEpset) {
SSdb* pSdb = pMnode->pSdb;
int32_t numOfPlanLevel = LIST_LENGTH(pPlan->pSubplans);
@ -626,7 +598,7 @@ static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan*
hasExtraSink = true;
SArray* pSinkTaskList = NULL;
int32_t code = addSinkTasks(pStream->tasks, pMnode, pStream, &pSinkTaskList, 0);
int32_t code = addSinkTasks(pStream->tasks, pMnode, pStream, &pSinkTaskList, pEpset, 0);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
@ -634,7 +606,7 @@ static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan*
// check for fill history
if (pStream->conf.fillHistory) {
SArray* pHSinkTaskList = NULL;
code = addSinkTasks(pStream->pHTasksList, pMnode, pStream, &pHSinkTaskList, 1);
code = addSinkTasks(pStream->pHTasksList, pMnode, pStream, &pHSinkTaskList, pEpset, 1);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
@ -649,7 +621,7 @@ static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan*
SStreamTask* pAggTask = NULL;
SStreamTask* pHAggTask = NULL;
int32_t code = addAggTask(pStream, pMnode, pPlan, &pAggTask, &pHAggTask);
int32_t code = addAggTask(pStream, pMnode, pPlan, pEpset, &pAggTask, &pHAggTask);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
@ -658,9 +630,9 @@ static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan*
setSinkTaskUpstreamInfo(pStream->pHTasksList, pHAggTask);
// source level
return addSourceTasksForMultiLevelStream(pMnode, pPlan, pStream, pAggTask, pHAggTask, nextWindowSkey);
return addSourceTasksForMultiLevelStream(pMnode, pPlan, pStream, pAggTask, pHAggTask, pEpset, nextWindowSkey);
} else if (numOfPlanLevel == 1) {
return addSourceTasksForOneLevelStream(pMnode, pPlan, pStream, hasExtraSink, nextWindowSkey);
return addSourceTasksForOneLevelStream(pMnode, pPlan, pStream, pEpset, hasExtraSink, nextWindowSkey);
}
return 0;
@ -673,7 +645,10 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream, int64_t nextWindo
return -1;
}
int32_t code = doScheduleStream(pStream, pMnode, pPlan, nextWindowSkey);
SEpSet mnodeEpset = {0};
mndGetMnodeEpSet(pMnode, &mnodeEpset);
int32_t code = doScheduleStream(pStream, pMnode, pPlan, nextWindowSkey, &mnodeEpset);
qDestroyQueryPlan(pPlan);
return code;

File diff suppressed because it is too large Load Diff

View File

@ -100,7 +100,6 @@ static int32_t mndTransGetActionsSize(SArray *pArray) {
return rawDataLen;
}
static int32_t mndTransEncodeAction(SSdbRaw *pRaw, int32_t *offset, SArray *pActions, int32_t actionsNum) {
int32_t dataPos = *offset;
int8_t unused = 0;
@ -577,7 +576,7 @@ STrans *mndTransCreate(SMnode *pMnode, ETrnPolicy policy, ETrnConflct conflict,
pTrans->undoActions = taosArrayInit(TRANS_ARRAY_SIZE, sizeof(STransAction));
pTrans->commitActions = taosArrayInit(TRANS_ARRAY_SIZE, sizeof(STransAction));
pTrans->pRpcArray = taosArrayInit(1, sizeof(SRpcHandleInfo));
pTrans->mTraceId = pReq ? TRACE_GET_ROOTID(&pReq->info.traceId) : 0;
pTrans->mTraceId = pReq ? TRACE_GET_ROOTID(&pReq->info.traceId) : tGenIdPI64();
taosInitRWLatch(&pTrans->lockRpcArray);
taosThreadMutexInit(&pTrans->mutex, NULL);

View File

@ -46,7 +46,7 @@ SSdbRaw *sdbAllocRaw(ESdbType type, int8_t sver, int32_t dataLen) {
void sdbFreeRaw(SSdbRaw *pRaw) {
if (pRaw != NULL) {
#if 1
mTrace("raw:%p, is freed", pRaw);
mTrace("raw:%p, is freed, len:%d, table:%s", pRaw, pRaw->dataLen, sdbTableName(pRaw->type));
#endif
taosMemoryFree(pRaw);
}

View File

@ -57,45 +57,49 @@ FAIL:
}
int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) {
ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->pUpstreamEpInfoList) != 0);
pTask->refCnt = 1;
pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId);
pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
pTask->inputQueue = streamQueueOpen(512 << 10);
pTask->outputInfo.queue = streamQueueOpen(512 << 10);
if (pTask->inputQueue == NULL || pTask->outputInfo.queue == NULL) {
return -1;
ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->pUpstreamInfoList) != 0);
int32_t code = streamTaskInit(pTask, pSnode->pMeta, &pSnode->msgCb, ver);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
pTask->tsInfo.init = taosGetTimestampMs();
pTask->inputStatus = TASK_INPUT_STATUS__NORMAL;
pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL;
pTask->pMsgCb = &pSnode->msgCb;
pTask->chkInfo.version = ver;
pTask->pMeta = pSnode->pMeta;
streamTaskOpenAllUpstreamInput(pTask);
pTask->pState = streamStateOpen(pSnode->path, pTask, false, -1, -1);
if (pTask->pState == NULL) {
qError("s-task:%s failed to open state for task", pTask->id.idStr);
return -1;
} else {
qDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState);
}
int32_t numOfChildEp = taosArrayGetSize(pTask->pUpstreamEpInfoList);
int32_t numOfChildEp = taosArrayGetSize(pTask->pUpstreamInfoList);
SReadHandle handle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState, .fillHistory = pTask->info.fillHistory };
initStreamStateAPI(&handle.api);
pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, 0, pTask->id.taskId);
ASSERT(pTask->exec.pExecutor);
taosThreadMutexInit(&pTask->lock, NULL);
streamTaskResetUpstreamStageInfo(pTask);
streamSetupScheduleTrigger(pTask);
qDebug("snode:%d expand stream task on snode, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", SNODE_HANDLE,
pTask->id.idStr, pTask->chkInfo.version, pTask->info.selfChildId, pTask->info.taskLevel);
SCheckpointInfo* pChkInfo = &pTask->chkInfo;
// checkpoint ver is the kept version, handled data should be the next version.
if (pTask->chkInfo.checkpointId != 0) {
pTask->chkInfo.currentVer = pTask->chkInfo.checkpointVer + 1;
qInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " currentVer:%" PRId64, pTask->id.idStr,
pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer);
} else {
if (pTask->chkInfo.currentVer == -1) {
pTask->chkInfo.currentVer = 0;
}
}
qInfo("snode:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " currentVer:%" PRId64
" child id:%d, level:%d, status:%s fill-history:%d, trigger:%" PRId64 " ms",
SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer,
pTask->info.selfChildId, pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus),
pTask->info.fillHistory, pTask->triggerParam);
return 0;
}
@ -113,12 +117,16 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) {
}
pSnode->msgCb = pOption->msgCb;
pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE);
pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, -1);
if (pSnode->pMeta == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
goto FAIL;
}
// todo fix it: send msg to mnode to rollback to an existed checkpoint, and broadcast the rollback msg to all other
// computing nodes.
pSnode->pMeta->stage = 0;
return pSnode;
FAIL:
@ -128,6 +136,7 @@ FAIL:
}
void sndClose(SSnode *pSnode) {
streamMetaNotifyClose(pSnode->pMeta);
streamMetaCommit(pSnode->pMeta);
streamMetaClose(pSnode->pMeta);
taosMemoryFree(pSnode->path);
@ -216,7 +225,7 @@ int32_t sndProcessTaskDispatchReq(SSnode *pSnode, SRpcMsg *pMsg, bool exec) {
SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.taskId);
if (pTask) {
SRpcMsg rsp = { .info = pMsg->info, .code = 0 };
SRpcMsg rsp = {.info = pMsg->info, .code = 0};
streamProcessDispatchMsg(pTask, &req, &rsp, exec);
streamMetaReleaseTask(pSnode->pMeta, pTask);
return 0;
@ -237,7 +246,7 @@ int32_t sndProcessTaskRetrieveReq(SSnode *pSnode, SRpcMsg *pMsg) {
SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.dstTaskId);
if (pTask) {
SRpcMsg rsp = { .info = pMsg->info, .code = 0};
SRpcMsg rsp = {.info = pMsg->info, .code = 0};
streamProcessRetrieveReq(pTask, &req, &rsp);
streamMetaReleaseTask(pSnode->pMeta, pTask);
tDeleteStreamRetrieveReq(&req);
@ -343,7 +352,7 @@ int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) {
SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, taskId);
if (pTask != NULL) {
rsp.status = streamTaskCheckStatus(pTask);
rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage);
streamMetaReleaseTask(pSnode->pMeta, pTask);
const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus);
@ -351,8 +360,7 @@ int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) {
pTask->id.idStr, pStatus, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
} else {
rsp.status = 0;
qDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64
") from task:0x%x (vgId:%d), rsp status %d",
qDebug("recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from task:0x%x (vgId:%d), rsp status %d",
taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
}
@ -424,13 +432,13 @@ int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) {
return sndProcessTaskRetrieveReq(pSnode, pMsg);
case TDMT_STREAM_RETRIEVE_RSP:
return sndProcessTaskRetrieveRsp(pSnode, pMsg);
case TDMT_STREAM_SCAN_HISTORY_FINISH:
case TDMT_VND_STREAM_SCAN_HISTORY_FINISH:
return sndProcessStreamTaskScanHistoryFinishReq(pSnode, pMsg);
case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP:
case TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP:
return sndProcessTaskRecoverFinishRsp(pSnode, pMsg);
case TDMT_STREAM_TASK_CHECK:
case TDMT_VND_STREAM_TASK_CHECK:
return sndProcessStreamTaskCheckReq(pSnode, pMsg);
case TDMT_STREAM_TASK_CHECK_RSP:
case TDMT_VND_STREAM_TASK_CHECK_RSP:
return sndProcessStreamTaskCheckRsp(pSnode, pMsg);
default:
ASSERT(0);

View File

@ -67,6 +67,9 @@ set(
"src/tq/tqRestore.c"
"src/tq/tqSnapshot.c"
"src/tq/tqOffsetSnapshot.c"
"src/tq/tqStreamStateSnap.c"
"src/tq/tqStreamTaskSnap.c"
)
aux_source_directory("src/tsdb/" TSDB_SOURCE_FILES)

View File

@ -88,11 +88,14 @@ typedef struct {
int64_t snapshotVer;
SWalReader* pWalReader;
SWalRef* pRef;
// STqPushHandle pushHandle; // push
// STqPushHandle pushHandle; // push
STqExecHandle execHandle; // exec
SRpcMsg* msg;
tq_handle_status status;
} STqHandle;
typedef struct {
int64_t snapshotVer;
} SStreamHandle;
struct STQ {
SVnode* pVnode;
@ -109,17 +112,10 @@ struct STQ {
SStreamMeta* pStreamMeta;
};
typedef struct {
int8_t inited;
tmr_h timer;
} STqMgmt;
typedef struct {
int32_t size;
} STqOffsetHead;
static STqMgmt tqMgmt = {0};
int32_t tEncodeSTqHandle(SEncoder* pEncoder, const STqHandle* pHandle);
int32_t tDecodeSTqHandle(SDecoder* pDecoder, STqHandle* pHandle);
void tqDestroyTqHandle(void* data);
@ -159,7 +155,7 @@ int32_t tqOffsetCommitFile(STqOffsetStore* pStore);
// tqSink
int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq,
const char* pIdStr);
void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* data);
void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, void* data);
// tqOffset
char* tqOffsetBuildFName(const char* path, int32_t fVer);
@ -176,6 +172,8 @@ int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequ
int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId,
int32_t type, int64_t sver, int64_t ever);
int32_t tqInitDataRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset);
void tqUpdateNodeStage(STQ* pTq);
#ifdef __cplusplus
}
#endif

View File

@ -89,6 +89,7 @@ typedef struct SQueryNode SQueryNode;
#define VNODE_RSMA0_DIR "tsdb"
#define VNODE_RSMA1_DIR "rsma1"
#define VNODE_RSMA2_DIR "rsma2"
#define VNODE_TQ_STREAM "stream"
#define VNODE_BUFPOOL_SEGMENTS 3
@ -219,11 +220,14 @@ void tqCleanUp();
STQ* tqOpen(const char* path, SVnode* pVnode);
void tqNotifyClose(STQ*);
void tqClose(STQ*);
int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver);
int tqPushMsg(STQ*, tmsg_t msgType);
int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg);
int tqUnregisterPushHandle(STQ* pTq, void* pHandle);
int tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed.
int tqCheckStreamStatus(STQ* pTq);
int tqStartStreamTasks(STQ* pTq, bool ckPause); // restore all stream tasks after vnode launching completed.
int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessStreamTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg);
int32_t tqCheckStreamStatus(STQ* pTq);
int tqCommit(STQ*);
int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd);
@ -313,6 +317,26 @@ int32_t tqOffsetWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqOffsetWriter
int32_t tqOffsetWriterClose(STqOffsetWriter** ppWriter, int8_t rollback);
int32_t tqOffsetSnapWrite(STqOffsetWriter* pWriter, uint8_t* pData, uint32_t nData);
// SStreamTaskWriter ======================================
int32_t streamTaskSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTaskReader** ppReader);
int32_t streamTaskSnapReaderClose(SStreamTaskReader* pReader);
int32_t streamTaskSnapRead(SStreamTaskReader* pReader, uint8_t** ppData);
int32_t streamTaskSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTaskWriter** ppWriter);
int32_t streamTaskSnapWriterClose(SStreamTaskWriter* ppWriter, int8_t rollback);
int32_t streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t nData);
int32_t streamStateSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamStateReader** ppReader);
int32_t streamStateSnapReaderClose(SStreamStateReader* pReader);
int32_t streamStateSnapRead(SStreamStateReader* pReader, uint8_t** ppData);
int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamStateWriter** ppWriter);
int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback);
int32_t streamStateSnapWrite(SStreamStateWriter* pWriter, uint8_t* pData, uint32_t nData);
int32_t streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId);
int32_t streamStateLoadTasks(SStreamStateWriter* pWriter);
// SStreamTaskReader ======================================
// SStreamStateWriter =====================================
// SStreamStateReader =====================================
@ -476,7 +500,9 @@ enum {
SNAP_DATA_TQ_HANDLE = 7,
SNAP_DATA_TQ_OFFSET = 8,
SNAP_DATA_STREAM_TASK = 9,
SNAP_DATA_STREAM_STATE = 10,
SNAP_DATA_STREAM_TASK_CHECKPOINT = 10,
SNAP_DATA_STREAM_STATE = 11,
SNAP_DATA_STREAM_STATE_BACKEND = 12,
};
struct SSnapDataHdr {

File diff suppressed because it is too large Load Diff

View File

@ -30,7 +30,7 @@ int32_t tqProcessSubmitReqForSubscribe(STQ* pTq) {
return 0;
}
int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) {
int32_t tqPushMsg(STQ* pTq, tmsg_t msgType) {
if (msgType == TDMT_VND_SUBMIT) {
tqProcessSubmitReqForSubscribe(pTq);
}
@ -39,20 +39,14 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v
int32_t numOfTasks = streamMetaGetNumOfTasks(pTq->pStreamMeta);
taosRUnLockLatch(&pTq->pStreamMeta->lock);
tqTrace("handle submit, restore:%d, size:%d", pTq->pVnode->restored, numOfTasks);
tqDebug("handle submit, restore:%d, numOfTasks:%d", pTq->pVnode->restored, numOfTasks);
// push data for stream processing:
// 1. the vnode has already been restored.
// 2. the vnode should be the leader.
// 3. the stream is not suspended yet.
if (!tsDisableStream && vnodeIsRoleLeader(pTq->pVnode) && pTq->pVnode->restored) {
if (numOfTasks == 0) {
return 0;
}
if (msgType == TDMT_VND_SUBMIT || msgType == TDMT_VND_DELETE) {
tqStartStreamTasks(pTq);
}
if ((!tsDisableStream) && (numOfTasks > 0) && (msgType == TDMT_VND_SUBMIT || msgType == TDMT_VND_DELETE)) {
tqStartStreamTasks(pTq, true);
}
return 0;

View File

@ -14,9 +14,10 @@
*/
#include "tq.h"
#include "vnd.h"
static int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle);
static int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId);
static int32_t doSetOffsetForWalReader(SStreamTask* pTask, int32_t vgId);
// this function should be executed by stream threads.
// extract submit block from WAL, and add them into the input queue for the sources tasks.
@ -29,7 +30,7 @@ int32_t tqStreamTasksScanWal(STQ* pTq) {
int32_t scan = pMeta->walScanCounter;
tqDebug("vgId:%d continue check if data in wal are available, walScanCounter:%d", vgId, scan);
// check all restore tasks
// check all tasks
bool shouldIdle = true;
createStreamTaskRunReq(pTq->pStreamMeta, &shouldIdle);
@ -61,7 +62,7 @@ int32_t tqStreamTasksStatusCheck(STQ* pTq) {
SStreamMeta* pMeta = pTq->pStreamMeta;
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
tqDebug("vgId:%d start to check all (%d) stream tasks downstream status", vgId, numOfTasks);
tqDebug("vgId:%d start to check all %d stream task(s) downstream status", vgId, numOfTasks);
if (numOfTasks == 0) {
return TSDB_CODE_SUCCESS;
}
@ -71,20 +72,15 @@ int32_t tqStreamTasksStatusCheck(STQ* pTq) {
pTaskList = taosArrayDup(pMeta->pTaskList, NULL);
taosWUnLockLatch(&pMeta->lock);
// broadcast the check downstream tasks msg
for (int32_t i = 0; i < numOfTasks; ++i) {
SStreamId* pTaskId = taosArrayGet(pTaskList, i);
SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i);
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId);
if (pTask == NULL) {
continue;
}
if (pTask->info.fillHistory == 1) {
tqDebug("s-task:%s fill-history task, wait for related stream task:0x%x to launch it", pTask->id.idStr,
pTask->streamTaskId.taskId);
continue;
}
streamTaskDoCheckDownstreamTasks(pTask);
streamTaskCheckDownstreamTasks(pTask);
streamMetaReleaseTask(pMeta, pTask);
}
@ -125,10 +121,15 @@ int32_t tqCheckStreamStatus(STQ* pTq) {
return 0;
}
int32_t tqStartStreamTasks(STQ* pTq) {
int32_t tqStartStreamTasks(STQ* pTq, bool ckPause) {
int32_t vgId = TD_VID(pTq->pVnode);
SStreamMeta* pMeta = pTq->pStreamMeta;
// for follower or vnode does not restored, do not launch the stream tasks.
if (!(vnodeIsRoleLeader(pTq->pVnode) && pTq->pVnode->restored)) {
return TSDB_CODE_SUCCESS;
}
taosWLockLatch(&pMeta->lock);
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
@ -146,6 +147,16 @@ int32_t tqStartStreamTasks(STQ* pTq) {
return 0;
}
int32_t numOfPauseTasks = pTq->pStreamMeta->pauseTaskNum;
if (ckPause && numOfTasks == numOfPauseTasks) {
tqDebug("vgId:%d ignore all submit, all streams had been paused, reset the walScanCounter", vgId);
// reset the counter value, since we do not launch the scan wal operation.
pMeta->walScanCounter = 0;
taosWUnLockLatch(&pMeta->lock);
return 0;
}
SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
if (pRunReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
@ -166,12 +177,12 @@ int32_t tqStartStreamTasks(STQ* pTq) {
return 0;
}
int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) {
int32_t doSetOffsetForWalReader(SStreamTask* pTask, int32_t vgId) {
// seek the stored version and extract data from WAL
int64_t firstVer = walReaderGetValidFirstVer(pTask->exec.pWalReader);
if (pTask->chkInfo.currentVer < firstVer) {
tqWarn("vgId:%d s-task:%s ver:%"PRId64" earlier than the first ver of wal range %" PRId64 ", forward to %" PRId64, vgId,
pTask->id.idStr, pTask->chkInfo.currentVer, firstVer, firstVer);
tqWarn("vgId:%d s-task:%s ver:%" PRId64 " earlier than the first ver of wal range %" PRId64 ", forward to %" PRId64,
vgId, pTask->id.idStr, pTask->chkInfo.currentVer, firstVer, firstVer);
pTask->chkInfo.currentVer = firstVer;
@ -192,7 +203,8 @@ int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) {
}
// append the data for the stream
tqDebug("vgId:%d s-task:%s wal reader initial seek to ver:%" PRId64, vgId, pTask->id.idStr, pTask->chkInfo.currentVer);
tqDebug("vgId:%d s-task:%s wal reader initial seek to ver:%" PRId64, vgId, pTask->id.idStr,
pTask->chkInfo.currentVer);
}
}
@ -222,7 +234,7 @@ static void checkForFillHistoryVerRange(SStreamTask* pTask, int64_t ver) {
double el = (taosGetTimestampMs() - pTask->tsInfo.step2Start) / 1000.0;
qDebug("s-task:%s scan-history from WAL stage(step 2) ended, elapsed time:%.2fs", id, el);
appendTranstateIntoInputQ(pTask);
/*int32_t code = */streamSchedExec(pTask);
/*int32_t code = */ streamSchedExec(pTask);
} else {
qWarn("s-task:%s fill-history scan WAL, currentVer:%" PRId64 " reach the maximum ver:%" PRId64 ", not scan wal",
id, ver, maxVer);
@ -252,7 +264,7 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
numOfTasks = taosArrayGetSize(pTaskList);
for (int32_t i = 0; i < numOfTasks; ++i) {
SStreamId* pTaskId = taosArrayGet(pTaskList, i);
SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i);
SStreamTask* pTask = streamMetaAcquireTask(pStreamMeta, pTaskId->streamId, pTaskId->taskId);
if (pTask == NULL) {
continue;
@ -266,8 +278,9 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
continue;
}
const char* pStatus = streamGetTaskStatusStr(status);
if (status != TASK_STATUS__NORMAL) {
tqDebug("s-task:%s not ready for new submit block from wal, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status));
tqDebug("s-task:%s not ready for new submit block from wal, status:%s", pTask->id.idStr, pStatus);
streamMetaReleaseTask(pStreamMeta, pTask);
continue;
}
@ -303,18 +316,28 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
continue;
}
int32_t numOfItemsInQ = taosQueueItemSize(pTask->inputQueue->queue);
int64_t maxVer = (pTask->info.fillHistory == 1)? pTask->dataRange.range.maxVer:INT64_MAX;
int32_t numOfItems = streamTaskGetInputQItems(pTask);
int64_t maxVer = (pTask->info.fillHistory == 1) ? pTask->dataRange.range.maxVer : INT64_MAX;
SStreamQueueItem* pItem = NULL;
code = extractMsgFromWal(pTask->exec.pWalReader, (void**) &pItem, maxVer, pTask->id.idStr);
code = extractMsgFromWal(pTask->exec.pWalReader, (void**)&pItem, maxVer, pTask->id.idStr);
if ((code != TSDB_CODE_SUCCESS || pItem == NULL) && (numOfItemsInQ == 0)) { // failed, continue
if ((code != TSDB_CODE_SUCCESS || pItem == NULL) && (numOfItems == 0)) { // failed, continue
checkForFillHistoryVerRange(pTask, walReaderGetCurrentVer(pTask->exec.pWalReader));
streamMetaReleaseTask(pStreamMeta, pTask);
continue;
}
taosThreadMutexLock(&pTask->lock);
pStatus = streamGetTaskStatusStr(pTask->status.taskStatus);
if (pTask->status.taskStatus != TASK_STATUS__NORMAL) {
tqDebug("s-task:%s not ready for submit block from wal, status:%s", pTask->id.idStr, pStatus);
taosThreadMutexUnlock(&pTask->lock);
streamMetaReleaseTask(pStreamMeta, pTask);
continue;
}
if (pItem != NULL) {
noDataInWal = false;
code = tAppendDataToInputQueue(pTask, pItem);
@ -329,7 +352,9 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
}
}
if ((code == TSDB_CODE_SUCCESS) || (numOfItemsInQ > 0)) {
taosThreadMutexUnlock(&pTask->lock);
if ((code == TSDB_CODE_SUCCESS) || (numOfItems > 0)) {
code = streamSchedExec(pTask);
if (code != TSDB_CODE_SUCCESS) {
streamMetaReleaseTask(pStreamMeta, pTask);
@ -348,4 +373,3 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
taosArrayDestroy(pTaskList);
return 0;
}

View File

@ -21,9 +21,14 @@
typedef struct STableSinkInfo {
uint64_t uid;
char tbName[TSDB_TABLE_NAME_LEN];
tstr name;
} STableSinkInfo;
static int32_t doSinkResultBlock(SVnode* pVnode, int32_t blockIndex, char* stbFullName, int64_t suid,
SSDataBlock* pDataBlock, SStreamTask* pTask);
static int32_t doSinkDeleteBlock(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask,
int64_t suid);
int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq,
const char* pIdStr) {
int32_t totalRows = pDataBlock->info.rows;
@ -97,17 +102,17 @@ end:
return ret;
}
static int32_t tqGetTableInfo(SSHashObj* pTableInfoMap,uint64_t groupId, STableSinkInfo** pInfo) {
static bool tqGetTableInfo(SSHashObj* pTableInfoMap,uint64_t groupId, STableSinkInfo** pInfo) {
void* pVal = tSimpleHashGet(pTableInfoMap, &groupId, sizeof(uint64_t));
if (pVal) {
*pInfo = *(STableSinkInfo**)pVal;
return TSDB_CODE_SUCCESS;
return true;
}
return TSDB_CODE_FAILED;
return false;
}
int32_t tqPutTableInfo(SSHashObj* tblInfo ,uint64_t groupId, STableSinkInfo* pTbl) {
static int32_t tqPutTableInfo(SSHashObj* tblInfo ,uint64_t groupId, STableSinkInfo* pTbl) {
if (tSimpleHashGetSize(tblInfo) > MAX_CACHE_TABLE_INFO_NUM) {
return TSDB_CODE_FAILED;
}
@ -115,7 +120,7 @@ int32_t tqPutTableInfo(SSHashObj* tblInfo ,uint64_t groupId, STableSinkInfo* pTb
return tSimpleHashPut(tblInfo, &groupId, sizeof(uint64_t), &pTbl, POINTER_BYTES);
}
int32_t tqPutReqToQueue(SVnode* pVnode, SVCreateTbBatchReq* pReqs) {
static int32_t tqPutReqToQueue(SVnode* pVnode, SVCreateTbBatchReq* pReqs) {
void* buf = NULL;
int32_t tlen = 0;
encodeCreateChildTableForRPC(pReqs, TD_VID(pVnode), &buf, &tlen);
@ -128,66 +133,40 @@ int32_t tqPutReqToQueue(SVnode* pVnode, SVCreateTbBatchReq* pReqs) {
return TSDB_CODE_SUCCESS;
}
void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* data) {
void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, void* data) {
const SArray* pBlocks = (const SArray*)data;
SVnode* pVnode = (SVnode*)vnode;
int64_t suid = pTask->tbSink.stbUid;
char* stbFullName = pTask->tbSink.stbFullName;
STSchema* pTSchema = pTask->tbSink.pTSchema;
int32_t vgId = TD_VID(pVnode);
int32_t numOfBlocks = taosArrayGetSize(pBlocks);
int32_t code = TSDB_CODE_SUCCESS;
int32_t blockSz = taosArrayGetSize(pBlocks);
tqDebug("vgId:%d, s-task:%s write %d stream resBlock(s) into table", vgId, pTask->id.idStr, numOfBlocks);
tqDebug("vgId:%d, s-task:%s write results %d blocks into table", TD_VID(pVnode), pTask->id.idStr, blockSz);
void* pBuf = NULL;
SArray* tagArray = NULL;
SArray* pVals = NULL;
SArray* crTblArray = NULL;
for (int32_t i = 0; i < blockSz; i++) {
for (int32_t i = 0; i < numOfBlocks; i++) {
SSDataBlock* pDataBlock = taosArrayGet(pBlocks, i);
int32_t rows = pDataBlock->info.rows;
if (pDataBlock->info.type == STREAM_DELETE_RESULT) {
SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))};
tqBuildDeleteReq(stbFullName, pDataBlock, &deleteReq, pTask->id.idStr);
if (taosArrayGetSize(deleteReq.deleteReqs) == 0) {
taosArrayDestroy(deleteReq.deleteReqs);
continue;
}
int32_t len;
int32_t code;
tEncodeSize(tEncodeSBatchDeleteReq, &deleteReq, len, code);
if (code != TSDB_CODE_SUCCESS) {
qError("s-task:%s failed to encode delete request", pTask->id.idStr);
}
SEncoder encoder;
void* serializedDeleteReq = rpcMallocCont(len + sizeof(SMsgHead));
void* abuf = POINTER_SHIFT(serializedDeleteReq, sizeof(SMsgHead));
tEncoderInit(&encoder, abuf, len);
tEncodeSBatchDeleteReq(&encoder, &deleteReq);
tEncoderClear(&encoder);
taosArrayDestroy(deleteReq.deleteReqs);
((SMsgHead*)serializedDeleteReq)->vgId = pVnode->config.vgId;
SRpcMsg msg = { .msgType = TDMT_VND_BATCH_DEL, .pCont = serializedDeleteReq, .contLen = len + sizeof(SMsgHead) };
if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) {
tqDebug("failed to put delete req into write-queue since %s", terrstr());
}
code = doSinkDeleteBlock(pVnode, stbFullName, pDataBlock, pTask, suid);
} else if (pDataBlock->info.type == STREAM_CREATE_CHILD_TABLE) {
tqDebug("s-task:%s build create table msg", pTask->id.idStr);
SVCreateTbBatchReq reqs = {0};
crTblArray = reqs.pArray = taosArrayInit(1, sizeof(struct SVCreateTbReq));
crTblArray = reqs.pArray = taosArrayInit(1, sizeof(SVCreateTbReq));
if (NULL == reqs.pArray) {
goto _end;
}
for (int32_t rowId = 0; rowId < rows; rowId++) {
SVCreateTbReq createTbReq = {0};
SVCreateTbReq* pCreateTbReq = &createTbReq;
SVCreateTbReq* pCreateTbReq = &((SVCreateTbReq){0});
// set const
pCreateTbReq->flags = 0;
@ -203,16 +182,14 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d
int32_t size = taosArrayGetSize(pDataBlock->pDataBlock);
if (size == 2) {
tagArray = taosArrayInit(1, sizeof(STagVal));
if (!tagArray) {
tdDestroySVCreateTbReq(pCreateTbReq);
goto _end;
}
STagVal tagVal = {
.cid = pTSchema->numOfCols + 1,
.type = TSDB_DATA_TYPE_UBIGINT,
.i64 = (int64_t)pDataBlock->info.id.groupId,
};
.cid = pTSchema->numOfCols + 1, .type = TSDB_DATA_TYPE_UBIGINT, .i64 = pDataBlock->info.id.groupId};
taosArrayPush(tagArray, &tagVal);
@ -227,6 +204,7 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d
tdDestroySVCreateTbReq(pCreateTbReq);
goto _end;
}
for (int32_t tagId = UD_TAG_COLUMN_INDEX, step = 1; tagId < size; tagId++, step++) {
SColumnInfoData* pTagData = taosArrayGet(pDataBlock->pDataBlock, tagId);
@ -236,12 +214,13 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d
continue;
} else if (IS_VAR_DATA_TYPE(pTagData->info.type)) {
tagVal.nData = varDataLen(pData);
tagVal.pData = varDataVal(pData);
tagVal.pData = (uint8_t*) varDataVal(pData);
} else {
memcpy(&tagVal.i64, pData, pTagData->info.bytes);
}
taosArrayPush(tagArray, &tagVal);
}
}
pCreateTbReq->ctb.tagNum = TMAX(size - UD_TAG_COLUMN_INDEX, 1);
@ -254,7 +233,6 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d
goto _end;
}
pCreateTbReq->ctb.pTag = (uint8_t*)pTag;
// set table name
@ -265,61 +243,106 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d
} else {
pCreateTbReq->name = taosStrdup(pDataBlock->info.parTbName);
}
taosArrayPush(reqs.pArray, pCreateTbReq);
tqDebug("s-task:%s build create table:%s msg complete", pTask->id.idStr, pCreateTbReq->name);
}
reqs.nReqs = taosArrayGetSize(reqs.pArray);
if (tqPutReqToQueue(pVnode, &reqs) != TSDB_CODE_SUCCESS) {
goto _end;
}
tagArray = taosArrayDestroy(tagArray);
taosArrayDestroyEx(crTblArray, (FDelete)tdDestroySVCreateTbReq);
crTblArray = NULL;
} else if (pDataBlock->info.type == STREAM_CHECKPOINT) {
continue;
} else {
SSubmitTbData tbData = {0};
tqDebug("tq sink pipe, convert block:%d, rows:%d", i, rows);
if (!(tbData.aRowP = taosArrayInit(rows, sizeof(SRow*)))) {
goto _end;
code = doSinkResultBlock(pVnode, i, stbFullName, suid, pDataBlock, pTask);
}
}
tbData.suid = suid;
tbData.uid = 0; // uid is assigned by vnode
tbData.sver = pTSchema->version;
tqDebug("vgId:%d, s-task:%s write results completed", vgId, pTask->id.idStr);
STableSinkInfo* pTableSinkInfo = NULL;
int32_t res = tqGetTableInfo(pTask->tbSink.pTblInfo, pDataBlock->info.id.groupId, &pTableSinkInfo);
if (res != TSDB_CODE_SUCCESS) {
pTableSinkInfo = taosMemoryCalloc(1, sizeof(STableSinkInfo));
_end:
taosArrayDestroy(tagArray);
taosArrayDestroy(pVals);
taosArrayDestroyEx(crTblArray, (FDelete)tdDestroySVCreateTbReq);
// TODO: change
}
int32_t doSinkDeleteBlock(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask,
int64_t suid) {
SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))};
int32_t code = tqBuildDeleteReq(stbFullName, pDataBlock, &deleteReq, pTask->id.idStr);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
if (taosArrayGetSize(deleteReq.deleteReqs) == 0) {
taosArrayDestroy(deleteReq.deleteReqs);
return TSDB_CODE_SUCCESS;
}
int32_t len;
tEncodeSize(tEncodeSBatchDeleteReq, &deleteReq, len, code);
if (code != TSDB_CODE_SUCCESS) {
qError("s-task:%s failed to encode delete request", pTask->id.idStr);
return code;
}
SEncoder encoder;
void* serializedDeleteReq = rpcMallocCont(len + sizeof(SMsgHead));
void* abuf = POINTER_SHIFT(serializedDeleteReq, sizeof(SMsgHead));
tEncoderInit(&encoder, abuf, len);
tEncodeSBatchDeleteReq(&encoder, &deleteReq);
tEncoderClear(&encoder);
taosArrayDestroy(deleteReq.deleteReqs);
((SMsgHead*)serializedDeleteReq)->vgId = TD_VID(pVnode);
SRpcMsg msg = {.msgType = TDMT_VND_BATCH_DEL, .pCont = serializedDeleteReq, .contLen = len + sizeof(SMsgHead)};
if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) {
tqDebug("failed to put delete req into write-queue since %s", terrstr());
}
return TSDB_CODE_SUCCESS;
}
static bool isValidDestChildTable(SMetaReader* pReader, int32_t vgId, char* ctbName, int64_t suid) {
if (pReader->me.type != TSDB_CHILD_TABLE) {
tqError("vgId:%d, failed to write into %s, since table type:%d incorrect", vgId, ctbName, pReader->me.type);
return false;
}
if (pReader->me.ctbEntry.suid != suid) {
tqError("vgId:%d, failed to write into %s, since suid mismatch, expect suid:%" PRId64 ", actual:%" PRId64,
vgId, ctbName, suid, pReader->me.ctbEntry.suid);
return false;
}
return true;
}
static SVCreateTbReq* buildAutoCreateTableReq(char* stbFullName, int64_t suid, int32_t numOfCols, SSDataBlock* pDataBlock) {
char* ctbName = pDataBlock->info.parTbName;
if (!ctbName[0]) {
memset(ctbName, 0, TSDB_TABLE_NAME_LEN);
if (res == TSDB_CODE_SUCCESS) {
memcpy(ctbName, pTableSinkInfo->tbName, strlen(pTableSinkInfo->tbName));
} else {
buildCtbNameByGroupIdImpl(stbFullName, pDataBlock->info.id.groupId, ctbName);
memcpy(pTableSinkInfo->tbName, ctbName, strlen(ctbName));
tqDebug("vgId:%d, gropuId:%" PRIu64 " datablock table name is null", TD_VID(pVnode),
pDataBlock->info.id.groupId);
}
SVCreateTbReq* pCreateTbReq = taosMemoryCalloc(1, sizeof(SVCreateStbReq));
if (pCreateTbReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
if (res == TSDB_CODE_SUCCESS) {
tbData.uid = pTableSinkInfo->uid;
} else {
SMetaReader mr = {0};
metaReaderDoInit(&mr, pVnode->pMeta, 0);
if (metaGetTableEntryByName(&mr, ctbName) < 0) {
metaReaderClear(&mr);
taosMemoryFree(pTableSinkInfo);
tqDebug("vgId:%d, stream write into %s, table auto created", TD_VID(pVnode), ctbName);
SVCreateTbReq* pCreateTbReq = NULL;
if (!(pCreateTbReq = taosMemoryCalloc(1, sizeof(SVCreateStbReq)))) {
goto _end;
};
// set tag content
SArray* tagArray = taosArrayInit(1, sizeof(STagVal));
if (tagArray == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
tdDestroySVCreateTbReq(pCreateTbReq);
taosMemoryFreeClear(pCreateTbReq);
return NULL;
}
// set const
pCreateTbReq->flags = 0;
@ -329,91 +352,206 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d
// set super table name
SName name = {0};
tNameFromString(&name, stbFullName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
pCreateTbReq->ctb.stbName = taosStrdup((char*)tNameGetTableName(&name)); // taosStrdup(stbFullName);
pCreateTbReq->ctb.stbName = taosStrdup((char*)tNameGetTableName(&name));
// set tag content
tagArray = taosArrayInit(1, sizeof(STagVal));
if (!tagArray) {
tdDestroySVCreateTbReq(pCreateTbReq);
taosMemoryFreeClear(pCreateTbReq);
goto _end;
}
STagVal tagVal = {
.cid = pTSchema->numOfCols + 1,
.type = TSDB_DATA_TYPE_UBIGINT,
.i64 = (int64_t)pDataBlock->info.id.groupId,
};
STagVal tagVal = { .cid = numOfCols, .type = TSDB_DATA_TYPE_UBIGINT, .i64 = pDataBlock->info.id.groupId};
taosArrayPush(tagArray, &tagVal);
pCreateTbReq->ctb.tagNum = taosArrayGetSize(tagArray);
STag* pTag = NULL;
tTagNew(tagArray, 1, false, &pTag);
tagArray = taosArrayDestroy(tagArray);
taosArrayDestroy(tagArray);
if (pTag == NULL) {
tdDestroySVCreateTbReq(pCreateTbReq);
taosMemoryFreeClear(pCreateTbReq);
terrno = TSDB_CODE_OUT_OF_MEMORY;
goto _end;
return NULL;
}
pCreateTbReq->ctb.pTag = (uint8_t*)pTag;
// set tag name
SArray* tagName = taosArrayInit(1, TSDB_COL_NAME_LEN);
char tagNameStr[TSDB_COL_NAME_LEN] = {0};
strcpy(tagNameStr, "group_id");
taosArrayPush(tagName, tagNameStr);
char k[TSDB_COL_NAME_LEN] = "group_id";
taosArrayPush(tagName, k);
pCreateTbReq->ctb.tagName = tagName;
// set table name
pCreateTbReq->name = taosStrdup(ctbName);
return pCreateTbReq;
}
tbData.pCreateTbReq = pCreateTbReq;
tbData.flags = SUBMIT_REQ_AUTO_CREATE_TABLE;
static int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, uint64_t groupId, uint64_t uid,
const char* id) {
pTableSinkInfo->uid = uid;
int32_t code = tqPutTableInfo(pSinkTableMap, groupId, pTableSinkInfo);
if (code != TSDB_CODE_SUCCESS) {
taosMemoryFreeClear(pTableSinkInfo);
tqError("s-task:%s failed to put tableSinkInfo in to cache, code:%s", id, tstrerror(code));
} else {
if (mr.me.type != TSDB_CHILD_TABLE) {
tqError("vgId:%d, failed to write into %s, since table type incorrect, type %d", TD_VID(pVnode), ctbName,
mr.me.type);
metaReaderClear(&mr);
taosMemoryFree(pTableSinkInfo);
continue;
tqDebug("s-task:%s new dst table:%s(uid:%" PRIu64 ") added into cache, total:%d", id, pTableSinkInfo->name.data,
pTableSinkInfo->uid, tSimpleHashGetSize(pSinkTableMap));
}
if (mr.me.ctbEntry.suid != suid) {
tqError("vgId:%d, failed to write into %s, since suid mismatch, expect suid: %" PRId64
", actual suid %" PRId64 "",
TD_VID(pVnode), ctbName, suid, mr.me.ctbEntry.suid);
metaReaderClear(&mr);
taosMemoryFree(pTableSinkInfo);
continue;
return code;
}
int32_t doSinkResultBlock(SVnode* pVnode, int32_t blockIndex, char* stbFullName, int64_t suid, SSDataBlock* pDataBlock,
SStreamTask* pTask) {
int32_t numOfRows = pDataBlock->info.rows;
int32_t vgId = TD_VID(pVnode);
uint64_t groupId = pDataBlock->info.id.groupId;
STSchema* pTSchema = pTask->tbSink.pTSchema;
int32_t code = TSDB_CODE_SUCCESS;
void* pBuf = NULL;
SArray* pVals = NULL;
const char* id = pTask->id.idStr;
SSubmitTbData tbData = {.suid = suid, .uid = 0, .sver = pTSchema->version};
tqDebug("s-task:%s sink data pipeline, build submit msg from %d-th resBlock, including %d rows, dst suid:%" PRId64,
id, blockIndex + 1, numOfRows, suid);
tbData.aRowP = taosArrayInit(numOfRows, sizeof(SRow*));
pVals = taosArrayInit(pTSchema->numOfCols, sizeof(SColVal));
if (tbData.aRowP == NULL || pVals == NULL) {
taosArrayDestroy(tbData.aRowP);
taosArrayDestroy(pVals);
code = TSDB_CODE_OUT_OF_MEMORY;
tqError("s-task:%s vgId:%d failed to prepare write stream res blocks, code:%s", id, vgId, tstrerror(code));
return code;
}
STableSinkInfo* pTableSinkInfo = NULL;
bool exist = tqGetTableInfo(pTask->tbSink.pTblInfo, groupId, &pTableSinkInfo);
char* dstTableName = pDataBlock->info.parTbName;
if (exist) {
if (dstTableName[0] == 0) {
tstrncpy(dstTableName, pTableSinkInfo->name.data, pTableSinkInfo->name.len + 1);
tqDebug("s-task:%s vgId:%d, gropuId:%" PRIu64 " datablock table name is null, set name:%s", id, vgId, groupId,
dstTableName);
} else {
if (pTableSinkInfo->uid != 0) {
tqDebug("s-task:%s write %d rows into groupId:%" PRIu64 " dstTable:%s(uid:%" PRIu64 ")", id, numOfRows, groupId,
dstTableName, pTableSinkInfo->uid);
} else {
tqDebug("s-task:%s write %d rows into groupId:%" PRIu64 " dstTable:%s(not set uid yet for the secondary block)",
id, numOfRows, groupId, dstTableName);
}
}
} else { // not exist
if (dstTableName[0] == 0) {
memset(dstTableName, 0, TSDB_TABLE_NAME_LEN);
buildCtbNameByGroupIdImpl(stbFullName, groupId, dstTableName);
}
int32_t nameLen = strlen(dstTableName);
pTableSinkInfo = taosMemoryCalloc(1, sizeof(STableSinkInfo) + nameLen);
pTableSinkInfo->name.len = nameLen;
memcpy(pTableSinkInfo->name.data, dstTableName, nameLen);
tqDebug("s-task:%s build new sinkTableInfo to add cache, dstTable:%s", id, dstTableName);
}
if (exist) {
tbData.uid = pTableSinkInfo->uid;
if (tbData.uid == 0) {
tqDebug("s-task:%s cached tableInfo uid is invalid, acquire it from meta", id);
}
while (pTableSinkInfo->uid == 0) {
// wait for the table to be created
SMetaReader mr = {0};
metaReaderDoInit(&mr, pVnode->pMeta, 0);
code = metaGetTableEntryByName(&mr, dstTableName);
if (code == 0) { // table alreay exists, check its type and uid
bool isValid = isValidDestChildTable(&mr, vgId, dstTableName, suid);
if (!isValid) { // not valid table, ignore it
metaReaderClear(&mr);
taosArrayDestroy(tbData.aRowP);
taosArrayDestroy(pVals);
return TSDB_CODE_SUCCESS;
} else {
tqDebug("s-task:%s set uid:%"PRIu64" for dstTable:%s from meta", id, mr.me.uid, pTableSinkInfo->name.data);
tbData.uid = mr.me.uid;
pTableSinkInfo->uid = mr.me.uid;
int32_t code = tqPutTableInfo(pTask->tbSink.pTblInfo, pDataBlock->info.id.groupId, pTableSinkInfo);
if (code != TSDB_CODE_SUCCESS) {
taosMemoryFreeClear(pTableSinkInfo);
}
metaReaderClear(&mr);
}
} else { // not exist, wait and retry
metaReaderClear(&mr);
taosMsleep(100);
tqDebug("s-task:%s wait for the table:%s ready before insert data", id, dstTableName);
}
}
} else {
// todo: this check is not safe, and results in losing of submit message from WAL.
// The auto-create option will always set to be open for those submit messages, which arrive during the period
// the creating of the destination table, due to the absence of the user-specified table in TSDB. When scanning
// data from WAL, those submit messages, with auto-created table option, will be discarded expect the first, for
// those mismatched table uids. Only the FIRST table has the correct table uid, and those remain all have
// randomly generated false table uid in the WAL.
SMetaReader mr = {0};
metaReaderDoInit(&mr, pVnode->pMeta, 0);
// table not in cache, let's try the extract it from tsdb meta
if (metaGetTableEntryByName(&mr, dstTableName) < 0) {
metaReaderClear(&mr);
tqDebug("s-task:%s stream write into table:%s, table auto created", id, dstTableName);
tbData.flags = SUBMIT_REQ_AUTO_CREATE_TABLE;
tbData.pCreateTbReq = buildAutoCreateTableReq(stbFullName, suid, pTSchema->numOfCols + 1, pDataBlock);
if (tbData.pCreateTbReq == NULL) {
tqError("s-task:%s failed to build auto create table req, code:%s", id, tstrerror(terrno));
taosArrayDestroy(tbData.aRowP);
taosArrayDestroy(pVals);
return terrno;
}
doPutIntoCache(pTask->tbSink.pTblInfo, pTableSinkInfo, groupId, 0, id);
} else {
bool isValid = isValidDestChildTable(&mr, vgId, dstTableName, suid);
if (!isValid) {
metaReaderClear(&mr);
taosMemoryFree(pTableSinkInfo);
taosArrayDestroy(tbData.aRowP);
taosArrayDestroy(pVals);
return TSDB_CODE_SUCCESS;
} else {
tbData.uid = mr.me.uid;
metaReaderClear(&mr);
doPutIntoCache(pTask->tbSink.pTblInfo, pTableSinkInfo, groupId, tbData.uid, id);
}
}
}
// rows
if (!pVals && !(pVals = taosArrayInit(pTSchema->numOfCols, sizeof(SColVal)))) {
taosArrayDestroy(tbData.aRowP);
tdDestroySVCreateTbReq(tbData.pCreateTbReq);
goto _end;
}
for (int32_t j = 0; j < rows; j++) {
for (int32_t j = 0; j < numOfRows; j++) {
taosArrayClear(pVals);
int32_t dataIndex = 0;
for (int32_t k = 0; k < pTSchema->numOfCols; k++) {
const STColumn* pCol = &pTSchema->columns[k];
if (k == 0) {
SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex);
void* colData = colDataGetData(pColData, j);
tqDebug("tq sink pipe2, row %d, col %d ts %" PRId64, j, k, *(int64_t*)colData);
tqDebug("s-task:%s tq sink pipe2, row %d, col %d ts %" PRId64, id, j, k, *(int64_t*)colData);
}
if (IS_SET_NULL(pCol)) {
SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type);
taosArrayPush(pVals, &cv);
@ -440,11 +578,17 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d
}
}
}
SRow* pRow = NULL;
if ((terrno = tRowBuild(pVals, (STSchema*)pTSchema, &pRow)) < 0) {
code = tRowBuild(pVals, (STSchema*)pTSchema, &pRow);
if (code != TSDB_CODE_SUCCESS) {
tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE);
goto _end;
taosArrayDestroy(tbData.aRowP);
taosArrayDestroy(pVals);
return code;
}
ASSERT(pRow);
taosArrayPush(tbData.aRowP, &pRow);
}
@ -452,49 +596,55 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d
SSubmitReq2 submitReq = {0};
if (!(submitReq.aSubmitTbData = taosArrayInit(1, sizeof(SSubmitTbData)))) {
tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE);
goto _end;
taosArrayDestroy(tbData.aRowP);
taosArrayDestroy(pVals);
return TSDB_CODE_OUT_OF_MEMORY;
}
taosArrayPush(submitReq.aSubmitTbData, &tbData);
// encode
int32_t len;
int32_t code;
int32_t len = 0;
tEncodeSize(tEncodeSubmitReq, &submitReq, len, code);
SEncoder encoder;
len += sizeof(SSubmitReq2Msg);
pBuf = rpcMallocCont(len);
if (NULL == pBuf) {
tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE);
goto _end;
taosArrayDestroy(tbData.aRowP);
taosArrayDestroy(pVals);
}
((SSubmitReq2Msg*)pBuf)->header.vgId = TD_VID(pVnode);
((SSubmitReq2Msg*)pBuf)->header.vgId = vgId;
((SSubmitReq2Msg*)pBuf)->header.contLen = htonl(len);
((SSubmitReq2Msg*)pBuf)->version = htobe64(1);
tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SSubmitReq2Msg)), len - sizeof(SSubmitReq2Msg));
if (tEncodeSubmitReq(&encoder, &submitReq) < 0) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
tqError("failed to encode submit req since %s", terrstr());
tqError("failed to encode submit req, code:%s, ignore and continue", terrstr());
tEncoderClear(&encoder);
rpcFreeCont(pBuf);
tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE);
continue;
return code;
}
tEncoderClear(&encoder);
tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE);
SRpcMsg msg = { .msgType = TDMT_VND_SUBMIT, .pCont = pBuf, .contLen = len };
if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) {
tqDebug("failed to put into write-queue since %s", terrstr());
}
}
code = tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg);
if(code == TSDB_CODE_SUCCESS) {
tqDebug("s-task:%s send submit msg to dstTable:%s, numOfRows:%d", id, pTableSinkInfo->name.data, numOfRows);
} else {
tqError("s-task:%s failed to put into write-queue since %s", id, terrstr());
}
tqDebug("vgId:%d, s-task:%s write results completed", TD_VID(pVnode), pTask->id.idStr);
_end:
taosArrayDestroy(tagArray);
taosArrayDestroy(pVals);
taosArrayDestroyEx(crTblArray, (FDelete)tdDestroySVCreateTbReq);
// TODO: change
return code;
}

View File

@ -14,128 +14,122 @@
*/
#include "meta.h"
#include "streamSnapshot.h"
#include "tdbInt.h"
#include "tq.h"
// STqSnapReader ========================================
struct STqSnapReader {
struct SStreamStateReader {
STQ* pTq;
int64_t sver;
int64_t ever;
TBC* pCur;
SStreamSnapReader* pReaderImpl;
int32_t complete; // open reader or not
};
int32_t tqSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapReader** ppReader) {
int32_t streamStateSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamStateReader** ppReader) {
int32_t code = 0;
STqSnapReader* pReader = NULL;
SStreamStateReader* pReader = NULL;
char tdir[TSDB_FILENAME_LEN * 2] = {0};
// alloc
pReader = (STqSnapReader*)taosMemoryCalloc(1, sizeof(STqSnapReader));
pReader = (SStreamStateReader*)taosMemoryCalloc(1, sizeof(SStreamStateReader));
if (pReader == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
SStreamMeta* meta = pTq->pStreamMeta;
pReader->pTq = pTq;
pReader->sver = sver;
pReader->ever = ever;
// impl
code = tdbTbcOpen(pTq->pExecStore, &pReader->pCur, NULL);
if (code) {
int64_t chkpId = meta ? meta->chkpId : 0;
SStreamSnapReader* pSnapReader = NULL;
if (streamSnapReaderOpen(pTq, sver, chkpId, pTq->path, &pSnapReader) == 0) {
pReader->complete = 1;
} else {
code = -1;
taosMemoryFree(pReader);
goto _err;
}
pReader->pReaderImpl = pSnapReader;
code = tdbTbcMoveToFirst(pReader->pCur);
if (code) {
taosMemoryFree(pReader);
goto _err;
}
tqInfo("vgId:%d, vnode snapshot tq reader opened", TD_VID(pTq->pVnode));
tqDebug("vgId:%d, vnode %s snapshot reader opened", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER);
*ppReader = pReader;
return code;
_err:
tqError("vgId:%d, vnode snapshot tq reader open failed since %s", TD_VID(pTq->pVnode), tstrerror(code));
tqError("vgId:%d, vnode %s snapshot reader failed to open since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER,
tstrerror(code));
*ppReader = NULL;
return code;
}
int32_t tqSnapReaderClose(STqSnapReader** ppReader) {
int32_t streamStateSnapReaderClose(SStreamStateReader* pReader) {
int32_t code = 0;
tdbTbcClose((*ppReader)->pCur);
taosMemoryFree(*ppReader);
*ppReader = NULL;
tqDebug("vgId:%d, vnode %s snapshot reader closed", TD_VID(pReader->pTq->pVnode), STREAM_STATE_TRANSFER);
streamSnapReaderClose(pReader->pReaderImpl);
taosMemoryFree(pReader);
return code;
}
int32_t tqSnapRead(STqSnapReader* pReader, uint8_t** ppData) {
int32_t streamStateSnapRead(SStreamStateReader* pReader, uint8_t** ppData) {
tqDebug("vgId:%d, vnode %s snapshot read data", TD_VID(pReader->pTq->pVnode), STREAM_STATE_TRANSFER);
int32_t code = 0;
const void* pKey = NULL;
const void* pVal = NULL;
int32_t kLen = 0;
int32_t vLen = 0;
SDecoder decoder;
STqHandle handle;
*ppData = NULL;
for (;;) {
if (tdbTbcGet(pReader->pCur, &pKey, &kLen, &pVal, &vLen)) {
goto _exit;
if (pReader->complete == 0) {
return 0;
}
tDecoderInit(&decoder, (uint8_t*)pVal, vLen);
tDecodeSTqHandle(&decoder, &handle);
tDecoderClear(&decoder);
if (handle.snapshotVer <= pReader->sver && handle.snapshotVer >= pReader->ever) {
tdbTbcMoveToNext(pReader->pCur);
break;
} else {
tdbTbcMoveToNext(pReader->pCur);
uint8_t* rowData = NULL;
int64_t len;
code = streamSnapRead(pReader->pReaderImpl, &rowData, &len);
if (rowData == NULL || len == 0) {
return code;
}
}
*ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + vLen);
*ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + len);
if (*ppData == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
// refactor later, avoid mem/free freq
SSnapDataHdr* pHdr = (SSnapDataHdr*)(*ppData);
pHdr->type = SNAP_DATA_TQ_HANDLE;
pHdr->size = vLen;
memcpy(pHdr->data, pVal, vLen);
tqInfo("vgId:%d, vnode snapshot tq read data, version:%" PRId64 " subKey: %s vLen:%d", TD_VID(pReader->pTq->pVnode),
handle.snapshotVer, handle.subKey, vLen);
_exit:
pHdr->type = SNAP_DATA_STREAM_STATE_BACKEND;
pHdr->size = len;
memcpy(pHdr->data, rowData, len);
tqDebug("vgId:%d, vnode stream-state snapshot read data success", TD_VID(pReader->pTq->pVnode));
return code;
_err:
tqError("vgId:%d, vnode snapshot tq read data failed since %s", TD_VID(pReader->pTq->pVnode), tstrerror(code));
tqError("vgId:%d, vnode stream-state snapshot failed to read since %s", TD_VID(pReader->pTq->pVnode),
tstrerror(code));
return code;
}
// STqSnapWriter ========================================
struct STqSnapWriter {
struct SStreamStateWriter {
STQ* pTq;
int64_t sver;
int64_t ever;
TXN* txn;
SStreamSnapWriter* pWriterImpl;
};
int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** ppWriter) {
int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamStateWriter** ppWriter) {
int32_t code = 0;
STqSnapWriter* pWriter;
SStreamStateWriter* pWriter;
char tdir[TSDB_FILENAME_LEN * 2] = {0};
// alloc
pWriter = (STqSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter));
pWriter = (SStreamStateWriter*)taosMemoryCalloc(1, sizeof(*pWriter));
if (pWriter == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
@ -144,68 +138,48 @@ int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** p
pWriter->sver = sver;
pWriter->ever = ever;
if (tdbBegin(pTq->pMetaDB, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) {
code = -1;
taosMemoryFree(pWriter);
sprintf(tdir, "%s%s%s%s%s", pTq->path, TD_DIRSEP, VNODE_TQ_STREAM, TD_DIRSEP, "received");
taosMkDir(tdir);
SStreamSnapWriter* pSnapWriter = NULL;
if (streamSnapWriterOpen(pTq, sver, ever, tdir, &pSnapWriter) < 0) {
goto _err;
}
tqDebug("vgId:%d, vnode %s snapshot writer opened, path:%s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, tdir);
pWriter->pWriterImpl = pSnapWriter;
*ppWriter = pWriter;
return code;
_err:
tqError("vgId:%d, tq snapshot writer open failed since %s", TD_VID(pTq->pVnode), tstrerror(code));
*ppWriter = NULL;
return code;
}
int32_t tqSnapWriterClose(STqSnapWriter** ppWriter, int8_t rollback) {
int32_t code = 0;
STqSnapWriter* pWriter = *ppWriter;
STQ* pTq = pWriter->pTq;
if (rollback) {
tdbAbort(pWriter->pTq->pMetaDB, pWriter->txn);
} else {
code = tdbCommit(pWriter->pTq->pMetaDB, pWriter->txn);
if (code) goto _err;
code = tdbPostCommit(pWriter->pTq->pMetaDB, pWriter->txn);
if (code) goto _err;
}
tqError("vgId:%d, vnode %s snapshot writer failed to open since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER,
tstrerror(code));
taosMemoryFree(pWriter);
*ppWriter = NULL;
// restore from metastore
if (tqMetaRestoreHandle(pTq) < 0) {
goto _err;
}
return code;
_err:
tqError("vgId:%d, tq snapshot writer close failed since %s", TD_VID(pWriter->pTq->pVnode), tstrerror(code));
return code;
return -1;
}
int32_t tqSnapWrite(STqSnapWriter* pWriter, uint8_t* pData, uint32_t nData) {
int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback) {
int32_t code = 0;
STQ* pTq = pWriter->pTq;
SDecoder decoder = {0};
SDecoder* pDecoder = &decoder;
STqHandle handle;
tqDebug("vgId:%d, vnode %s snapshot writer closed", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER);
code = streamSnapWriterClose(pWriter->pWriterImpl, rollback);
tDecoderInit(pDecoder, pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr));
code = tDecodeSTqHandle(pDecoder, &handle);
if (code) goto _err;
code = tqMetaSaveHandle(pTq, handle.subKey, &handle);
if (code < 0) goto _err;
tDecoderClear(pDecoder);
return code;
_err:
tDecoderClear(pDecoder);
tqError("vgId:%d, vnode snapshot tq write failed since %s", TD_VID(pTq->pVnode), tstrerror(code));
return code;
}
int32_t streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId) {
tqDebug("vgId:%d, vnode %s start to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER);
int32_t code = streamMetaReopen(pWriter->pTq->pStreamMeta, chkpId);
if (code == 0) {
code = streamStateLoadTasks(pWriter);
}
tqDebug("vgId:%d, vnode %s succ to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER);
taosMemoryFree(pWriter);
return code;
}
int32_t streamStateLoadTasks(SStreamStateWriter* pWriter) { return streamLoadTasks(pWriter->pTq->pStreamMeta); }
int32_t streamStateSnapWrite(SStreamStateWriter* pWriter, uint8_t* pData, uint32_t nData) {
tqDebug("vgId:%d, vnode %s snapshot write data", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER);
return streamSnapWrite(pWriter->pWriterImpl, pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr));
}

View File

@ -18,19 +18,26 @@
#include "tq.h"
// STqSnapReader ========================================
struct STqSnapReader {
typedef struct {
int8_t type;
TTB* tbl;
} STablePair;
struct SStreamTaskReader {
STQ* pTq;
int64_t sver;
int64_t ever;
TBC* pCur;
SArray* tdbTbList;
int8_t pos;
};
int32_t tqSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapReader** ppReader) {
int32_t streamTaskSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTaskReader** ppReader) {
int32_t code = 0;
STqSnapReader* pReader = NULL;
SStreamTaskReader* pReader = NULL;
// alloc
pReader = (STqSnapReader*)taosMemoryCalloc(1, sizeof(STqSnapReader));
pReader = (SStreamTaskReader*)taosMemoryCalloc(1, sizeof(SStreamTaskReader));
if (pReader == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
@ -38,68 +45,101 @@ int32_t tqSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapReader** p
pReader->pTq = pTq;
pReader->sver = sver;
pReader->ever = ever;
pReader->tdbTbList = taosArrayInit(4, sizeof(STablePair));
// impl
code = tdbTbcOpen(pTq->pExecStore, &pReader->pCur, NULL);
STablePair pair1 = {.tbl = pTq->pStreamMeta->pTaskDb, .type = SNAP_DATA_STREAM_TASK};
taosArrayPush(pReader->tdbTbList, &pair1);
STablePair pair2 = {.tbl = pTq->pStreamMeta->pCheckpointDb, .type = SNAP_DATA_STREAM_TASK_CHECKPOINT};
taosArrayPush(pReader->tdbTbList, &pair2);
pReader->pos = 0;
STablePair* pPair = taosArrayGet(pReader->tdbTbList, pReader->pos);
code = tdbTbcOpen(pPair->tbl, &pReader->pCur, NULL);
if (code) {
tqInfo("vgId:%d, vnode stream-task snapshot reader failed to open, reason: %s", TD_VID(pTq->pVnode),
tstrerror(code));
taosMemoryFree(pReader);
goto _err;
}
code = tdbTbcMoveToFirst(pReader->pCur);
if (code) {
tqInfo("vgId:%d, vnode stream-task snapshot reader failed to iterate, reason: %s", TD_VID(pTq->pVnode),
tstrerror(code));
taosMemoryFree(pReader);
goto _err;
}
tqInfo("vgId:%d, vnode snapshot tq reader opened", TD_VID(pTq->pVnode));
tqDebug("vgId:%d, vnode stream-task snapshot reader opened", TD_VID(pTq->pVnode));
*ppReader = pReader;
return code;
_err:
tqError("vgId:%d, vnode snapshot tq reader open failed since %s", TD_VID(pTq->pVnode), tstrerror(code));
tqError("vgId:%d, vnode stream-task snapshot reader open failed since %s", TD_VID(pTq->pVnode), tstrerror(code));
*ppReader = NULL;
return code;
}
int32_t tqSnapReaderClose(STqSnapReader** ppReader) {
int32_t streamTaskSnapReaderClose(SStreamTaskReader* pReader) {
int32_t code = 0;
tdbTbcClose((*ppReader)->pCur);
taosMemoryFree(*ppReader);
*ppReader = NULL;
tqInfo("vgId:%d, vnode stream-task snapshot reader closed", TD_VID(pReader->pTq->pVnode));
taosArrayDestroy(pReader->tdbTbList);
tdbTbcClose(pReader->pCur);
taosMemoryFree(pReader);
return code;
}
int32_t tqSnapRead(STqSnapReader* pReader, uint8_t** ppData) {
int32_t streamTaskSnapRead(SStreamTaskReader* pReader, uint8_t** ppData) {
int32_t code = 0;
const void* pKey = NULL;
const void* pVal = NULL;
void* pVal = NULL;
int32_t kLen = 0;
int32_t vLen = 0;
SDecoder decoder;
STqHandle handle;
*ppData = NULL;
int8_t except = 0;
tqDebug("vgId:%d, vnode stream-task snapshot start read data", TD_VID(pReader->pTq->pVnode));
STablePair* pPair = taosArrayGet(pReader->tdbTbList, pReader->pos);
NextTbl:
except = 0;
for (;;) {
if (tdbTbcGet(pReader->pCur, &pKey, &kLen, &pVal, &vLen)) {
goto _exit;
}
tDecoderInit(&decoder, (uint8_t*)pVal, vLen);
tDecodeSTqHandle(&decoder, &handle);
tDecoderClear(&decoder);
if (handle.snapshotVer <= pReader->sver && handle.snapshotVer >= pReader->ever) {
tdbTbcMoveToNext(pReader->pCur);
const void* tVal = NULL;
int32_t tLen = 0;
if (tdbTbcGet(pReader->pCur, &pKey, &kLen, &tVal, &tLen)) {
except = 1;
break;
} else {
pVal = taosMemoryCalloc(1, tLen);
memcpy(pVal, tVal, tLen);
vLen = tLen;
}
tdbTbcMoveToNext(pReader->pCur);
break;
}
}
if (except == 1) {
if (pReader->pos + 1 < taosArrayGetSize(pReader->tdbTbList)) {
tdbTbcClose(pReader->pCur);
pReader->pos += 1;
pPair = taosArrayGet(pReader->tdbTbList, pReader->pos);
code = tdbTbcOpen(pPair->tbl, &pReader->pCur, NULL);
tdbTbcMoveToFirst(pReader->pCur);
goto NextTbl;
}
}
if (pVal == NULL || vLen == 0) {
*ppData = NULL;
tqDebug("vgId:%d, vnode stream-task snapshot finished read data", TD_VID(pReader->pTq->pVnode));
return code;
}
*ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + vLen);
if (*ppData == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
@ -107,35 +147,34 @@ int32_t tqSnapRead(STqSnapReader* pReader, uint8_t** ppData) {
}
SSnapDataHdr* pHdr = (SSnapDataHdr*)(*ppData);
pHdr->type = SNAP_DATA_TQ_HANDLE;
pHdr->type = pPair->type;
pHdr->size = vLen;
memcpy(pHdr->data, pVal, vLen);
taosMemoryFree(pVal);
tqInfo("vgId:%d, vnode snapshot tq read data, version:%" PRId64 " subKey: %s vLen:%d", TD_VID(pReader->pTq->pVnode),
handle.snapshotVer, handle.subKey, vLen);
tqDebug("vgId:%d, vnode stream-task snapshot read data vLen:%d", TD_VID(pReader->pTq->pVnode), vLen);
_exit:
return code;
_err:
tqError("vgId:%d, vnode snapshot tq read data failed since %s", TD_VID(pReader->pTq->pVnode), tstrerror(code));
tqError("vgId:%d, vnode stream-task snapshot read data failed since %s", TD_VID(pReader->pTq->pVnode),
tstrerror(code));
return code;
}
// STqSnapWriter ========================================
struct STqSnapWriter {
struct SStreamTaskWriter {
STQ* pTq;
int64_t sver;
int64_t ever;
TXN* txn;
};
int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** ppWriter) {
int32_t streamTaskSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTaskWriter** ppWriter) {
int32_t code = 0;
STqSnapWriter* pWriter;
SStreamTaskWriter* pWriter;
// alloc
pWriter = (STqSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter));
pWriter = (SStreamTaskWriter*)taosMemoryCalloc(1, sizeof(*pWriter));
if (pWriter == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
@ -144,68 +183,88 @@ int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** p
pWriter->sver = sver;
pWriter->ever = ever;
if (tdbBegin(pTq->pMetaDB, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) {
if (tdbBegin(pTq->pStreamMeta->db, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) {
code = -1;
taosMemoryFree(pWriter);
goto _err;
}
*ppWriter = pWriter;
tqDebug("vgId:%d, vnode stream-task snapshot writer opened", TD_VID(pTq->pVnode));
return code;
_err:
tqError("vgId:%d, tq snapshot writer open failed since %s", TD_VID(pTq->pVnode), tstrerror(code));
tqError("vgId:%d, vnode stream-task snapshot writer failed to write since %s", TD_VID(pTq->pVnode), tstrerror(code));
*ppWriter = NULL;
return code;
return 0;
}
int32_t tqSnapWriterClose(STqSnapWriter** ppWriter, int8_t rollback) {
int32_t streamTaskSnapWriterClose(SStreamTaskWriter* pWriter, int8_t rollback) {
int32_t code = 0;
STqSnapWriter* pWriter = *ppWriter;
STQ* pTq = pWriter->pTq;
tqDebug("vgId:%d, vnode stream-task snapshot writer closed", TD_VID(pTq->pVnode));
if (rollback) {
tdbAbort(pWriter->pTq->pMetaDB, pWriter->txn);
tdbAbort(pWriter->pTq->pStreamMeta->db, pWriter->txn);
} else {
code = tdbCommit(pWriter->pTq->pMetaDB, pWriter->txn);
code = tdbCommit(pWriter->pTq->pStreamMeta->db, pWriter->txn);
if (code) goto _err;
code = tdbPostCommit(pWriter->pTq->pMetaDB, pWriter->txn);
code = tdbPostCommit(pWriter->pTq->pStreamMeta->db, pWriter->txn);
if (code) goto _err;
}
taosMemoryFree(pWriter);
*ppWriter = NULL;
// restore from metastore
if (tqMetaRestoreHandle(pTq) < 0) {
goto _err;
}
// if (tqMetaRestoreHandle(pTq) < 0) {
// goto _err;
// }
return code;
_err:
tqError("vgId:%d, tq snapshot writer close failed since %s", TD_VID(pWriter->pTq->pVnode), tstrerror(code));
tqError("vgId:%d, vnode stream-task snapshot writer failed to close since %s", TD_VID(pWriter->pTq->pVnode),
tstrerror(code));
return code;
return 0;
}
int32_t tqSnapWrite(STqSnapWriter* pWriter, uint8_t* pData, uint32_t nData) {
int32_t streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t nData) {
int32_t code = 0;
STQ* pTq = pWriter->pTq;
SDecoder decoder = {0};
SDecoder* pDecoder = &decoder;
STqHandle handle;
SSnapDataHdr* pHdr = (SSnapDataHdr*)pData;
if (pHdr->type == SNAP_DATA_STREAM_TASK) {
SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask));
if (pTask == NULL) {
return -1;
}
tDecoderInit(pDecoder, pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr));
code = tDecodeSTqHandle(pDecoder, &handle);
if (code) goto _err;
code = tqMetaSaveHandle(pTq, handle.subKey, &handle);
if (code < 0) goto _err;
tDecoderClear(pDecoder);
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr));
code = tDecodeStreamTask(&decoder, pTask);
if (code < 0) {
tDecoderClear(&decoder);
taosMemoryFree(pTask);
goto _err;
}
tDecoderClear(&decoder);
// tdbTbInsert(TTB *pTb, const void *pKey, int keyLen, const void *pVal, int valLen, TXN *pTxn)
if (tdbTbUpsert(pTq->pStreamMeta->pTaskDb, &pTask->id.taskId, sizeof(int32_t),
(uint8_t*)pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr), pWriter->txn) < 0) {
taosMemoryFree(pTask);
return -1;
}
taosMemoryFree(pTask);
} else if (pHdr->type == SNAP_DATA_STREAM_TASK_CHECKPOINT) {
// do nothing
}
tqDebug("vgId:%d, vnode stream-task snapshot write", TD_VID(pTq->pVnode));
return code;
_err:
tDecoderClear(pDecoder);
tqError("vgId:%d, vnode snapshot tq write failed since %s", TD_VID(pTq->pVnode), tstrerror(code));
tqError("vgId:%d, vnode stream-task snapshot failed to write since %s", TD_VID(pTq->pVnode), tstrerror(code));
return code;
}

View File

@ -36,6 +36,12 @@ int32_t tqInitDataRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset) {
return 0;
}
void tqUpdateNodeStage(STQ* pTq) {
SSyncState state = syncGetState(pTq->pVnode->sync);
pTq->pStreamMeta->stage = state.term;
tqDebug("vgId:%d update the meta stage to be:%"PRId64, pTq->pStreamMeta->vgId, pTq->pStreamMeta->stage);
}
static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, STqOffsetVal pOffset) {
pRsp->reqOffset = pOffset;
pRsp->rspOffset = pOffset;
@ -400,3 +406,56 @@ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp*
tmsgSendRsp(&rsp);
return 0;
}
int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock) {
SDecoder* pCoder = &(SDecoder){0};
SDeleteRes* pRes = &(SDeleteRes){0};
*pRefBlock = NULL;
pRes->uidList = taosArrayInit(0, sizeof(tb_uid_t));
if (pRes->uidList == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
tDecoderInit(pCoder, (uint8_t*)pData, len);
tDecodeDeleteRes(pCoder, pRes);
tDecoderClear(pCoder);
int32_t numOfTables = taosArrayGetSize(pRes->uidList);
if (numOfTables == 0 || pRes->affectedRows == 0) {
taosArrayDestroy(pRes->uidList);
return TSDB_CODE_SUCCESS;
}
SSDataBlock* pDelBlock = createSpecialDataBlock(STREAM_DELETE_DATA);
blockDataEnsureCapacity(pDelBlock, numOfTables);
pDelBlock->info.rows = numOfTables;
pDelBlock->info.version = ver;
for (int32_t i = 0; i < numOfTables; i++) {
// start key column
SColumnInfoData* pStartCol = taosArrayGet(pDelBlock->pDataBlock, START_TS_COLUMN_INDEX);
colDataSetVal(pStartCol, i, (const char*)&pRes->skey, false); // end key column
SColumnInfoData* pEndCol = taosArrayGet(pDelBlock->pDataBlock, END_TS_COLUMN_INDEX);
colDataSetVal(pEndCol, i, (const char*)&pRes->ekey, false);
// uid column
SColumnInfoData* pUidCol = taosArrayGet(pDelBlock->pDataBlock, UID_COLUMN_INDEX);
int64_t* pUid = taosArrayGet(pRes->uidList, i);
colDataSetVal(pUidCol, i, (const char*)pUid, false);
colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, GROUPID_COLUMN_INDEX), i);
colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX), i);
colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX), i);
}
taosArrayDestroy(pRes->uidList);
*pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0);
if (*pRefBlock == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
(*pRefBlock)->type = STREAM_INPUT__REF_DATA_BLOCK;
(*pRefBlock)->pBlock = pDelBlock;
return TSDB_CODE_SUCCESS;
}

View File

@ -88,6 +88,9 @@ _err:
int tsdbClose(STsdb **pTsdb) {
if (*pTsdb) {
STsdb *pdb = *pTsdb;
tsdbDebug("vgId:%d, tsdb is close at %s, days:%d, keep:%d,%d,%d", TD_VID(pdb->pVnode), pdb->path, pdb->keepCfg.days,
pdb->keepCfg.keep0, pdb->keepCfg.keep1, pdb->keepCfg.keep2);
taosThreadRwlockWrlock(&(*pTsdb)->rwLock);
tsdbMemTableDestroy((*pTsdb)->mem, true);
(*pTsdb)->mem = NULL;

View File

@ -422,6 +422,15 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC
// open tq
sprintf(tdir, "%s%s%s", dir, TD_DIRSEP, VNODE_TQ_DIR);
taosRealPath(tdir, NULL, sizeof(tdir));
// open query
if (vnodeQueryOpen(pVnode)) {
vError("vgId:%d, failed to open vnode query since %s", TD_VID(pVnode), tstrerror(terrno));
terrno = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
// sma required the tq is initialized before the vnode open
pVnode->pTq = tqOpen(tdir, pVnode);
if (pVnode->pTq == NULL) {
vError("vgId:%d, failed to open vnode tq since %s", TD_VID(pVnode), tstrerror(terrno));
@ -434,13 +443,6 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC
goto _err;
}
// open query
if (vnodeQueryOpen(pVnode)) {
vError("vgId:%d, failed to open vnode query since %s", TD_VID(pVnode), tstrerror(terrno));
terrno = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
// vnode begin
if (vnodeBegin(pVnode) < 0) {
vError("vgId:%d, failed to begin since %s", TD_VID(pVnode), tstrerror(terrno));

View File

@ -220,9 +220,57 @@ int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData)
}
// STREAM ============
vInfo("stream task start");
if (!pReader->streamTaskDone) {
if (pReader->pStreamTaskReader == NULL) {
vInfo("stream task start 1");
code = streamTaskSnapReaderOpen(pReader->pVnode->pTq, pReader->sver, pReader->sver, &pReader->pStreamTaskReader);
if (code) {
vInfo("stream task start err");
goto _err;
}
}
code = streamTaskSnapRead(pReader->pStreamTaskReader, ppData);
vInfo("stream task start 2");
if (code) {
vInfo("stream task start 3");
goto _err;
} else {
if (*ppData) {
goto _exit;
vInfo("stream task start 4");
} else {
pReader->streamTaskDone = 1;
code = streamTaskSnapReaderClose(pReader->pStreamTaskReader);
vInfo("stream task start 5");
if (code) goto _err;
pReader->pStreamTaskReader = NULL;
}
}
}
if (!pReader->streamStateDone) {
if (pReader->pStreamStateReader == NULL) {
code =
streamStateSnapReaderOpen(pReader->pVnode->pTq, pReader->sver, pReader->sver, &pReader->pStreamStateReader);
if (code) {
pReader->streamStateDone = 1;
pReader->pStreamStateReader = NULL;
goto _err;
}
}
code = streamStateSnapRead(pReader->pStreamStateReader, ppData);
if (code) {
goto _err;
} else {
if (*ppData) {
goto _exit;
} else {
pReader->streamStateDone = 1;
code = streamStateSnapReaderClose(pReader->pStreamStateReader);
if (code) goto _err;
pReader->pStreamStateReader = NULL;
}
}
}
// RSMA ==============
@ -362,6 +410,20 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot *
if (code) goto _exit;
}
if (pWriter->pStreamTaskWriter) {
code = streamTaskSnapWriterClose(pWriter->pStreamTaskWriter, rollback);
if (code) goto _exit;
}
if (pWriter->pStreamStateWriter) {
code = streamStateSnapWriterClose(pWriter->pStreamStateWriter, rollback);
if (code) goto _exit;
code = streamStateRebuildFromSnap(pWriter->pStreamStateWriter, 0);
pWriter->pStreamStateWriter = NULL;
if (code) goto _exit;
}
if (pWriter->pRsmaSnapWriter) {
code = rsmaSnapWriterClose(&pWriter->pRsmaSnapWriter, rollback);
if (code) goto _exit;
@ -459,9 +521,23 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) {
} break;
case SNAP_DATA_TQ_OFFSET: {
} break;
case SNAP_DATA_STREAM_TASK: {
case SNAP_DATA_STREAM_TASK:
case SNAP_DATA_STREAM_TASK_CHECKPOINT: {
if (pWriter->pStreamTaskWriter == NULL) {
code = streamTaskSnapWriterOpen(pVnode->pTq, pWriter->sver, pWriter->ever, &pWriter->pStreamTaskWriter);
if (code) goto _err;
}
code = streamTaskSnapWrite(pWriter->pStreamTaskWriter, pData, nData);
if (code) goto _err;
} break;
case SNAP_DATA_STREAM_STATE: {
case SNAP_DATA_STREAM_STATE_BACKEND: {
if (pWriter->pStreamStateWriter == NULL) {
code = streamStateSnapWriterOpen(pVnode->pTq, pWriter->sver, pWriter->ever, &pWriter->pStreamStateWriter);
if (code) goto _err;
}
code = streamStateSnapWrite(pWriter->pStreamStateWriter, pData, nData);
if (code) goto _err;
} break;
case SNAP_DATA_RSMA1:
case SNAP_DATA_RSMA2:

View File

@ -377,7 +377,7 @@ static int32_t vnodePreProcessDeleteMsg(SVnode *pVnode, SRpcMsg *pMsg) {
SEncoder *pCoder = &(SEncoder){0};
SDeleteRes res = {0};
SReadHandle handle = {.config = &pVnode->config, .vnode = pVnode, .pMsgCb = &pVnode->msgCb};
SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb};
initStorageAPI(&handle.api);
code = qWorkerProcessDeleteMsg(&handle, pVnode->pQuery, pMsg, &res);
@ -561,7 +561,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg
}
break;
case TDMT_STREAM_TASK_DEPLOY: {
if (pVnode->restored && tqProcessTaskDeployReq(pVnode->pTq, ver, pReq, len) < 0) {
if (tqProcessTaskDeployReq(pVnode->pTq, ver, pReq, len) < 0) {
goto _err;
}
} break;
@ -571,12 +571,14 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg
}
} break;
case TDMT_STREAM_TASK_PAUSE: {
if (pVnode->restored && tqProcessTaskPauseReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) {
if (pVnode->restored && vnodeIsLeader(pVnode) &&
tqProcessTaskPauseReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) {
goto _err;
}
} break;
case TDMT_STREAM_TASK_RESUME: {
if (pVnode->restored && tqProcessTaskResumeReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) {
if (pVnode->restored && vnodeIsLeader(pVnode) &&
tqProcessTaskResumeReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) {
goto _err;
}
} break;
@ -586,6 +588,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg
goto _err;
}
break;
case TDMT_VND_ALTER_CONFIG:
vnodeProcessAlterConfigReq(pVnode, ver, pReq, len, pRsp);
break;
@ -598,6 +601,12 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg
case TDMT_VND_DROP_INDEX:
vnodeProcessDropIndexReq(pVnode, ver, pReq, len, pRsp);
break;
case TDMT_VND_STREAM_CHECK_POINT_SOURCE:
tqProcessStreamCheckPointSourceReq(pVnode->pTq, pMsg);
break;
case TDMT_VND_STREAM_TASK_UPDATE:
tqProcessTaskUpdateReq(pVnode->pTq, pMsg);
break;
case TDMT_VND_COMPACT:
vnodeProcessCompactVnodeReq(pVnode, ver, pReq, len, pRsp);
goto _exit;
@ -614,7 +623,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg
walApplyVer(pVnode->pWal, ver);
if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, ver) < 0) {
if (tqPushMsg(pVnode->pTq, pMsg->msgType) < 0) {
vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno));
return -1;
}
@ -665,7 +674,7 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) {
return 0;
}
SReadHandle handle = {.config = &pVnode->config, .vnode = pVnode, .pMsgCb = &pVnode->msgCb};
SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb};
initStorageAPI(&handle.api);
switch (pMsg->msgType) {
@ -744,9 +753,9 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo)
return tqProcessTaskDispatchReq(pVnode->pTq, pMsg, true);
case TDMT_STREAM_TASK_DISPATCH_RSP:
return tqProcessTaskDispatchRsp(pVnode->pTq, pMsg);
case TDMT_STREAM_TASK_CHECK:
case TDMT_VND_STREAM_TASK_CHECK:
return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg);
case TDMT_STREAM_TASK_CHECK_RSP:
case TDMT_VND_STREAM_TASK_CHECK_RSP:
return tqProcessStreamTaskCheckRsp(pVnode->pTq, 0, pMsg);
case TDMT_STREAM_RETRIEVE:
return tqProcessTaskRetrieveReq(pVnode->pTq, pMsg);
@ -754,10 +763,12 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo)
return tqProcessTaskRetrieveRsp(pVnode->pTq, pMsg);
case TDMT_VND_STREAM_SCAN_HISTORY:
return tqProcessTaskScanHistory(pVnode->pTq, pMsg);
case TDMT_STREAM_SCAN_HISTORY_FINISH:
case TDMT_VND_STREAM_SCAN_HISTORY_FINISH:
return tqProcessTaskScanHistoryFinishReq(pVnode->pTq, pMsg);
case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP:
case TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP:
return tqProcessTaskScanHistoryFinishRsp(pVnode->pTq, pMsg);
case TDMT_STREAM_TASK_CHECKPOINT_READY:
return tqProcessStreamTaskCheckpointReadyMsg(pVnode->pTq, pMsg);
default:
vError("unknown msg type:%d in stream queue", pMsg->msgType);
return TSDB_CODE_APP_ERROR;
@ -765,7 +776,6 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo)
}
void smaHandleRes(void *pVnode, int64_t smaId, const SArray *data) {
// blockDebugShowDataBlocks(data, __func__);
tdProcessTSmaInsert(((SVnode *)pVnode)->pSma, smaId, (const char *)data);
}
@ -1668,7 +1678,7 @@ static int32_t vnodeConsolidateAlterHashRange(SVnode *pVnode, int64_t ver) {
}
static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) {
vInfo("vgId:%d, vnode handle msgType:alter-confirm, alter confim msg is processed", TD_VID(pVnode));
vInfo("vgId:%d, vnode handle msgType:alter-confirm, alter confirm msg is processed", TD_VID(pVnode));
int32_t code = TSDB_CODE_SUCCESS;
if (!pVnode->config.hashChange) {
goto _exit;

View File

@ -14,6 +14,7 @@
*/
#define _DEFAULT_SOURCE
#include "tq.h"
#include "vnd.h"
#define BATCH_ENABLE 0
@ -216,7 +217,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs)
isWeak, isBlock, msg, numOfMsgs, arrayPos, pMsg->info.handle);
if (!pVnode->restored) {
vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, TMSG_INFO(pMsg->msgType));
vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg,
TMSG_INFO(pMsg->msgType));
terrno = TSDB_CODE_SYN_RESTORING;
vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING);
rpcFreeCont(pMsg->pCont);
@ -279,7 +281,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs)
vnodeIsMsgBlock(pMsg->msgType), msg, numOfMsgs, pMsg->info.handle);
if (!pVnode->restored) {
vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, TMSG_INFO(pMsg->msgType));
vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg,
TMSG_INFO(pMsg->msgType));
vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING);
rpcFreeCont(pMsg->pCont);
taosFreeQitem(pMsg);
@ -527,6 +530,7 @@ static int32_t vnodeSnapshotDoWrite(const SSyncFSM *pFsm, void *pWriter, void *p
static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) {
SVnode *pVnode = pFsm->data;
int32_t vgId = pVnode->config.vgId;
SyncIndex appliedIdx = -1;
do {
@ -538,7 +542,7 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx)
} else {
vInfo("vgId:%d, restore not finish since %" PRId64 " items to be applied. commit-index:%" PRId64
", applied-index:%" PRId64,
pVnode->config.vgId, commitIdx - appliedIdx, commitIdx, appliedIdx);
vgId, commitIdx - appliedIdx, commitIdx, appliedIdx);
taosMsleep(10);
}
} while (true);
@ -547,15 +551,20 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx)
walApplyVer(pVnode->pWal, commitIdx);
pVnode->restored = true;
vInfo("vgId:%d, sync restore finished, start to restore stream tasks by replay wal", pVnode->config.vgId);
if (vnodeIsRoleLeader(pVnode)) {
vInfo("vgId:%d, sync restore finished, start to launch stream tasks", vgId);
// start to restore all stream tasks
if (tsDisableStream) {
vInfo("vgId:%d, not launch stream tasks, since stream tasks are disabled", pVnode->config.vgId);
vInfo("vgId:%d, not launch stream tasks, since stream tasks are disabled", vgId);
} else {
vInfo("vgId:%d start to launch stream tasks", pVnode->config.vgId);
tqCheckStreamStatus(pVnode->pTq);
}
} else {
vInfo("vgId:%d, sync restore finished, no launch stream tasks since not leader", vgId);
}
}
static void vnodeBecomeFollower(const SSyncFSM *pFsm) {
@ -586,6 +595,9 @@ static void vnodeBecomeLearner(const SSyncFSM *pFsm) {
static void vnodeBecomeLeader(const SSyncFSM *pFsm) {
SVnode *pVnode = pFsm->data;
if (pVnode->pTq) {
tqUpdateNodeStage(pVnode->pTq);
}
vDebug("vgId:%d, become leader", pVnode->config.vgId);
}
@ -660,8 +672,8 @@ int32_t vnodeSyncOpen(SVnode *pVnode, char *path, int32_t vnodeVersion) {
vInfo("vgId:%d, start to open sync, replica:%d selfIndex:%d", pVnode->config.vgId, pCfg->replicaNum, pCfg->myIndex);
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
SNodeInfo *pNode = &pCfg->nodeInfo[i];
vInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pVnode->config.vgId, i, pNode->nodeFqdn, pNode->nodePort,
pNode->nodeId, pNode->clusterId);
vInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pVnode->config.vgId, i, pNode->nodeFqdn,
pNode->nodePort, pNode->nodeId, pNode->clusterId);
}
pVnode->sync = syncOpen(&syncInfo, vnodeVersion);

View File

@ -21,11 +21,11 @@ extern "C" {
#include "os.h"
#include "tcommon.h"
#include "theap.h"
#include "tlosertree.h"
#include "tsort.h"
#include "ttszip.h"
#include "tvariant.h"
#include "theap.h"
#include "dataSinkMgt.h"
#include "executil.h"
@ -39,8 +39,8 @@ extern "C" {
#include "tlockfree.h"
#include "tmsg.h"
#include "tpagedbuf.h"
//#include "tstream.h"
//#include "tstreamUpdate.h"
// #include "tstream.h"
// #include "tstreamUpdate.h"
#include "tlrucache.h"
typedef int32_t (*__block_search_fn_t)(char* data, int32_t num, int64_t key, int32_t order);
@ -48,13 +48,7 @@ typedef int32_t (*__block_search_fn_t)(char* data, int32_t num, int64_t key, int
typedef struct STsdbReader STsdbReader;
typedef struct STqReader STqReader;
typedef enum SOperatorParamType{
OP_GET_PARAM = 1,
OP_NOTIFY_PARAM
} SOperatorParamType;
typedef enum SOperatorParamType { OP_GET_PARAM = 1, OP_NOTIFY_PARAM } SOperatorParamType;
#define IS_VALID_SESSION_WIN(winInfo) ((winInfo).sessionWin.win.skey > 0)
#define SET_SESSION_WIN_INVALID(winInfo) ((winInfo).sessionWin.win.skey = INT64_MIN)
@ -275,7 +269,7 @@ typedef struct STableScanInfo {
int8_t assignBlockUid;
bool hasGroupByTag;
bool countOnly;
// TsdReader readerAPI;
// TsdReader readerAPI;
} STableScanInfo;
typedef struct STableMergeScanInfo {
@ -383,8 +377,6 @@ typedef struct STimeWindowAggSupp {
int64_t waterMark;
TSKEY maxTs;
TSKEY minTs;
TSKEY checkPointTs;
TSKEY checkPointInterval;
SColumnInfoData timeWindowData; // query time window info for scalar function execution.
} STimeWindowAggSupp;
@ -419,8 +411,6 @@ typedef struct SStreamScanInfo {
SExprSupp* pPartScalarSup;
bool assignBlockUid; // assign block uid to groupId, temporarily used for generating rollup SMA.
int32_t scanWinIndex; // for state operator
int32_t pullDataResIndex;
SSDataBlock* pPullDataRes; // pull data SSDataBlock
SSDataBlock* pDeleteDataRes; // delete data SSDataBlock
int32_t deleteDataIndex;
STimeWindow updateWin;
@ -438,9 +428,10 @@ typedef struct SStreamScanInfo {
SSDataBlock* pCreateTbRes;
int8_t igCheckUpdate;
int8_t igExpired;
void* pState; //void
void* pState; // void
SStoreTqReader readerFn;
SStateStore stateStore;
SSDataBlock* pCheckpointRes;
} SStreamScanInfo;
typedef struct {
@ -502,6 +493,11 @@ typedef struct SMergeAlignedIntervalAggOperatorInfo {
SResultRow* pResultRow;
} SMergeAlignedIntervalAggOperatorInfo;
typedef struct SOpCheckPointInfo {
uint16_t checkPointId;
SHashObj* children; // key:child id
} SOpCheckPointInfo;
typedef struct SStreamIntervalOperatorInfo {
SOptrBasicInfo binfo; // basic info
SAggSupporter aggSup; // aggregate supporter
@ -529,9 +525,12 @@ typedef struct SStreamIntervalOperatorInfo {
SArray* pUpdated;
SSHashObj* pUpdatedMap;
int64_t dataVersion;
SStateStore statestore;
SStateStore stateStore;
bool recvGetAll;
SHashObj* pFinalPullDataMap;
SOpCheckPointInfo checkPointInfo;
bool reCkBlock;
SSDataBlock* pCheckpointRes;
} SStreamIntervalOperatorInfo;
typedef struct SDataGroupInfo {
@ -578,6 +577,8 @@ typedef struct SStreamSessionAggOperatorInfo {
int64_t dataVersion;
SArray* historyWins;
bool isHistoryOp;
bool reCkBlock;
SSDataBlock* pCheckpointRes;
} SStreamSessionAggOperatorInfo;
typedef struct SStreamStateAggOperatorInfo {
@ -599,6 +600,8 @@ typedef struct SStreamStateAggOperatorInfo {
int64_t dataVersion;
bool isHistoryOp;
SArray* historyWins;
bool reCkBlock;
SSDataBlock* pCheckpointRes;
} SStreamStateAggOperatorInfo;
typedef struct SStreamPartitionOperatorInfo {
@ -652,7 +655,9 @@ typedef struct SStreamFillOperatorInfo {
#define OPTR_SET_OPENED(_optr) ((_optr)->status |= OP_OPENED)
SSchemaWrapper* extractQueriedColumnSchema(SScanPhysiNode* pScanNode);
int32_t initQueriedTableSchemaInfo(SReadHandle* pHandle, SScanPhysiNode* pScanNode, const char* dbName, SExecTaskInfo* pTaskInfo);
int32_t initQueriedTableSchemaInfo(SReadHandle* pHandle, SScanPhysiNode* pScanNode, const char* dbName,
SExecTaskInfo* pTaskInfo);
void cleanupQueriedTableScanInfo(void* p);
void initBasicInfo(SOptrBasicInfo* pInfo, SSDataBlock* pBlock);
@ -724,7 +729,8 @@ bool isInTimeWindow(STimeWindow* pWin, TSKEY ts, int64_t gap);
bool functionNeedToExecute(SqlFunctionCtx* pCtx);
bool isOverdue(TSKEY ts, STimeWindowAggSupp* pSup);
bool isCloseWindow(STimeWindow* pWin, STimeWindowAggSupp* pSup);
bool isDeletedStreamWindow(STimeWindow* pWin, uint64_t groupId, void* pState, STimeWindowAggSupp* pTwSup, SStateStore* pStore);
bool isDeletedStreamWindow(STimeWindow* pWin, uint64_t groupId, void* pState, STimeWindowAggSupp* pTwSup,
SStateStore* pStore);
void appendOneRowToStreamSpecialBlock(SSDataBlock* pBlock, TSKEY* pStartTs, TSKEY* pEndTs, uint64_t* pUid,
uint64_t* pGp, void* pTbName);
uint64_t calGroupIdByData(SPartitionBySupporter* pParSup, SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t rowId);
@ -736,8 +742,8 @@ bool groupbyTbname(SNodeList* pGroupList);
int32_t buildDataBlockFromGroupRes(struct SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup,
SGroupResInfo* pGroupResInfo);
int32_t saveSessionDiscBuf(void* pState, SSessionKey* key, void* buf, int32_t size, SStateStore* pAPI);
int32_t buildSessionResultDataBlock(struct SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock,
SExprSupp* pSup, SGroupResInfo* pGroupResInfo);
int32_t buildSessionResultDataBlock(struct SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup,
SGroupResInfo* pGroupResInfo);
int32_t releaseOutputBuf(void* pState, SWinKey* pKey, SResultRow* pResult, SStateStore* pAPI);
void getNextIntervalWindow(SInterval* pInterval, STimeWindow* tw, int32_t order);
int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int32_t pos, int32_t order,
@ -758,6 +764,8 @@ uint64_t calcGroupId(char* pData, int32_t len);
void streamOpReleaseState(struct SOperatorInfo* pOperator);
void streamOpReloadState(struct SOperatorInfo* pOperator);
int32_t encodeSTimeWindowAggSupp(void** buf, STimeWindowAggSupp* pTwAggSup);
void* decodeSTimeWindowAggSupp(void* buf, STimeWindowAggSupp* pTwAggSup);
void destroyOperatorParamValue(void* pValues);
int32_t mergeOperatorParams(SOperatorParam* pDst, SOperatorParam* pSrc);
int32_t buildTableScanOperatorParam(SOperatorParam** ppRes, SArray* pUidList, int32_t srcOpType, bool tableSeq);

View File

@ -116,7 +116,7 @@ SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SMerge
SOperatorInfo* createMergeAlignedIntervalOperatorInfo(SOperatorInfo* downstream, SMergeAlignedIntervalPhysiNode* pNode, SExecTaskInfo* pTaskInfo);
SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild);
SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild, SReadHandle* pHandle);
SOperatorInfo* createSessionAggOperatorInfo(SOperatorInfo* downstream, SSessionWinodwPhysiNode* pSessionNode, SExecTaskInfo* pTaskInfo);
@ -146,7 +146,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh
SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild, SReadHandle* pHandle);
SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo);
SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle);
SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle);

View File

@ -70,8 +70,6 @@ typedef struct {
SVersionRange fillHistoryVer;
STimeWindow fillHistoryWindow;
SStreamState* pState;
int64_t dataVersion;
int64_t checkPointId;
} SStreamTaskInfo;
struct SExecTaskInfo {

View File

@ -54,8 +54,8 @@ typedef struct SDataDispatchHandle {
// clang-format off
// data format:
// +----------------+------------------+--------------+--------------+------------------+--------------------------------------------+------------------------------------+-------------+-----------+-------------+-----------+
// |SDataCacheEntry | version | total length | numOfRows | group id | col1_schema | col2_schema | col3_schema... | column#1 length, column#2 length...| col1 bitmap | col1 data | col2 bitmap | col2 data | .... | | (4 bytes) |(8 bytes)
// | | sizeof(int32_t) |sizeof(int32) | sizeof(int32)| sizeof(uint64_t) | (sizeof(int8_t)+sizeof(int32_t))*numOfCols | sizeof(int32_t) * numOfCols | actual size | |
// |SDataCacheEntry | version | total length | numOfRows | group id | col1_schema | col2_schema | col3_schema... | column#1 length, column#2 length...| col1 bitmap | col1 data | col2 bitmap | col2 data |
// | | sizeof(int32_t) |sizeof(int32) | sizeof(int32)| sizeof(uint64_t) | (sizeof(int8_t)+sizeof(int32_t))*numOfCols | sizeof(int32_t) * numOfCols | actual size | | |
// +----------------+------------------+--------------+--------------+------------------+--------------------------------------------+------------------------------------+-------------+-----------+-------------+-----------+
// The length of bitmap is decided by number of rows of this data block, and the length of each column data is
// recorded in the first segment, next to the struct header

View File

@ -14,9 +14,9 @@
*/
#include "executor.h"
#include <libs/transport/trpc.h>
#include <libs/wal/wal.h>
#include "executorInt.h"
#include "trpc.h"
#include "wal.h"
#include "operator.h"
#include "planner.h"
#include "querytask.h"
@ -149,11 +149,15 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu
} else if (type == STREAM_INPUT__DATA_BLOCK) {
for (int32_t i = 0; i < numOfBlocks; ++i) {
SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i];
SPackedData tmp = { .pDataBlock = pDataBlock };
SPackedData tmp = {.pDataBlock = pDataBlock};
taosArrayPush(pInfo->pBlockLists, &tmp);
}
pInfo->blockType = STREAM_INPUT__DATA_BLOCK;
} else if (type == STREAM_INPUT__CHECKPOINT) {
SPackedData tmp = {.pDataBlock = input};
taosArrayPush(pInfo->pBlockLists, &tmp);
pInfo->blockType = STREAM_INPUT__CHECKPOINT;
} else {
ASSERT(0);
}
@ -162,7 +166,7 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu
}
}
void doSetTaskId(SOperatorInfo* pOperator, SStorageAPI *pAPI) {
void doSetTaskId(SOperatorInfo* pOperator, SStorageAPI* pAPI) {
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
SStreamScanInfo* pStreamScanInfo = pOperator->info;
@ -203,13 +207,6 @@ int32_t qSetStreamOpOpen(qTaskInfo_t tinfo) {
return code;
}
void qGetCheckpointVersion(qTaskInfo_t tinfo, int64_t* dataVer, int64_t* ckId) {
SExecTaskInfo* pTaskInfo = tinfo;
*dataVer = pTaskInfo->streamInfo.dataVersion;
*ckId = pTaskInfo->streamInfo.checkPointId;
}
int32_t qSetMultiStreamInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, int32_t type) {
if (tinfo == NULL) {
return TSDB_CODE_APP_ERROR;
@ -1251,8 +1248,9 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT
pScanInfo->scanTimes = 0;
if (pScanBaseInfo->dataReader == NULL) {
int32_t code = pTaskInfo->storageAPI.tsdReader.tsdReaderOpen(pScanBaseInfo->readHandle.vnode, &pScanBaseInfo->cond, &keyInfo, 1,
pScanInfo->pResBlock, (void**) &pScanBaseInfo->dataReader, id, false, NULL);
int32_t code = pTaskInfo->storageAPI.tsdReader.tsdReaderOpen(
pScanBaseInfo->readHandle.vnode, &pScanBaseInfo->cond, &keyInfo, 1, pScanInfo->pResBlock,
(void**)&pScanBaseInfo->dataReader, id, false, NULL);
if (code != TSDB_CODE_SUCCESS) {
qError("prepare read tsdb snapshot failed, uid:%" PRId64 ", code:%s %s", pOffset->uid, tstrerror(code), id);
terrno = code;
@ -1310,8 +1308,8 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT
STableKeyInfo* pList = tableListGetInfo(pTableListInfo, 0);
int32_t size = tableListGetSize(pTableListInfo);
pTaskInfo->storageAPI.tsdReader.tsdReaderOpen(pInfo->vnode, &pTaskInfo->streamInfo.tableCond, pList, size, NULL, (void**) &pInfo->dataReader, NULL,
false, NULL);
pTaskInfo->storageAPI.tsdReader.tsdReaderOpen(pInfo->vnode, &pTaskInfo->streamInfo.tableCond, pList, size, NULL,
(void**)&pInfo->dataReader, NULL, false, NULL);
cleanupQueryTableDataCond(&pTaskInfo->streamInfo.tableCond);
strcpy(pTaskInfo->streamInfo.tbName, mtInfo.tbName);
@ -1377,7 +1375,7 @@ SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo) {
SArray* pUidList = taosArrayInit(10, sizeof(uint64_t));
int32_t numOfTables = tableListGetSize(pTableListInfo);
for(int32_t i = 0; i < numOfTables; ++i) {
for (int32_t i = 0; i < numOfTables; ++i) {
STableKeyInfo* pKeyInfo = tableListGetInfo(pTableListInfo, i);
taosArrayPush(pUidList, &pKeyInfo->uid);
}

View File

@ -30,10 +30,10 @@
#include "operator.h"
#include "query.h"
#include "querytask.h"
#include "storageapi.h"
#include "tcompare.h"
#include "thash.h"
#include "ttypes.h"
#include "storageapi.h"
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
#define GET_FORWARD_DIRECTION_FACTOR(ord) (((ord) == TSDB_ORDER_ASC) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP)
@ -697,8 +697,8 @@ int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprS
if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) {
uint32_t newSize = pBlock->info.rows + pRow->numOfRows + ((numOfRows - i) > 1 ? 1 : 0);
blockDataEnsureCapacity(pBlock, newSize);
qDebug("datablock capacity not sufficient, expand to required:%d, current capacity:%d, %s",
newSize, pBlock->info.capacity, GET_TASKID(pTaskInfo));
qDebug("datablock capacity not sufficient, expand to required:%d, current capacity:%d, %s", newSize,
pBlock->info.capacity, GET_TASKID(pTaskInfo));
// todo set the pOperator->resultInfo size
}
@ -737,10 +737,12 @@ void doBuildStreamResBlock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGr
// clear the existed group id
pBlock->info.id.groupId = 0;
ASSERT(!pbInfo->mergeResultBlock);
doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, false);
doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold,
false);
void* tbname = NULL;
if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, &tbname) < 0) {
if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, &tbname) <
0) {
pBlock->info.parTbName[0] = 0;
} else {
memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN);
@ -765,10 +767,12 @@ void doBuildResultDatablock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SG
// clear the existed group id
pBlock->info.id.groupId = 0;
if (!pbInfo->mergeResultBlock) {
doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, false);
doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold,
false);
} else {
while (hasRemainResults(pGroupResInfo)) {
doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, true);
doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold,
true);
if (pBlock->info.rows >= pOperator->resultInfo.threshold) {
break;
}
@ -966,8 +970,8 @@ int32_t saveSessionDiscBuf(void* pState, SSessionKey* key, void* buf, int32_t si
return TSDB_CODE_SUCCESS;
}
int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock,
SExprSupp* pSup, SGroupResInfo* pGroupResInfo) {
int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup,
SGroupResInfo* pGroupResInfo) {
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SStorageAPI* pAPI = &pTaskInfo->storageAPI;
@ -986,8 +990,8 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa
// ASSERT(code == 0);
if (code == -1) {
// for history
qWarn("===stream===not found session result key:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, pKey->win.skey,
pKey->win.ekey, pKey->groupId);
qWarn("===stream===not found session result key:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64 "",
pKey->win.skey, pKey->win.ekey, pKey->groupId);
pGroupResInfo->index += 1;
continue;
}
@ -1004,7 +1008,8 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa
pBlock->info.id.groupId = pKey->groupId;
void* tbname = NULL;
if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, &tbname) < 0) {
if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId,
&tbname) < 0) {
pBlock->info.parTbName[0] = 0;
} else {
memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN);

View File

@ -1367,6 +1367,7 @@ static SSDataBlock* doStreamFill(SOperatorInfo* pOperator) {
memcpy(pInfo->pSrcBlock->info.parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN);
pInfo->srcRowIndex = -1;
} break;
case STREAM_CHECKPOINT:
case STREAM_CREATE_CHILD_TABLE: {
return pBlock;
} break;

View File

@ -1130,10 +1130,14 @@ static SSDataBlock* doStreamHashPartition(SOperatorInfo* pOperator) {
printDataBlock(pInfo->pDelRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo));
return pInfo->pDelRes;
} break;
default:
ASSERTS(pBlock->info.type == STREAM_CREATE_CHILD_TABLE || pBlock->info.type == STREAM_RETRIEVE, "invalid SSDataBlock type");
case STREAM_CREATE_CHILD_TABLE:
case STREAM_RETRIEVE:
case STREAM_CHECKPOINT: {
return pBlock;
}
default:
ASSERTS(0, "invalid SSDataBlock type");
}
// there is an scalar expression that needs to be calculated right before apply the group aggregation.
if (pInfo->scalarSup.pExprInfo != NULL) {
@ -1185,8 +1189,8 @@ void initParDownStream(SOperatorInfo* downstream, SPartitionBySupporter* pParSup
SStreamScanInfo* pScanInfo = downstream->info;
pScanInfo->partitionSup = *pParSup;
pScanInfo->pPartScalarSup = pExpr;
if (!pScanInfo->igCheckUpdate && !pScanInfo->pUpdateInfo) {
pScanInfo->pUpdateInfo = pAPI->stateStore.updateInfoInit(60000, TSDB_TIME_PRECISION_MILLI, 0);
if (!pScanInfo->pUpdateInfo) {
pScanInfo->pUpdateInfo = pAPI->stateStore.updateInfoInit(60000, TSDB_TIME_PRECISION_MILLI, 0, pScanInfo->igCheckUpdate);
}
}

View File

@ -479,7 +479,7 @@ SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SR
SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode;
pOptr = createIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo);
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type) {
pOptr = createStreamIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo);
pOptr = createStreamIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, pHandle);
} else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_ALIGNED_INTERVAL == type) {
SMergeAlignedIntervalPhysiNode* pIntervalPhyNode = (SMergeAlignedIntervalPhysiNode*)pPhyNode;
pOptr = createMergeAlignedIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo);
@ -488,10 +488,10 @@ SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SR
pOptr = createMergeIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo);
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL == type) {
int32_t children = 0;
pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children);
pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children, pHandle);
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL == type) {
int32_t children = pHandle->numOfVgroups;
pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children);
pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children, pHandle);
} else if (QUERY_NODE_PHYSICAL_PLAN_SORT == type) {
pOptr = createSortOperatorInfo(ops[0], (SSortPhysiNode*)pPhyNode, pTaskInfo);
} else if (QUERY_NODE_PHYSICAL_PLAN_GROUP_SORT == type) {

View File

@ -43,6 +43,7 @@ int32_t scanDebug = 0;
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
#define STREAM_SCAN_OP_NAME "StreamScanOperator"
#define STREAM_SCAN_OP_STATE_NAME "StreamScanFillHistoryState"
#define STREAM_SCAN_OP_CHECKPOINT_NAME "StreamScanOperator_Checkpoint"
typedef struct STableMergeScanExecInfo {
SFileBlockLoadRecorder blockRecorder;
@ -1958,23 +1959,46 @@ static void doCheckUpdate(SStreamScanInfo* pInfo, TSKEY endKey, SSDataBlock* pBl
}
}
//int32_t streamScanOperatorEncode(SStreamScanInfo* pInfo, void** pBuff) {
// int32_t len = updateInfoSerialize(NULL, 0, pInfo->pUpdateInfo);
// *pBuff = taosMemoryCalloc(1, len);
// updateInfoSerialize(*pBuff, len, pInfo->pUpdateInfo);
// return len;
//}
int32_t streamScanOperatorEncode(SStreamScanInfo* pInfo, void** pBuff) {
int32_t len = pInfo->stateStore.updateInfoSerialize(NULL, 0, pInfo->pUpdateInfo);
len += encodeSTimeWindowAggSupp(NULL, &pInfo->twAggSup);
*pBuff = taosMemoryCalloc(1, len);
void* buf = *pBuff;
encodeSTimeWindowAggSupp(&buf, &pInfo->twAggSup);
pInfo->stateStore.updateInfoSerialize(buf, len, pInfo->pUpdateInfo);
return len;
}
void streamScanOperatorSaveCheckpoint(SStreamScanInfo* pInfo) {
if (!pInfo->pState) {
return;
}
void* pBuf = NULL;
int32_t len = streamScanOperatorEncode(pInfo, &pBuf);
pInfo->stateStore.streamStateSaveInfo(pInfo->pState, STREAM_SCAN_OP_CHECKPOINT_NAME, strlen(STREAM_SCAN_OP_CHECKPOINT_NAME), pBuf, len);
taosMemoryFree(pBuf);
pInfo->stateStore.streamStateCommit(pInfo->pState);
}
// other properties are recovered from the execution plan
void streamScanOperatorDecode(void* pBuff, int32_t len, SStreamScanInfo* pInfo) {
if (!pBuff || len == 0) {
return;
}
void* buf = pBuff;
buf = decodeSTimeWindowAggSupp(buf, &pInfo->twAggSup);
int32_t tlen = len - encodeSTimeWindowAggSupp(NULL, &pInfo->twAggSup);
if (tlen == 0) {
return;
}
void* pUpInfo = taosMemoryCalloc(1, sizeof(SUpdateInfo));
int32_t code = pInfo->stateStore.updateInfoDeserialize(pBuff, len, pUpInfo);
int32_t code = pInfo->stateStore.updateInfoDeserialize(buf, tlen, pUpInfo);
if (code == TSDB_CODE_SUCCESS) {
pInfo->stateStore.updateInfoDestroy(pInfo->pUpdateInfo);
pInfo->pUpdateInfo = pUpInfo;
} else {
taosMemoryFree(pUpInfo);
}
}
@ -2155,6 +2179,9 @@ FETCH_NEXT_BLOCK:
}
}
} break;
case STREAM_CHECKPOINT: {
qError("stream check point error. msg type: STREAM_INPUT__DATA_BLOCK");
} break;
default:
break;
}
@ -2295,6 +2322,23 @@ FETCH_NEXT_BLOCK:
}
goto NEXT_SUBMIT_BLK;
} else if (pInfo->blockType == STREAM_INPUT__CHECKPOINT) {
if (pInfo->validBlockIndex >= total) {
doClearBufferedBlocks(pInfo);
return NULL;
}
int32_t current = pInfo->validBlockIndex++;
qDebug("process %d/%d input data blocks, %s", current, (int32_t) total, id);
SPackedData* pData = taosArrayGet(pInfo->pBlockLists, current);
SSDataBlock* pBlock = taosArrayGet(pData->pDataBlock, 0);
if (pBlock->info.type == STREAM_CHECKPOINT) {
streamScanOperatorSaveCheckpoint(pInfo);
}
// printDataBlock(pBlock, "stream scan ck");
return pInfo->pCheckpointRes;
}
return NULL;
@ -2458,11 +2502,12 @@ static void destroyStreamScanOperatorInfo(void* param) {
pStreamScan->stateStore.updateInfoDestroy(pStreamScan->pUpdateInfo);
blockDataDestroy(pStreamScan->pRes);
blockDataDestroy(pStreamScan->pUpdateRes);
blockDataDestroy(pStreamScan->pPullDataRes);
blockDataDestroy(pStreamScan->pDeleteDataRes);
blockDataDestroy(pStreamScan->pUpdateDataRes);
blockDataDestroy(pStreamScan->pCreateTbRes);
taosArrayDestroy(pStreamScan->pBlockLists);
blockDataDestroy(pStreamScan->pCheckpointRes);
taosMemoryFree(pStreamScan);
}
@ -2669,7 +2714,6 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys
pInfo->scanMode = STREAM_SCAN_FROM_READERHANDLE;
pInfo->windowSup = (SWindowSupporter){.pStreamAggSup = NULL, .gap = -1, .parentType = QUERY_NODE_PHYSICAL_PLAN};
pInfo->groupId = 0;
pInfo->pPullDataRes = createSpecialDataBlock(STREAM_RETRIEVE);
pInfo->pStreamScanOp = pOperator;
pInfo->deleteDataIndex = 0;
pInfo->pDeleteDataRes = createSpecialDataBlock(STREAM_DELETE_DATA);
@ -2683,15 +2727,18 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys
pInfo->pState = pTaskInfo->streamInfo.pState;
pInfo->stateStore = pTaskInfo->storageAPI.stateStore;
pInfo->readerFn = pTaskInfo->storageAPI.tqReaderFn;
pInfo->pCheckpointRes = createSpecialDataBlock(STREAM_CHECKPOINT);
// for stream
if (pTaskInfo->streamInfo.pState) {
void* buff = NULL;
int32_t len = 0;
pAPI->stateStore.streamStateGetInfo(pTaskInfo->streamInfo.pState, STREAM_SCAN_OP_NAME, strlen(STREAM_SCAN_OP_NAME), &buff, &len);
int32_t res = pAPI->stateStore.streamStateGetInfo(pTaskInfo->streamInfo.pState, STREAM_SCAN_OP_CHECKPOINT_NAME, strlen(STREAM_SCAN_OP_CHECKPOINT_NAME), &buff, &len);
if (res == TSDB_CODE_SUCCESS) {
streamScanOperatorDecode(buff, len, pInfo);
taosMemoryFree(buff);
}
}
setOperatorInfo(pOperator, STREAM_SCAN_OP_NAME, QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN, false, OP_NOT_OPENED, pInfo,
pTaskInfo);

File diff suppressed because it is too large Load Diff

View File

@ -18,6 +18,7 @@
#include "functionMgt.h"
#include "operator.h"
#include "querytask.h"
#include "tchecksum.h"
#include "tcommon.h"
#include "tcompare.h"
#include "tdatablock.h"
@ -55,7 +56,6 @@ typedef enum SResultTsInterpType {
RESULT_ROW_END_INTERP = 2,
} SResultTsInterpType;
typedef struct SOpenWindowInfo {
SResultRowPosition pos;
uint64_t groupId;
@ -388,7 +388,7 @@ static bool setTimeWindowInterpolationEndTs(SIntervalAggOperatorInfo* pInfo, SEx
bool inCalSlidingWindow(SInterval* pInterval, STimeWindow* pWin, TSKEY calStart, TSKEY calEnd, EStreamType blockType) {
if (pInterval->interval != pInterval->sliding &&
((pWin->ekey < calStart || pWin->skey > calEnd) || (blockType == STREAM_PULL_DATA && pWin->skey < calStart) )) {
((pWin->ekey < calStart || pWin->skey > calEnd) || (blockType == STREAM_PULL_DATA && pWin->skey < calStart))) {
return false;
}
@ -728,8 +728,8 @@ static bool hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul
}
TSKEY ekey = ascScan ? win.ekey : win.skey;
int32_t forwardRows =
getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, pInfo->binfo.inputTsOrder);
int32_t forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL,
pInfo->binfo.inputTsOrder);
// prev time window not interpolation yet.
if (pInfo->timeWindowInterpo) {
@ -756,7 +756,8 @@ static bool hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul
STimeWindow nextWin = win;
while (1) {
int32_t prevEndPos = forwardRows - 1 + startPos;
startPos = getNextQualifiedWindow(&pInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos, pInfo->binfo.inputTsOrder);
startPos = getNextQualifiedWindow(&pInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos,
pInfo->binfo.inputTsOrder);
if (startPos < 0 || filterWindowWithLimit(pInfo, &nextWin, tableGroupId)) {
break;
}
@ -768,8 +769,8 @@ static bool hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul
}
ekey = ascScan ? nextWin.ekey : nextWin.skey;
forwardRows =
getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, pInfo->binfo.inputTsOrder);
forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL,
pInfo->binfo.inputTsOrder);
// window start(end) key interpolation
doWindowBorderInterpolation(pInfo, pBlock, pResult, &nextWin, startPos, forwardRows, pSup);
// TODO: add to open window? how to close the open windows after input blocks exhausted?
@ -1116,7 +1117,6 @@ static void doClearWindowImpl(SResultRowPosition* p1, SDiskbasedBuf* pResultBuf,
releaseBufPage(pResultBuf, bufPage);
}
static void destroyStateWindowOperatorInfo(void* param) {
SStateWindowOperatorInfo* pInfo = (SStateWindowOperatorInfo*)param;
cleanupBasicInfo(&pInfo->binfo);
@ -1153,7 +1153,6 @@ void destroyIntervalOperatorInfo(void* param) {
taosMemoryFreeClear(param);
}
static bool timeWindowinterpNeeded(SqlFunctionCtx* pCtx, int32_t numOfCols, SIntervalAggOperatorInfo* pInfo) {
// the primary timestamp column
bool needed = false;
@ -1208,13 +1207,6 @@ static bool timeWindowinterpNeeded(SqlFunctionCtx* pCtx, int32_t numOfCols, SInt
return needed;
}
void initStreamFunciton(SqlFunctionCtx* pCtx, int32_t numOfExpr) {
for (int32_t i = 0; i < numOfExpr; i++) {
// pCtx[i].isStream = true;
}
}
SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SIntervalPhysiNode* pPhyNode,
SExecTaskInfo* pTaskInfo) {
SIntervalAggOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SIntervalAggOperatorInfo));
@ -1235,8 +1227,8 @@ SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SIntervalPh
int32_t num = 0;
SExprInfo* pExprInfo = createExprInfo(pPhyNode->window.pFuncs, NULL, &num);
int32_t code =
initAggSup(pSup, &pInfo->aggSup, pExprInfo, num, keyBufSize, pTaskInfo->id.str, pTaskInfo->streamInfo.pState, &pTaskInfo->storageAPI.functionStore);
int32_t code = initAggSup(pSup, &pInfo->aggSup, pExprInfo, num, keyBufSize, pTaskInfo->id.str,
pTaskInfo->streamInfo.pState, &pTaskInfo->storageAPI.functionStore);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
@ -1476,7 +1468,8 @@ SOperatorInfo* createStatewindowOperatorInfo(SOperatorInfo* downstream, SStateWi
if (pStateNode->window.pExprs != NULL) {
int32_t numOfScalarExpr = 0;
SExprInfo* pScalarExprInfo = createExprInfo(pStateNode->window.pExprs, NULL, &numOfScalarExpr);
int32_t code = initExprSupp(&pInfo->scalarSup, pScalarExprInfo, numOfScalarExpr, &pTaskInfo->storageAPI.functionStore);
int32_t code =
initExprSupp(&pInfo->scalarSup, pScalarExprInfo, numOfScalarExpr, &pTaskInfo->storageAPI.functionStore);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
@ -1615,7 +1608,6 @@ _error:
return NULL;
}
void destroyMAIOperatorInfo(void* param) {
SMergeAlignedIntervalAggOperatorInfo* miaInfo = (SMergeAlignedIntervalAggOperatorInfo*)param;
destroyIntervalOperatorInfo(miaInfo->intervalAggOperatorInfo);
@ -1979,8 +1971,8 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo*
}
TSKEY ekey = ascScan ? win.ekey : win.skey;
int32_t forwardRows =
getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, iaInfo->binfo.inputTsOrder);
int32_t forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL,
iaInfo->binfo.inputTsOrder);
ASSERT(forwardRows > 0);
// prev time window not interpolation yet.
@ -2010,8 +2002,8 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo*
STimeWindow nextWin = win;
while (1) {
int32_t prevEndPos = forwardRows - 1 + startPos;
startPos =
getNextQualifiedWindow(&iaInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos, iaInfo->binfo.inputTsOrder);
startPos = getNextQualifiedWindow(&iaInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos,
iaInfo->binfo.inputTsOrder);
if (startPos < 0) {
break;
}
@ -2025,8 +2017,8 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo*
}
ekey = ascScan ? nextWin.ekey : nextWin.skey;
forwardRows =
getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, iaInfo->binfo.inputTsOrder);
forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL,
iaInfo->binfo.inputTsOrder);
// window start(end) key interpolation
doWindowBorderInterpolation(iaInfo, pBlock, pResult, &nextWin, startPos, forwardRows, pExprSup);

View File

@ -44,9 +44,11 @@ typedef struct {
int64_t defaultCfInit;
} SBackendWrapper;
void* streamBackendInit(const char* path);
void* streamBackendInit(const char* path, int64_t chkpId);
void streamBackendCleanup(void* arg);
void streamBackendHandleCleanup(void* arg);
int32_t streamBackendLoadCheckpointInfo(void* pMeta);
int32_t streamBackendDoCheckpoint(void* pMeta, uint64_t checkpointId);
SListNode* streamBackendAddCompare(void* backend, void* arg);
void streamBackendDelCompare(void* backend, void* arg);
@ -135,5 +137,10 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb
void* val, int32_t vlen, int64_t ttl, void* tmpBuf);
int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch);
int32_t streamBackendTriggerChkp(void* pMeta, char* dst);
int32_t streamBackendAddInUseChkp(void* arg, int64_t chkpId);
int32_t streamBackendDelInUseChkp(void* arg, int64_t chkpId);
// int32_t streamDefaultIter_rocksdb(SStreamState* pState, const void* start, const void* end, SArray* result);
#endif

View File

@ -19,7 +19,7 @@
#include "executor.h"
#include "query.h"
#include "tstream.h"
#include "streamBackendRocksdb.h"
#include "trpc.h"
#ifdef __cplusplus
@ -41,11 +41,15 @@ typedef struct {
} SStreamContinueExecInfo;
extern SStreamGlobalEnv streamEnv;
extern int32_t streamBackendId;
extern int32_t streamBackendCfWrapperId;
const char* streamGetBlockTypeStr(int32_t type);
void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration);
int32_t streamDispatchStreamBlock(SStreamTask* pTask);
SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg);
int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock);
SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg);
SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamTask* pTask, int64_t resultSize,
SArray* pRes);
void destroyStreamDataBlock(SStreamDataBlock* pBlock);
@ -55,13 +59,19 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock)
int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq);
int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId);
int32_t streamTaskBuildCheckpoint(SStreamTask* pTask);
int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet);
int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId,
SEpSet* pEpSet);
int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32_t index, int64_t checkpointId);
int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask);
int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask);
int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask);
int32_t extractBlocksFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks);
SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem);
int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, int32_t* pLen);
int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, SStreamScanHistoryFinishReq* pReq);
int32_t streamNotifyUpstreamContinue(SStreamTask* pTask);
int32_t streamTaskFillHistoryFinished(SStreamTask* pTask);

View File

@ -18,7 +18,6 @@
#define STREAM_TASK_INPUT_QUEUE_CAPACITY 20480
#define STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE (30)
#define QUEUE_MEM_SIZE_IN_MB(_q) (taosQueueMemorySize(_q) / ONE_MB_F)
SStreamGlobalEnv streamEnv;
@ -30,7 +29,7 @@ int32_t streamInit() {
}
if (old == 0) {
streamEnv.timer = taosTmrInit(10000, 100, 10000, "STREAM");
streamEnv.timer = taosTmrInit(1000, 100, 10000, "STREAM");
if (streamEnv.timer == NULL) {
atomic_store_8(&streamEnv.inited, 0);
return -1;
@ -67,7 +66,6 @@ static void streamSchedByTimer(void* param, void* tmrId) {
qDebug("s-task:%s in scheduler, trigger status:%d, next:%dms", pTask->id.idStr, status, (int32_t)pTask->triggerParam);
if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) {
streamMetaReleaseTask(NULL, pTask);
qDebug("s-task:%s jump out of schedTimer", pTask->id.idStr);
return;
}
@ -104,7 +102,7 @@ int32_t streamSetupScheduleTrigger(SStreamTask* pTask) {
int32_t ref = atomic_add_fetch_32(&pTask->refCnt, 1);
ASSERT(ref == 2 && pTask->schedTimer == NULL);
qDebug("s-task:%s setup scheduler trigger, delay:%"PRId64" ms", pTask->id.idStr, pTask->triggerParam);
qDebug("s-task:%s setup scheduler trigger, delay:%" PRId64 " ms", pTask->id.idStr, pTask->triggerParam);
pTask->schedTimer = taosTmrStart(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer);
pTask->triggerStatus = TASK_TRIGGER_STATUS__INACTIVE;
@ -141,14 +139,55 @@ int32_t streamSchedExec(SStreamTask* pTask) {
return 0;
}
static int32_t buildDispatchRsp(const SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t status, void** pBuf) {
*pBuf = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp));
if (*pBuf == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
((SMsgHead*)(*pBuf))->vgId = htonl(pReq->upstreamNodeId);
SStreamDispatchRsp* pDispatchRsp = POINTER_SHIFT((*pBuf), sizeof(SMsgHead));
pDispatchRsp->inputStatus = status;
pDispatchRsp->streamId = htobe64(pReq->streamId);
pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId);
pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId);
pDispatchRsp->downstreamNodeId = htonl(pTask->info.nodeId);
pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId);
return TSDB_CODE_SUCCESS;
}
static int32_t streamTaskAppendInputBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq) {
int8_t status = 0;
SStreamDataBlock* pBlock = createStreamBlockFromDispatchMsg(pReq, pReq->type, pReq->srcVgId);
if (pBlock == NULL) {
streamTaskInputFail(pTask);
status = TASK_INPUT_STATUS__FAILED;
qError("vgId:%d, s-task:%s failed to receive dispatch msg, reason: out of memory", pTask->pMeta->vgId,
pTask->id.idStr);
} else {
if (pBlock->type == STREAM_INPUT__TRANS_STATE) {
pTask->status.appendTranstateBlock = true;
}
int32_t code = tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pBlock);
// input queue is full, upstream is blocked now
status = (code == TSDB_CODE_SUCCESS) ? TASK_INPUT_STATUS__NORMAL : TASK_INPUT_STATUS__BLOCKED;
}
return status;
}
int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pRsp) {
SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0);
int8_t status = TASK_INPUT_STATUS__NORMAL;
// enqueue
if (pData != NULL) {
qDebug("s-task:%s (child %d) recv retrieve req from task:0x%x(vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, pTask->info.selfChildId,
pReq->srcTaskId, pReq->srcNodeId, pReq->reqId);
qDebug("s-task:%s (child %d) recv retrieve req from task:0x%x(vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr,
pTask->info.selfChildId, pReq->srcTaskId, pReq->srcNodeId, pReq->reqId);
pData->type = STREAM_INPUT__DATA_RETRIEVE;
pData->srcVgId = 0;
@ -181,7 +220,7 @@ int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock
int32_t code = 0;
int32_t type = pTask->outputInfo.type;
if (type == TASK_OUTPUT__TABLE) {
pTask->tbSink.tbSinkFunc(pTask, pTask->tbSink.vnode, 0, pBlock->blocks);
pTask->tbSink.tbSinkFunc(pTask, pTask->tbSink.vnode, pBlock->blocks);
destroyStreamDataBlock(pBlock);
} else if (type == TASK_OUTPUT__SMA) {
pTask->smaSink.smaSink(pTask->smaSink.vnode, pTask->smaSink.smaId, pBlock->blocks);
@ -200,70 +239,69 @@ int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock
return 0;
}
// static int32_t streamTaskAppendInputBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq) {
// int8_t status = 0;
//
// SStreamDataBlock* pBlock = createStreamDataFromDispatchMsg(pReq, pReq->type, pReq->srcVgId);
// if (pBlock == NULL) {
// streamTaskInputFail(pTask);
// status = TASK_INPUT_STATUS__FAILED;
// qError("vgId:%d, s-task:%s failed to receive dispatch msg, reason: out of memory", pTask->pMeta->vgId,
// pTask->id.idStr);
// } else {
// if (pBlock->type == STREAM_INPUT__TRANS_STATE) {
// pTask->status.appendTranstateBlock = true;
// }
//
// int32_t code = tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pBlock);
// // input queue is full, upstream is blocked now
// status = (code == TSDB_CODE_SUCCESS) ? TASK_INPUT_STATUS__NORMAL : TASK_INPUT_STATUS__BLOCKED;
// }
//
// return status;
// }
static int32_t streamTaskAppendInputBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq) {
int8_t status = 0;
SStreamDataBlock* pBlock = createStreamDataFromDispatchMsg(pReq, pReq->type, pReq->srcVgId);
if (pBlock == NULL) {
streamTaskInputFail(pTask);
status = TASK_INPUT_STATUS__FAILED;
qError("vgId:%d, s-task:%s failed to receive dispatch msg, reason: out of memory", pTask->pMeta->vgId,
pTask->id.idStr);
} else {
if (pBlock->type == STREAM_INPUT__TRANS_STATE) {
pTask->status.appendTranstateBlock = true;
}
int32_t code = tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pBlock);
// input queue is full, upstream is blocked now
status = (code == TSDB_CODE_SUCCESS) ? TASK_INPUT_STATUS__NORMAL : TASK_INPUT_STATUS__BLOCKED;
}
return status;
}
static int32_t buildDispatchRsp(const SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t status, void** pBuf) {
*pBuf = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp));
if (*pBuf == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
((SMsgHead*)(*pBuf))->vgId = htonl(pReq->upstreamNodeId);
SStreamDispatchRsp* pDispatchRsp = POINTER_SHIFT((*pBuf), sizeof(SMsgHead));
pDispatchRsp->inputStatus = status;
pDispatchRsp->streamId = htobe64(pReq->streamId);
pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId);
pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId);
pDispatchRsp->downstreamNodeId = htonl(pTask->info.nodeId);
pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId);
return TSDB_CODE_SUCCESS;
}
void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) {
SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId);
if (pInfo != NULL) {
pInfo->dataAllowed = false;
}
}
// static int32_t buildDispatchRsp(const SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t status, void**
// pBuf) {
// *pBuf = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp));
// if (*pBuf == NULL) {
// return TSDB_CODE_OUT_OF_MEMORY;
// }
//
// ((SMsgHead*)(*pBuf))->vgId = htonl(pReq->upstreamNodeId);
// SStreamDispatchRsp* pDispatchRsp = POINTER_SHIFT((*pBuf), sizeof(SMsgHead));
//
// pDispatchRsp->inputStatus = status;
// pDispatchRsp->streamId = htobe64(pReq->streamId);
// pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId);
// pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId);
// pDispatchRsp->downstreamNodeId = htonl(pTask->info.nodeId);
// pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId);
//
// return TSDB_CODE_SUCCESS;
// }
int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) {
qDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64, pTask->id.idStr,
pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen);
int32_t status = 0;
SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId);
ASSERT(pInfo != NULL);
if (!pInfo->dataAllowed) {
qWarn("s-task:%s data from task:0x%x is denied, since inputQ is closed for it", pTask->id.idStr, pReq->upstreamTaskId);
// upstream task has restarted/leader-follower switch/transferred to other dnodes
if (pReq->stage > pInfo->stage) {
qError("s-task:%s upstream task:0x%x (vgId:%d) has restart/leader-switch/vnode-transfer, prev stage:%" PRId64
", current:%" PRId64 " dispatch msg rejected",
pTask->id.idStr, pReq->upstreamTaskId, pReq->upstreamNodeId, pInfo->stage, pReq->stage);
status = TASK_INPUT_STATUS__BLOCKED;
} else {
// Current task has received the checkpoint req from the upstream task, from which the message should all be blocked
if (!pInfo->dataAllowed) {
qWarn("s-task:%s data from task:0x%x is denied, since inputQ is closed for it", pTask->id.idStr,
pReq->upstreamTaskId);
status = TASK_INPUT_STATUS__BLOCKED;
} else {
// Current task has received the checkpoint req from the upstream task, from which the message should all be
// blocked
if (pReq->type == STREAM_INPUT__CHECKPOINT_TRIGGER) {
streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId);
qDebug("s-task:%s close inputQ for upstream:0x%x", pTask->id.idStr, pReq->upstreamTaskId);
@ -271,6 +309,7 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S
status = streamTaskAppendInputBlocks(pTask, pReq);
}
}
{
// do send response with the input status
@ -290,30 +329,10 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S
return 0;
}
//int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) {
// qDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64, pTask->id.idStr,
// pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen);
//
// // todo add the input queue buffer limitation
// streamTaskEnqueueBlocks(pTask, pReq, pRsp);
// tDeleteStreamDispatchReq(pReq);
//
// if (exec) {
// if (streamTryExec(pTask) < 0) {
// return -1;
// }
// } else {
// streamSchedExec(pTask);
// }
//
// return 0;
//}
int32_t streamProcessRunReq(SStreamTask* pTask) {
if (streamTryExec(pTask) < 0) {
return -1;
}
return 0;
}
@ -338,9 +357,10 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) {
if (type == STREAM_INPUT__DATA_SUBMIT) {
SStreamDataSubmit* px = (SStreamDataSubmit*)pItem;
if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && tInputQueueIsFull(pTask)) {
qError("s-task:%s input queue is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data",
pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total,
size);
qError(
"s-task:%s input queue is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push "
"data",
pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size);
streamDataSubmitDestroy(px);
taosFreeQitem(pItem);
return -1;
@ -361,23 +381,24 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) {
msgLen, ver, total, size + SIZE_IN_MB(msgLen));
} else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE ||
type == STREAM_INPUT__REF_DATA_BLOCK) {
if (/*(pTask->info.taskLevel == TASK_LEVEL__SOURCE) && */(tInputQueueIsFull(pTask))) {
if (/*(pTask->info.taskLevel == TASK_LEVEL__SOURCE) && */ (tInputQueueIsFull(pTask))) {
qError("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort",
pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total,
size);
destroyStreamDataBlock((SStreamDataBlock*) pItem);
pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size);
destroyStreamDataBlock((SStreamDataBlock*)pItem);
return -1;
}
qDebug("s-task:%s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size);
int32_t code = taosWriteQitem(pTask->inputQueue->queue, pItem);
if (code != TSDB_CODE_SUCCESS) {
destroyStreamDataBlock((SStreamDataBlock*) pItem);
destroyStreamDataBlock((SStreamDataBlock*)pItem);
return code;
}
} else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__TRANS_STATE) {
} else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER ||
type == STREAM_INPUT__TRANS_STATE) {
taosWriteQitem(pTask->inputQueue->queue, pItem);
qDebug("s-task:%s checkpoint/trans-state blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size);
qDebug("s-task:%s level:%d %s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr,
pTask->info.taskLevel, streamGetBlockTypeStr(type), total, size);
} else if (type == STREAM_INPUT__GET_RES) {
// use the default memory limit, refactor later.
taosWriteQitem(pTask->inputQueue->queue, pItem);
@ -388,7 +409,7 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) {
if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && pTask->triggerParam != 0) {
atomic_val_compare_exchange_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE, TASK_TRIGGER_STATUS__ACTIVE);
qDebug("s-task:%s new data arrived, active the trigger, trigerStatus:%d", pTask->id.idStr, pTask->triggerStatus);
qDebug("s-task:%s new data arrived, active the trigger, triggerStatus:%d", pTask->id.idStr, pTask->triggerStatus);
}
return 0;
@ -416,26 +437,34 @@ void* streamQueueNextItem(SStreamQueue* pQueue) {
void streamTaskInputFail(SStreamTask* pTask) { atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__FAILED); }
SStreamChildEpInfo * streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId) {
int32_t num = taosArrayGetSize(pTask->pUpstreamEpInfoList);
for(int32_t i = 0; i < num; ++i) {
SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i);
void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) {
int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList);
if (num == 0) {
return;
}
for (int32_t i = 0; i < num; ++i) {
SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i);
pInfo->dataAllowed = true;
}
}
void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) {
SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId);
if (pInfo != NULL) {
pInfo->dataAllowed = false;
}
}
SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId) {
int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList);
for (int32_t i = 0; i < num; ++i) {
SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i);
if (pInfo->taskId == taskId) {
return pInfo;
}
}
qError("s-task:%s failed to find upstream task:0x%x", pTask->id.idStr, taskId);
return NULL;
}
void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) {
int32_t num = taosArrayGetSize(pTask->pUpstreamEpInfoList);
if (num == 0) {
return;
}
for(int32_t i = 0; i < num; ++i) {
SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i);
pInfo->dataAllowed = true;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -13,99 +13,72 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#if 0
#include "streamInc.h"
#include "streamInt.h"
int32_t tEncodeSStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq) {
int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->checkpointId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->nodeId) < 0) return -1;
if (tEncodeSEpSet(pEncoder, &pReq->mgmtEps) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->mnodeId) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->expireTime) < 0) return -1;
tEndEncode(pEncoder);
return pEncoder->pos;
}
int32_t tDecodeSStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq) {
int32_t tDecodeStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->checkpointId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->nodeId) < 0) return -1;
if (tDecodeSEpSet(pDecoder, &pReq->mgmtEps) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->mnodeId) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->expireTime) < 0) return -1;
tEndDecode(pDecoder);
return 0;
}
int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp) {
int32_t tEncodeStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1;
if (tEncodeI32(pEncoder, pRsp->taskId) < 0) return -1;
if (tEncodeI32(pEncoder, pRsp->nodeId) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->expireTime) < 0) return -1;
if (tEncodeI8(pEncoder, pRsp->success) < 0) return -1;
tEndEncode(pEncoder);
return pEncoder->pos;
}
int32_t tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp) {
int32_t tDecodeStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1;
if (tDecodeI32(pDecoder, &pRsp->taskId) < 0) return -1;
if (tDecodeI32(pDecoder, &pRsp->nodeId) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->expireTime) < 0) return -1;
if (tDecodeI8(pDecoder, &pRsp->success) < 0) return -1;
tEndDecode(pDecoder);
return 0;
}
int32_t tEncodeSStreamCheckpointReq(SEncoder* pEncoder, const SStreamCheckpointReq* pReq) {
int32_t tEncodeStreamCheckpointReadyMsg(SEncoder* pEncoder, const SStreamCheckpointReadyMsg* pReq) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->checkpointId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->downstreamTaskId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->downstreamNodeId) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->upstreamTaskId) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->upstreamNodeId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->upstreamTaskId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->upstreamNodeId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->childId) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->expireTime) < 0) return -1;
if (tEncodeI8(pEncoder, pReq->taskLevel) < 0) return -1;
tEndEncode(pEncoder);
return pEncoder->pos;
}
int32_t tDecodeSStreamCheckpointReq(SDecoder* pDecoder, SStreamCheckpointReq* pReq) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->checkpointId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->downstreamTaskId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->downstreamNodeId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->upstreamTaskId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->upstreamNodeId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->childId) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->expireTime) < 0) return -1;
if (tDecodeI8(pDecoder, &pReq->taskLevel) < 0) return -1;
tEndDecode(pDecoder);
return 0;
}
int32_t tEncodeSStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRsp* pRsp) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1;
if (tEncodeI32(pEncoder, pRsp->downstreamTaskId) < 0) return -1;
if (tEncodeI32(pEncoder, pRsp->downstreamNodeId) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->upstreamTaskId) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->upstreamNodeId) < 0) return -1;
if (tEncodeI32(pEncoder, pRsp->childId) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->expireTime) < 0) return -1;
if (tEncodeI8(pEncoder, pRsp->taskLevel) < 0) return -1;
tEndEncode(pEncoder);
return pEncoder->pos;
}
int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRsp) {
int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointReadyMsg* pRsp) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1;
@ -114,83 +87,258 @@ int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pR
if (tDecodeI32(pDecoder, &pRsp->upstreamTaskId) < 0) return -1;
if (tDecodeI32(pDecoder, &pRsp->upstreamNodeId) < 0) return -1;
if (tDecodeI32(pDecoder, &pRsp->childId) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->expireTime) < 0) return -1;
if (tDecodeI8(pDecoder, &pRsp->taskLevel) < 0) return -1;
tEndDecode(pDecoder);
return 0;
}
static int32_t streamAlignCheckpoint(SStreamTask* pTask, int64_t checkpointId, int32_t childId) {
if (pTask->checkpointingId == 0) {
pTask->checkpointingId = checkpointId;
pTask->checkpointAlignCnt = taosArrayGetSize(pTask->pUpstreamEpInfoList);
static int32_t streamAlignCheckpoint(SStreamTask* pTask) {
int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList);
int64_t old = atomic_val_compare_exchange_32(&pTask->checkpointAlignCnt, 0, num);
if (old == 0) {
qDebug("s-task:%s set initial align upstream num:%d", pTask->id.idStr, num);
}
ASSERT(pTask->checkpointingId == checkpointId);
return atomic_sub_fetch_32(&pTask->checkpointAlignCnt, 1);
}
static int32_t streamDoCheckpoint(SStreamMeta* pMeta, SStreamTask* pTask, int64_t checkpointId) {
// commit tdb state
streamStateCommit(pTask->pState);
// commit non-tdb state
// copy and save new state
// report to mnode
// send checkpoint req to downstream
static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) {
SStreamDataBlock* pChkpoint = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock));
if (pChkpoint == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
pChkpoint->type = checkpointType;
SSDataBlock* pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock));
if (pBlock == NULL) {
taosFreeQitem(pChkpoint);
return TSDB_CODE_OUT_OF_MEMORY;
}
pBlock->info.type = STREAM_CHECKPOINT;
pBlock->info.version = pTask->checkpointingId;
pBlock->info.rows = 1;
pBlock->info.childId = pTask->info.selfChildId;
pChkpoint->blocks = taosArrayInit(4, sizeof(SSDataBlock));//pBlock;
taosArrayPush(pChkpoint->blocks, pBlock);
taosMemoryFree(pBlock);
if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pChkpoint) < 0) {
taosFreeQitem(pChkpoint);
return TSDB_CODE_OUT_OF_MEMORY;
}
streamSchedExec(pTask);
return TSDB_CODE_SUCCESS;
}
int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq) {
ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE);
// 1. set task status to be prepared for check point, no data are allowed to put into inputQ.
taosThreadMutexLock(&pTask->lock);
pTask->status.taskStatus = TASK_STATUS__CK;
pTask->checkpointingId = pReq->checkpointId;
pTask->checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask);
// 2. let's dispatch checkpoint msg to downstream task directly and do nothing else. put the checkpoint block into
// inputQ, to make sure all blocks with less version have been handled by this task already.
int32_t code = appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER);
taosThreadMutexUnlock(&pTask->lock);
return code;
}
static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStreamTask* pTask) {
pBlock->srcTaskId = pTask->id.taskId;
pBlock->srcVgId = pTask->pMeta->vgId;
int32_t code = taosWriteQitem(pTask->outputInfo.queue->queue, pBlock);
if (code == 0) {
streamDispatchStreamBlock(pTask);
} else {
streamFreeQitem((SStreamQueueItem*)pBlock);
}
return code;
}
int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) {
SSDataBlock* pDataBlock = taosArrayGet(pBlock->blocks, 0);
int64_t checkpointId = pDataBlock->info.version;
const char* id = pTask->id.idStr;
int32_t code = TSDB_CODE_SUCCESS;
// set the task status
pTask->checkpointingId = checkpointId;
// set task status
pTask->status.taskStatus = TASK_STATUS__CK;
{ // todo: remove this when the pipeline checkpoint generating is used.
SStreamMeta* pMeta = pTask->pMeta;
taosWLockLatch(&pMeta->lock);
if (pMeta->chkptNotReadyTasks == 0) {
pMeta->chkptNotReadyTasks = streamMetaGetNumOfStreamTasks(pMeta);
pMeta->totalTasks = pMeta->chkptNotReadyTasks;
}
taosWUnLockLatch(&pMeta->lock);
}
//todo fix race condition: set the status and append checkpoint block
int32_t taskLevel = pTask->info.taskLevel;
if (taskLevel == TASK_LEVEL__SOURCE) {
if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
qDebug("s-task:%s set childIdx:%d, and add checkpoint block into outputQ", id, pTask->info.selfChildId);
continueDispatchCheckpointBlock(pBlock, pTask);
} else { // only one task exists, no need to dispatch downstream info
streamProcessCheckpointReadyMsg(pTask);
streamFreeQitem((SStreamQueueItem*)pBlock);
}
} else if (taskLevel == TASK_LEVEL__SINK || taskLevel == TASK_LEVEL__AGG) {
ASSERT(taosArrayGetSize(pTask->pUpstreamInfoList) > 0);
// update the child Id for downstream tasks
streamAddCheckpointReadyMsg(pTask, pBlock->srcTaskId, pTask->info.selfChildId, checkpointId);
// there are still some upstream tasks not send checkpoint request, do nothing and wait for then
int32_t notReady = streamAlignCheckpoint(pTask);
int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList);
if (notReady > 0) {
qDebug("s-task:%s received checkpoint block, idx:%d, %d upstream tasks not send checkpoint info yet, total:%d",
id, pTask->info.selfChildId, notReady, num);
streamFreeQitem((SStreamQueueItem*)pBlock);
return code;
}
if (taskLevel == TASK_LEVEL__SINK) {
pTask->status.taskStatus = TASK_STATUS__CK_READY;
qDebug("s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, send ready msg to upstream",
id, num);
streamFreeQitem((SStreamQueueItem*)pBlock);
streamTaskBuildCheckpoint(pTask);
} else {
qDebug(
"s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, dispatch checkpoint msg "
"downstream", id, num);
// set the needed checked downstream tasks, only when all downstream tasks do checkpoint complete, this task
// can start local checkpoint procedure
pTask->checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask);
// if all upstreams are ready for generating checkpoint, set the status to be TASK_STATUS__CK_READY
// put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task
// already. And then, dispatch check point msg to all downstream tasks
code = continueDispatchCheckpointBlock(pBlock, pTask);
}
}
return code;
}
/**
* All down stream tasks have successfully completed the check point task.
* Current stream task is allowed to start to do checkpoint things in ASYNC model.
*/
int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) {
ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE || pTask->info.taskLevel == TASK_LEVEL__AGG);
// only when all downstream tasks are send checkpoint rsp, we can start the checkpoint procedure for the agg task
int32_t notReady = atomic_sub_fetch_32(&pTask->checkpointNotReadyTasks, 1);
ASSERT(notReady >= 0);
if (notReady == 0) {
qDebug("s-task:%s all downstream tasks have completed the checkpoint, start to do checkpoint for current task",
pTask->id.idStr);
appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT);
} else {
int32_t total = streamTaskGetNumOfDownstream(pTask);
qDebug("s-task:%s %d/%d downstream tasks are not ready, wait", pTask->id.idStr, notReady, total);
}
return 0;
}
static int32_t streamDoSourceCheckpoint(SStreamMeta* pMeta, SStreamTask* pTask, int64_t checkpointId) {
// ref wal
// set status checkpointing
// do checkpoint
return 0;
}
int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq) {
int32_t code;
int64_t checkpointId = pReq->checkpointId;
int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) {
taosWLockLatch(&pMeta->lock);
code = streamDoSourceCheckpoint(pMeta, pTask, checkpointId);
if (code < 0) {
// rsp error
int64_t keys[2];
for (int32_t i = 0; i < taosArrayGetSize(pMeta->pTaskList); ++i) {
SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i);
keys[0] = pId->streamId;
keys[1] = pId->taskId;
SStreamTask* p = *(SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys));
if (p->info.fillHistory == 1) {
continue;
}
int8_t prev = p->status.taskStatus;
ASSERT(p->chkInfo.checkpointId < p->checkpointingId && p->checkpointingId == checkpointId);
p->chkInfo.checkpointId = p->checkpointingId;
streamSetStatusNormal(p);
// save the task
streamMetaSaveTask(pMeta, p);
streamTaskOpenAllUpstreamInput(p); // open inputQ for all upstream tasks
qDebug("vgId:%d s-task:%s level:%d commit task status after checkpoint completed, checkpointId:%" PRId64
", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status to be normal, prev:%s",
pMeta->vgId, p->id.idStr, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, p->chkInfo.currentVer,
streamGetTaskStatusStr(prev));
}
if (streamMetaCommit(pMeta) < 0) {
taosWUnLockLatch(&pMeta->lock);
qError("vgId:%d failed to commit stream meta after do checkpoint, checkpointId:%" PRId64 ", since %s", pMeta->vgId,
checkpointId, terrstr());
return -1;
} else {
taosWUnLockLatch(&pMeta->lock);
qInfo("vgId:%d commit stream meta after do checkpoint, checkpointId:%" PRId64 " DONE", pMeta->vgId, checkpointId);
}
return 0;
return TSDB_CODE_SUCCESS;
}
int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq) {
int32_t code;
int64_t checkpointId = pReq->checkpointId;
int32_t childId = pReq->childId;
int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) {
int32_t code = 0;
if (taosArrayGetSize(pTask->pUpstreamEpInfoList) > 0) {
code = streamAlignCheckpoint(pTask, checkpointId, childId);
if (code > 0) {
return 0;
}
if (code < 0) {
ASSERT(0);
return -1;
}
// check for all tasks, and do generate the vnode-wide checkpoint data.
SStreamMeta* pMeta = pTask->pMeta;
int32_t remain = atomic_sub_fetch_32(&pMeta->chkptNotReadyTasks, 1);
ASSERT(remain >= 0);
if (remain == 0) { // all tasks are in TASK_STATUS__CK_READY state
qDebug("s-task:%s is ready for checkpoint", pTask->id.idStr);
pMeta->totalTasks = 0;
streamBackendDoCheckpoint(pMeta, pTask->checkpointingId);
streamSaveAllTaskStatus(pMeta, pTask->checkpointingId);
qDebug("vgId:%d vnode wide checkpoint completed, save all tasks status, checkpointId:%" PRId64, pMeta->vgId,
pTask->checkpointingId);
} else {
qDebug("vgId:%d vnode wide tasks not reach checkpoint ready status, ready s-task:%s, not ready:%d/%d", pMeta->vgId,
pTask->id.idStr, remain, pMeta->totalTasks);
}
code = streamDoCheckpoint(pMeta, pTask, checkpointId);
if (code < 0) {
// rsp error
return -1;
// send check point response to upstream task
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
code = streamTaskSendCheckpointSourceRsp(pTask);
} else {
code = streamTaskSendCheckpointReadyMsg(pTask);
}
// send rsp to all children
if (code != TSDB_CODE_SUCCESS) {
// todo: let's retry send rsp to upstream/mnode
qError("s-task:%s failed to send checkpoint rsp to upstream, checkpointId:%" PRId64 ", code:%s", pTask->id.idStr,
pTask->checkpointingId, tstrerror(code));
}
return 0;
return code;
}
int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp) {
// recover step2, scan from wal
// unref wal
// set status normal
return 0;
}
#endif

View File

@ -15,7 +15,7 @@
#include "streamInt.h"
SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg) {
SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg) {
SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, pReq->totalLen);
if (pData == NULL) {
return NULL;
@ -23,6 +23,7 @@ SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq
pData->type = blockType;
pData->srcVgId = srcVg;
pData->srcTaskId = pReq->upstreamTaskId;
int32_t blockNum = pReq->blockNum;
SArray* pArray = taosArrayInit_s(sizeof(SSDataBlock), blockNum);
@ -60,16 +61,15 @@ SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamT
return NULL;
}
pStreamBlocks->srcTaskId = pTask->id.taskId;
pStreamBlocks->type = STREAM_INPUT__DATA_BLOCK;
pStreamBlocks->blocks = pRes;
if (pItem->type == STREAM_INPUT__DATA_SUBMIT) {
SStreamDataSubmit* pSubmit = (SStreamDataSubmit*)pItem;
pStreamBlocks->childId = pTask->info.selfChildId;
pStreamBlocks->sourceVer = pSubmit->ver;
} else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) {
SStreamMergedSubmit* pMerged = (SStreamMergedSubmit*)pItem;
pStreamBlocks->childId = pTask->info.selfChildId;
pStreamBlocks->sourceVer = pMerged->ver;
}
@ -121,6 +121,7 @@ SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type) {
return NULL;
}
pDataSubmit->ver = pData->ver;
pDataSubmit->submit = *pData;
*pDataSubmit->dataRef = 1; // initialize the reference count to be 1
pDataSubmit->type = type;
@ -199,6 +200,11 @@ SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem*
}
}
static void freeItems(void* param) {
SSDataBlock* pBlock = param;
taosArrayDestroy(pBlock->pDataBlock);
}
void streamFreeQitem(SStreamQueueItem* data) {
int8_t type = data->type;
if (type == STREAM_INPUT__GET_RES) {
@ -232,5 +238,22 @@ void streamFreeQitem(SStreamQueueItem* data) {
SStreamRefDataBlock* pRefBlock = (SStreamRefDataBlock*)data;
blockDataDestroy(pRefBlock->pBlock);
taosFreeQitem(pRefBlock);
} else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER) {
SStreamDataBlock* pBlock = (SStreamDataBlock*) data;
taosArrayDestroyEx(pBlock->blocks, freeItems);
taosFreeQitem(pBlock);
}
}
const char* streamGetBlockTypeStr(int32_t type) {
switch (type) {
case STREAM_INPUT__CHECKPOINT:
return "checkpoint";
case STREAM_INPUT__CHECKPOINT_TRIGGER:
return "checkpoint-trigger";
case STREAM_INPUT__TRANS_STATE:
return "trans-state";
default:
return "";
}
}

View File

@ -14,7 +14,9 @@
*/
#include "streamInt.h"
#include "trpc.h"
#include "ttimer.h"
#include "tmisce.h"
#define MAX_BLOCK_NAME_NUM 1024
#define DISPATCH_RETRY_INTERVAL_MS 300
@ -25,22 +27,38 @@ typedef struct SBlockName {
char parTbName[TSDB_TABLE_NAME_LEN];
} SBlockName;
typedef struct {
int32_t upStreamTaskId;
SEpSet upstreamNodeEpset;
SRpcMsg msg;
} SStreamChkptReadyInfo;
static void doRetryDispatchData(void* param, void* tmrId);
static int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet);
static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq);
static int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, SSDataBlock* pDataBlock,
int32_t vgSz, int64_t groupId);
static int32_t doDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId,
SEpSet* pEpSet);
static int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId,
int32_t numOfBlocks, int64_t dstTaskId, int32_t type);
static void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen) {
void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen) {
pMsg->msgType = msgType;
pMsg->pCont = pCont;
pMsg->contLen = contLen;
}
static int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) {
int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->stage) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->srcVgId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->type) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->type) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->upstreamTaskId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->srcVgId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->upstreamChildId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->upstreamNodeId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->blockNum) < 0) return -1;
@ -57,44 +75,15 @@ static int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatc
return pEncoder->pos;
}
static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq) {
int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock);
void* buf = taosMemoryCalloc(1, dataStrLen);
if (buf == NULL) return -1;
SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)buf;
pRetrieve->useconds = 0;
pRetrieve->precision = TSDB_DEFAULT_PRECISION;
pRetrieve->compressed = 0;
pRetrieve->completed = 1;
pRetrieve->streamBlockType = pBlock->info.type;
pRetrieve->numOfRows = htobe64((int64_t)pBlock->info.rows);
pRetrieve->skey = htobe64(pBlock->info.window.skey);
pRetrieve->ekey = htobe64(pBlock->info.window.ekey);
pRetrieve->version = htobe64(pBlock->info.version);
pRetrieve->watermark = htobe64(pBlock->info.watermark);
memcpy(pRetrieve->parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN);
int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock);
pRetrieve->numOfCols = htonl(numOfCols);
int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols);
actualLen += sizeof(SRetrieveTableRsp);
ASSERT(actualLen <= dataStrLen);
taosArrayPush(pReq->dataLen, &actualLen);
taosArrayPush(pReq->data, &buf);
pReq->totalLen += dataStrLen;
return 0;
}
int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->stage) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->srcVgId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->type) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->type) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->upstreamTaskId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->srcVgId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->upstreamChildId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->upstreamNodeId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->blockNum) < 0) return -1;
@ -113,14 +102,16 @@ int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) {
taosArrayPush(pReq->dataLen, &len1);
taosArrayPush(pReq->data, &data);
}
tEndDecode(pDecoder);
return 0;
}
int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, int32_t numOfBlocks,
int64_t dstTaskId, int32_t type) {
static int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId,
int32_t numOfBlocks, int64_t dstTaskId, int32_t type) {
pReq->streamId = pTask->id.streamId;
pReq->srcVgId = vgId;
pReq->stage = pTask->pMeta->stage;
pReq->upstreamTaskId = pTask->id.taskId;
pReq->upstreamChildId = pTask->info.selfChildId;
pReq->upstreamNodeId = pTask->info.nodeId;
@ -205,11 +196,11 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock)
.retrieveLen = dataStrLen,
};
int32_t sz = taosArrayGetSize(pTask->pUpstreamEpInfoList);
int32_t sz = taosArrayGetSize(pTask->pUpstreamInfoList);
ASSERT(sz > 0);
for (int32_t i = 0; i < sz; i++) {
req.reqId = tGenIdPI64();
SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i);
SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->pUpstreamInfoList, i);
req.dstNodeId = pEpInfo->nodeId;
req.dstTaskId = pEpInfo->taskId;
int32_t len;
@ -231,7 +222,9 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock)
tEncodeStreamRetrieveReq(&encoder, &req);
tEncoderClear(&encoder);
SRpcMsg rpcMsg = {.code = 0, .msgType = TDMT_STREAM_RETRIEVE, .pCont = buf, .contLen = sizeof(SMsgHead) + len};
SRpcMsg rpcMsg = {0};
initRpcMsg(&rpcMsg, TDMT_STREAM_RETRIEVE, buf, len + sizeof(SMsgHead));
if (tmsgSendReq(&pEpInfo->epSet, &rpcMsg) < 0) {
ASSERT(0);
goto CLEAR;
@ -274,175 +267,16 @@ int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pR
rpcFreeCont(buf);
return code;
}
tEncoderClear(&encoder);
msg.contLen = tlen + sizeof(SMsgHead);
msg.pCont = buf;
msg.msgType = TDMT_STREAM_TASK_CHECK;
qDebug("s-task:%s (level:%d) dispatch check msg to s-task:%" PRIx64 ":0x%x (vgId:%d)", pTask->id.idStr,
initRpcMsg(&msg, TDMT_VND_STREAM_TASK_CHECK, buf, tlen + sizeof(SMsgHead));
qDebug("s-task:%s (level:%d) send check msg to s-task:0x%" PRIx64 ":0x%x (vgId:%d)", pTask->id.idStr,
pTask->info.taskLevel, pReq->streamId, pReq->downstreamTaskId, nodeId);
tmsgSendReq(pEpSet, &msg);
return 0;
}
int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId,
SEpSet* pEpSet) {
void* buf = NULL;
int32_t code = -1;
SRpcMsg msg = {0};
int32_t tlen;
tEncodeSize(tEncodeStreamScanHistoryFinishReq, pReq, tlen, code);
if (code < 0) {
return -1;
}
buf = rpcMallocCont(sizeof(SMsgHead) + tlen);
if (buf == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
((SMsgHead*)buf)->vgId = htonl(vgId);
void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
SEncoder encoder;
tEncoderInit(&encoder, abuf, tlen);
if ((code = tEncodeStreamScanHistoryFinishReq(&encoder, pReq)) < 0) {
if (buf) {
rpcFreeCont(buf);
}
return code;
}
tEncoderClear(&encoder);
msg.contLen = tlen + sizeof(SMsgHead);
msg.pCont = buf;
msg.msgType = TDMT_STREAM_SCAN_HISTORY_FINISH;
tmsgSendReq(pEpSet, &msg);
const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus);
qDebug("s-task:%s status:%s dispatch scan-history finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pStatus,
pReq->downstreamTaskId, vgId);
return 0;
}
static int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet) {
void* buf = NULL;
int32_t code = -1;
SRpcMsg msg = {0};
// serialize
int32_t tlen;
tEncodeSize(tEncodeStreamDispatchReq, pReq, tlen, code);
if (code < 0) {
goto FAIL;
}
code = -1;
buf = rpcMallocCont(sizeof(SMsgHead) + tlen);
if (buf == NULL) {
goto FAIL;
}
((SMsgHead*)buf)->vgId = htonl(vgId);
void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
SEncoder encoder;
tEncoderInit(&encoder, abuf, tlen);
if ((code = tEncodeStreamDispatchReq(&encoder, pReq)) < 0) {
goto FAIL;
}
tEncoderClear(&encoder);
msg.contLen = tlen + sizeof(SMsgHead);
msg.pCont = buf;
msg.msgType = pTask->msgInfo.msgType;
qDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg, len:%d", pTask->id.idStr, pReq->taskId, vgId,
msg.contLen);
return tmsgSendReq(pEpSet, &msg);
FAIL:
if (buf) {
rpcFreeCont(buf);
}
return code;
}
int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, SSDataBlock* pDataBlock, int32_t vgSz,
int64_t groupId) {
uint32_t hashValue = 0;
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
if (pTask->pNameMap == NULL) {
pTask->pNameMap = tSimpleHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT));
}
void* pVal = tSimpleHashGet(pTask->pNameMap, &groupId, sizeof(int64_t));
if (pVal) {
SBlockName* pBln = (SBlockName*)pVal;
hashValue = pBln->hashValue;
if (!pDataBlock->info.parTbName[0]) {
memset(pDataBlock->info.parTbName, 0, TSDB_TABLE_NAME_LEN);
memcpy(pDataBlock->info.parTbName, pBln->parTbName, strlen(pBln->parTbName));
}
} else {
char* ctbName = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN);
if (ctbName == NULL) {
return -1;
}
if (pDataBlock->info.parTbName[0]) {
snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName);
} else {
buildCtbNameByGroupIdImpl(pTask->shuffleDispatcher.stbFullName, groupId, pDataBlock->info.parTbName);
snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName);
}
/*uint32_t hashValue = MurmurHash3_32(ctbName, strlen(ctbName));*/
SUseDbRsp* pDbInfo = &pTask->shuffleDispatcher.dbInfo;
hashValue =
taosGetTbHashVal(ctbName, strlen(ctbName), pDbInfo->hashMethod, pDbInfo->hashPrefix, pDbInfo->hashSuffix);
taosMemoryFree(ctbName);
SBlockName bln = {0};
bln.hashValue = hashValue;
memcpy(bln.parTbName, pDataBlock->info.parTbName, strlen(pDataBlock->info.parTbName));
if (tSimpleHashGetSize(pTask->pNameMap) < MAX_BLOCK_NAME_NUM) {
tSimpleHashPut(pTask->pNameMap, &groupId, sizeof(int64_t), &bln, sizeof(SBlockName));
}
}
bool found = false;
// TODO: optimize search
int32_t j;
for (j = 0; j < vgSz; j++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, j);
ASSERT(pVgInfo->vgId > 0);
if (hashValue >= pVgInfo->hashBegin && hashValue <= pVgInfo->hashEnd) {
if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) {
return -1;
}
if (pReqs[j].blockNum == 0) {
atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1);
}
pReqs[j].blockNum++;
found = true;
break;
}
}
ASSERT(found);
return 0;
}
static int32_t doDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData) {
int32_t code = 0;
int32_t numOfBlocks = taosArrayGetSize(pData->blocks);
@ -569,7 +403,11 @@ static void doRetryDispatchData(void* param, void* tmrId) {
if (!streamTaskShouldStop(&pTask->status)) {
qDebug("s-task:%s reset the waitRspCnt to be 0 before launch retry dispatch", pTask->id.idStr);
atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0);
if (streamTaskShouldPause(&pTask->status)) {
streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS * 10);
} else {
streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS);
}
} else {
atomic_sub_fetch_8(&pTask->status.timerActive, 1);
qDebug("s-task:%s should stop, abort from timer", pTask->id.idStr);
@ -580,34 +418,101 @@ static void doRetryDispatchData(void* param, void* tmrId) {
}
void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration) {
qError("s-task:%s dispatch data in %"PRId64"ms", pTask->id.idStr, waitDuration);
qError("s-task:%s dispatch data in %" PRId64 "ms", pTask->id.idStr, waitDuration);
taosTmrReset(doRetryDispatchData, waitDuration, pTask, streamEnv.timer, &pTask->launchTaskTimer);
}
int32_t streamDispatchStreamBlock(SStreamTask* pTask) {
STaskOutputInfo* pInfo = &pTask->outputInfo;
ASSERT((pInfo->type == TASK_OUTPUT__FIXED_DISPATCH || pInfo->type == TASK_OUTPUT__SHUFFLE_DISPATCH));
int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, SSDataBlock* pDataBlock, int32_t vgSz,
int64_t groupId) {
uint32_t hashValue = 0;
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
if (pTask->pNameMap == NULL) {
pTask->pNameMap = tSimpleHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT));
}
int32_t numOfElems = taosQueueItemSize(pInfo->queue->queue);
void* pVal = tSimpleHashGet(pTask->pNameMap, &groupId, sizeof(int64_t));
if (pVal) {
SBlockName* pBln = (SBlockName*)pVal;
hashValue = pBln->hashValue;
if (!pDataBlock->info.parTbName[0]) {
memset(pDataBlock->info.parTbName, 0, TSDB_TABLE_NAME_LEN);
memcpy(pDataBlock->info.parTbName, pBln->parTbName, strlen(pBln->parTbName));
}
} else {
char* ctbName = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN);
if (ctbName == NULL) {
return -1;
}
if (pDataBlock->info.parTbName[0]) {
snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName);
} else {
buildCtbNameByGroupIdImpl(pTask->shuffleDispatcher.stbFullName, groupId, pDataBlock->info.parTbName);
snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName);
}
/*uint32_t hashValue = MurmurHash3_32(ctbName, strlen(ctbName));*/
SUseDbRsp* pDbInfo = &pTask->shuffleDispatcher.dbInfo;
hashValue =
taosGetTbHashVal(ctbName, strlen(ctbName), pDbInfo->hashMethod, pDbInfo->hashPrefix, pDbInfo->hashSuffix);
taosMemoryFree(ctbName);
SBlockName bln = {0};
bln.hashValue = hashValue;
memcpy(bln.parTbName, pDataBlock->info.parTbName, strlen(pDataBlock->info.parTbName));
if (tSimpleHashGetSize(pTask->pNameMap) < MAX_BLOCK_NAME_NUM) {
tSimpleHashPut(pTask->pNameMap, &groupId, sizeof(int64_t), &bln, sizeof(SBlockName));
}
}
bool found = false;
// TODO: optimize search
int32_t j;
for (j = 0; j < vgSz; j++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, j);
ASSERT(pVgInfo->vgId > 0);
if (hashValue >= pVgInfo->hashBegin && hashValue <= pVgInfo->hashEnd) {
if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) {
return -1;
}
if (pReqs[j].blockNum == 0) {
atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1);
}
pReqs[j].blockNum++;
found = true;
break;
}
}
ASSERT(found);
return 0;
}
int32_t streamDispatchStreamBlock(SStreamTask* pTask) {
ASSERT((pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH));
const char* id = pTask->id.idStr;
int32_t numOfElems = taosQueueItemSize(pTask->outputInfo.queue->queue);
if (numOfElems > 0) {
qDebug("s-task:%s try to dispatch intermediate result block to downstream, elem in outputQ:%d", pTask->id.idStr,
numOfElems);
qDebug("s-task:%s try to dispatch intermediate block to downstream, elem in outputQ:%d", id, numOfElems);
}
// to make sure only one dispatch is running
int8_t old = atomic_val_compare_exchange_8(&pInfo->status, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT);
int8_t old =
atomic_val_compare_exchange_8(&pTask->outputInfo.status, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT);
if (old != TASK_OUTPUT_STATUS__NORMAL) {
qDebug("s-task:%s wait for dispatch rsp, not dispatch now, output status:%d", pTask->id.idStr, old);
qDebug("s-task:%s wait for dispatch rsp, not dispatch now, output status:%d", id, old);
return 0;
}
ASSERT(pTask->msgInfo.pData == NULL);
qDebug("s-task:%s start to dispatch msg, set output status:%d", pTask->id.idStr, pInfo->status);
qDebug("s-task:%s start to dispatch msg, set output status:%d", id, pTask->outputInfo.status);
SStreamDataBlock* pBlock = streamQueueNextItem(pInfo->queue);
SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputInfo.queue);
if (pBlock == NULL) {
atomic_store_8(&pInfo->status, TASK_OUTPUT_STATUS__NORMAL);
qDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", pTask->id.idStr, pInfo->status);
atomic_store_8(&pTask->outputInfo.status, TASK_OUTPUT_STATUS__NORMAL);
qDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", id, pTask->outputInfo.status);
return 0;
}
@ -623,8 +528,8 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) {
break;
}
qDebug("s-task:%s failed to dispatch msg to downstream, code:%s, output status:%d, retry cnt:%d", pTask->id.idStr,
tstrerror(terrno), pInfo->status, retryCount);
qDebug("s-task:%s failed to dispatch msg to downstream, code:%s, output status:%d, retry cnt:%d", id,
tstrerror(terrno), pTask->outputInfo.status, retryCount);
// todo deal with only partially success dispatch case
atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0);
@ -646,6 +551,294 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) {
return TSDB_CODE_SUCCESS;
}
int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) {
SStreamScanHistoryFinishReq req = {
.streamId = pTask->id.streamId,
.childId = pTask->info.selfChildId,
.upstreamTaskId = pTask->id.taskId,
.upstreamNodeId = pTask->pMeta->vgId,
};
// serialize
if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) {
req.downstreamTaskId = pTask->fixedEpDispatcher.taskId;
pTask->notReadyTasks = 1;
doDispatchScanHistoryFinishMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet);
} else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t numOfVgs = taosArrayGetSize(vgInfo);
pTask->notReadyTasks = numOfVgs;
qDebug("s-task:%s send scan-history data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr,
numOfVgs, streamGetTaskStatusStr(pTask->status.taskStatus));
for (int32_t i = 0; i < numOfVgs; i++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
req.downstreamTaskId = pVgInfo->taskId;
doDispatchScanHistoryFinishMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
}
} else {
qDebug("s-task:%s no downstream tasks, invoke scan-history finish rsp directly", pTask->id.idStr);
streamProcessScanHistoryFinishRsp(pTask);
}
return 0;
}
// this function is usually invoked by sink/agg task
int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) {
int32_t num = taosArrayGetSize(pTask->pReadyMsgList);
ASSERT(taosArrayGetSize(pTask->pUpstreamInfoList) == num);
for (int32_t i = 0; i < num; ++i) {
SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, i);
tmsgSendReq(&pInfo->upstreamNodeEpset, &pInfo->msg);
qDebug("s-task:%s level:%d checkpoint ready msg sent to upstream:0x%x", pTask->id.idStr, pTask->info.taskLevel,
pInfo->upStreamTaskId);
}
taosArrayClear(pTask->pReadyMsgList);
qDebug("s-task:%s level:%d checkpoint ready msg sent to all %d upstreams", pTask->id.idStr, pTask->info.taskLevel, num);
return TSDB_CODE_SUCCESS;
}
// this function is only invoked by source task, and send rsp to mnode
int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask) {
ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE && taosArrayGetSize(pTask->pReadyMsgList) == 1);
SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, 0);
tmsgSendRsp(&pInfo->msg);
taosArrayClear(pTask->pReadyMsgList);
qDebug("s-task:%s level:%d source checkpoint completed msg sent to mnode", pTask->id.idStr, pTask->info.taskLevel);
return TSDB_CODE_SUCCESS;
}
int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq) {
int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock);
void* buf = taosMemoryCalloc(1, dataStrLen);
if (buf == NULL) return -1;
SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)buf;
pRetrieve->useconds = 0;
pRetrieve->precision = TSDB_DEFAULT_PRECISION;
pRetrieve->compressed = 0;
pRetrieve->completed = 1;
pRetrieve->streamBlockType = pBlock->info.type;
pRetrieve->numOfRows = htobe64((int64_t)pBlock->info.rows);
pRetrieve->skey = htobe64(pBlock->info.window.skey);
pRetrieve->ekey = htobe64(pBlock->info.window.ekey);
pRetrieve->version = htobe64(pBlock->info.version);
pRetrieve->watermark = htobe64(pBlock->info.watermark);
memcpy(pRetrieve->parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN);
int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock);
pRetrieve->numOfCols = htonl(numOfCols);
int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols);
actualLen += sizeof(SRetrieveTableRsp);
ASSERT(actualLen <= dataStrLen);
taosArrayPush(pReq->dataLen, &actualLen);
taosArrayPush(pReq->data, &buf);
pReq->totalLen += dataStrLen;
return 0;
}
int32_t doDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId,
SEpSet* pEpSet) {
void* buf = NULL;
int32_t code = -1;
SRpcMsg msg = {0};
int32_t tlen;
tEncodeSize(tEncodeStreamScanHistoryFinishReq, pReq, tlen, code);
if (code < 0) {
return -1;
}
buf = rpcMallocCont(sizeof(SMsgHead) + tlen);
if (buf == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
((SMsgHead*)buf)->vgId = htonl(vgId);
void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
SEncoder encoder;
tEncoderInit(&encoder, abuf, tlen);
if ((code = tEncodeStreamScanHistoryFinishReq(&encoder, pReq)) < 0) {
if (buf) {
rpcFreeCont(buf);
}
return code;
}
tEncoderClear(&encoder);
initRpcMsg(&msg, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, buf, tlen + sizeof(SMsgHead));
tmsgSendReq(pEpSet, &msg);
const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus);
qDebug("s-task:%s status:%s dispatch scan-history finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pStatus,
pReq->downstreamTaskId, vgId);
return 0;
}
int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet) {
void* buf = NULL;
int32_t code = -1;
SRpcMsg msg = {0};
// serialize
int32_t tlen;
tEncodeSize(tEncodeStreamDispatchReq, pReq, tlen, code);
if (code < 0) {
goto FAIL;
}
code = -1;
buf = rpcMallocCont(sizeof(SMsgHead) + tlen);
if (buf == NULL) {
goto FAIL;
}
((SMsgHead*)buf)->vgId = htonl(vgId);
void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
SEncoder encoder;
tEncoderInit(&encoder, abuf, tlen);
if ((code = tEncodeStreamDispatchReq(&encoder, pReq)) < 0) {
goto FAIL;
}
tEncoderClear(&encoder);
initRpcMsg(&msg, pTask->msgInfo.msgType, buf, tlen + sizeof(SMsgHead));
qDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg", pTask->id.idStr, pReq->taskId, vgId);
return tmsgSendReq(pEpSet, &msg);
FAIL:
if (buf) {
rpcFreeCont(buf);
}
return code;
}
int32_t buildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SRpcMsg* pMsg,
int8_t isSucceed) {
int32_t len = 0;
int32_t code = 0;
SEncoder encoder;
SStreamCheckpointSourceRsp rsp = {
.checkpointId = pReq->checkpointId,
.taskId = pReq->taskId,
.nodeId = pReq->nodeId,
.streamId = pReq->streamId,
.expireTime = pReq->expireTime,
.mnodeId = pReq->mnodeId,
.success = isSucceed,
};
tEncodeSize(tEncodeStreamCheckpointSourceRsp, &rsp, len, code);
if (code < 0) {
return code;
}
void* pBuf = rpcMallocCont(sizeof(SMsgHead) + len);
if (pBuf == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
((SMsgHead*)pBuf)->vgId = htonl(pReq->mnodeId);
void* abuf = POINTER_SHIFT(pBuf, sizeof(SMsgHead));
tEncoderInit(&encoder, (uint8_t*)abuf, len);
tEncodeStreamCheckpointSourceRsp(&encoder, &rsp);
tEncoderClear(&encoder);
initRpcMsg(pMsg, 0, pBuf, sizeof(SMsgHead) + len);
pMsg->info = *pRpcInfo;
return 0;
}
int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo,
SStreamTask* pTask, int8_t isSucceed) {
SStreamChkptReadyInfo info = {0};
buildCheckpointSourceRsp(pReq, pRpcInfo, &info.msg, isSucceed);
if (pTask->pReadyMsgList == NULL) {
pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo));
}
taosArrayPush(pTask->pReadyMsgList, &info);
qDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, (int32_t)taosArrayGetSize(pTask->pReadyMsgList));
return TSDB_CODE_SUCCESS;
}
int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, int32_t index, int64_t checkpointId) {
int32_t code = 0;
int32_t tlen = 0;
void* buf = NULL;
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
return TSDB_CODE_SUCCESS;
}
SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId);
SStreamCheckpointReadyMsg req = {0};
req.downstreamNodeId = pTask->pMeta->vgId;
req.downstreamTaskId = pTask->id.taskId;
req.streamId = pTask->id.streamId;
req.checkpointId = checkpointId;
req.childId = pInfo->childId;
req.upstreamNodeId = pInfo->nodeId;
req.upstreamTaskId = pInfo->taskId;
tEncodeSize(tEncodeStreamCheckpointReadyMsg, &req, tlen, code);
if (code < 0) {
return -1;
}
buf = rpcMallocCont(sizeof(SMsgHead) + tlen);
if (buf == NULL) {
return -1;
}
((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId);
void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
SEncoder encoder;
tEncoderInit(&encoder, abuf, tlen);
if ((code = tEncodeStreamCheckpointReadyMsg(&encoder, &req)) < 0) {
rpcFreeCont(buf);
return code;
}
tEncoderClear(&encoder);
ASSERT(req.upstreamTaskId != 0);
SStreamChkptReadyInfo info = {.upStreamTaskId = pInfo->taskId, .upstreamNodeEpset = pInfo->epSet};
initRpcMsg(&info.msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead));
info.msg.info.noResp = 1; // refactor later.
qDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 ":0x%x (vgId:%d) idx:%d",
pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.downstreamNodeId, index);
if (pTask->pReadyMsgList == NULL) {
pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo));
}
taosArrayPush(pTask->pReadyMsgList, &info);
return 0;
}
int32_t tEncodeCompleteHistoryDataMsg(SEncoder* pEncoder, const SStreamCompleteHistoryMsg* pReq) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1;
@ -668,7 +861,7 @@ int32_t tDecodeCompleteHistoryDataMsg(SDecoder* pDecoder, SStreamCompleteHistory
return 0;
}
int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, SStreamScanHistoryFinishReq* pReq) {
int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, int32_t* pLen) {
int32_t len = 0;
int32_t code = 0;
SEncoder encoder;
@ -699,6 +892,16 @@ int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo,
tEncodeCompleteHistoryDataMsg(&encoder, &msg);
tEncoderClear(&encoder);
*pBuffer = pBuf;
*pLen = len;
return 0;
}
int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, SStreamScanHistoryFinishReq* pReq) {
void* pBuf = NULL;
int32_t len = 0;
streamTaskBuildScanhistoryRspMsg(pTask, pReq, &pBuf, &len);
SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId);
SStreamContinueExecInfo info = {.taskId = pReq->upstreamTaskId, .epset = pInfo->epSet};
@ -821,3 +1024,57 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i
return 0;
}
int32_t tEncodeStreamTaskUpdateMsg(SEncoder* pEncoder, const SStreamTaskNodeUpdateMsg* pMsg) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pMsg->streamId) < 0) return -1;
if (tEncodeI32(pEncoder, pMsg->taskId) < 0) return -1;
int32_t size = taosArrayGetSize(pMsg->pNodeList);
if (tEncodeI32(pEncoder, size) < 0) return -1;
for (int32_t i = 0; i < size; ++i) {
SNodeUpdateInfo* pInfo = taosArrayGet(pMsg->pNodeList, i);
if (tEncodeI32(pEncoder, pInfo->nodeId) < 0) return -1;
if (tEncodeSEpSet(pEncoder, &pInfo->prevEp) < 0) return -1;
if (tEncodeSEpSet(pEncoder, &pInfo->newEp) < 0) return -1;
}
tEndEncode(pEncoder);
return pEncoder->pos;
}
int32_t tDecodeStreamTaskUpdateMsg(SDecoder* pDecoder, SStreamTaskNodeUpdateMsg* pMsg) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pMsg->streamId) < 0) return -1;
if (tDecodeI32(pDecoder, &pMsg->taskId) < 0) return -1;
int32_t size = 0;
if (tDecodeI32(pDecoder, &size) < 0) return -1;
pMsg->pNodeList = taosArrayInit(size, sizeof(SNodeUpdateInfo));
for (int32_t i = 0; i < size; ++i) {
SNodeUpdateInfo info = {0};
if (tDecodeI32(pDecoder, &info.nodeId) < 0) return -1;
if (tDecodeSEpSet(pDecoder, &info.prevEp) < 0) return -1;
if (tDecodeSEpSet(pDecoder, &info.newEp) < 0) return -1;
taosArrayPush(pMsg->pNodeList, &info);
}
tEndDecode(pDecoder);
return 0;
}
int32_t tEncodeStreamTaskUpdateRsp(SEncoder* pEncoder, const SStreamTaskNodeUpdateRsp* pMsg) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pMsg->streamId) < 0) return -1;
if (tEncodeI32(pEncoder, pMsg->taskId) < 0) return -1;
tEndEncode(pEncoder);
return pEncoder->pos;
}
int32_t tDecodeStreamTaskUpdateRsp(SDecoder* pDecoder, SStreamTaskNodeUpdateRsp* pMsg) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pMsg->streamId) < 0) return -1;
if (tDecodeI32(pDecoder, &pMsg->taskId) < 0) return -1;
tEndDecode(pDecoder);
return 0;
}

View File

@ -20,7 +20,6 @@
#define MIN_STREAM_EXEC_BATCH_NUM 4
#define STREAM_RESULT_DUMP_THRESHOLD 100
static int32_t updateCheckPointInfo(SStreamTask* pTask);
static int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask);
bool streamTaskShouldStop(const SStreamStatus* pStatus) {
@ -30,17 +29,11 @@ bool streamTaskShouldStop(const SStreamStatus* pStatus) {
bool streamTaskShouldPause(const SStreamStatus* pStatus) {
int32_t status = atomic_load_8((int8_t*)&pStatus->taskStatus);
return (status == TASK_STATUS__PAUSE || status == TASK_STATUS__HALT);
return (status == TASK_STATUS__PAUSE);
}
static int32_t doDumpResult(SStreamTask* pTask, SStreamQueueItem* pItem, SArray* pRes, int32_t size, int64_t* totalSize,
int32_t* totalBlocks) {
int32_t code = updateCheckPointInfo(pTask);
if (code != TSDB_CODE_SUCCESS) {
taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes);
return code;
}
int32_t numOfBlocks = taosArrayGetSize(pRes);
if (numOfBlocks > 0) {
SStreamDataBlock* pStreamBlocks = createStreamBlockFromResults(pItem, pTask, size, pRes);
@ -53,7 +46,7 @@ static int32_t doDumpResult(SStreamTask* pTask, SStreamQueueItem* pItem, SArray*
qDebug("s-task:%s dump stream result data blocks, num:%d, size:%.2fMiB", pTask->id.idStr, numOfBlocks,
SIZE_IN_MB(size));
code = streamTaskOutputResultBlock(pTask, pStreamBlocks);
int32_t code = streamTaskOutputResultBlock(pTask, pStreamBlocks);
if (code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY) { // back pressure and record position
destroyStreamDataBlock(pStreamBlocks);
return -1;
@ -103,7 +96,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i
resetTaskInfo(pExecutor);
}
qError("unexpected stream execution, s-task:%s since %s", pTask->id.idStr, terrstr());
qError("unexpected stream execution, s-task:%s since %s", pTask->id.idStr, tstrerror(code));
continue;
}
@ -119,8 +112,8 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i
taosArrayPush(pRes, &block);
numOfBlocks += 1;
qDebug("s-task:%s(child %d) retrieve process completed, reqId:0x%" PRIx64" dump results", pTask->id.idStr, pTask->info.selfChildId,
pRetrieveBlock->reqId);
qDebug("s-task:%s(child %d) retrieve process completed, reqId:0x%" PRIx64 " dump results", pTask->id.idStr,
pTask->info.selfChildId, pRetrieveBlock->reqId);
}
break;
@ -131,6 +124,8 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i
// TODO
}
continue;
} else if (output->info.type == STREAM_CHECKPOINT) {
continue; // checkpoint block not dispatch to downstream tasks
}
SSDataBlock block = {0};
@ -253,49 +248,27 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize) {
return 0;
}
int32_t updateCheckPointInfo(SStreamTask* pTask) {
int64_t ckId = 0;
int64_t dataVer = 0;
qGetCheckpointVersion(pTask->exec.pExecutor, &dataVer, &ckId);
int32_t streamTaskGetInputQItems(const SStreamTask* pTask) {
int32_t numOfItems1 = taosQueueItemSize(pTask->inputQueue->queue);
int32_t numOfItems2 = taosQallItemSize(pTask->inputQueue->qall);
SCheckpointInfo* pCkInfo = &pTask->chkInfo;
if (ckId > pCkInfo->id) { // save it since the checkpoint is updated
qDebug("s-task:%s exec end, start to update check point, ver from %" PRId64 " to %" PRId64
", checkPoint id:%" PRId64 " -> %" PRId64,
pTask->id.idStr, pCkInfo->version, dataVer, pCkInfo->id, ckId);
pTask->chkInfo = (SCheckpointInfo){.version = dataVer, .id = ckId, .currentVer = pCkInfo->currentVer};
taosWLockLatch(&pTask->pMeta->lock);
streamMetaSaveTask(pTask->pMeta, pTask);
if (streamMetaCommit(pTask->pMeta) < 0) {
taosWUnLockLatch(&pTask->pMeta->lock);
qError("s-task:%s failed to commit stream meta, since %s", pTask->id.idStr, terrstr());
return -1;
} else {
taosWUnLockLatch(&pTask->pMeta->lock);
qDebug("s-task:%s update checkpoint ver succeed", pTask->id.idStr);
}
}
return TSDB_CODE_SUCCESS;
return numOfItems1 + numOfItems2;
}
// wait for the stream task to be idle
static void waitForTaskIdle(SStreamTask* pTask, SStreamTask* pStreamTask) {
// wait for the stream task to be idle
int64_t st = taosGetTimestampMs();
const char* id = pTask->id.idStr;
int64_t st = taosGetTimestampMs();
while (!streamTaskIsIdle(pStreamTask)) {
qDebug("s-task:%s level:%d wait for stream task:%s to be idle, check again in 100ms", pTask->id.idStr,
pTask->info.taskLevel, pStreamTask->id.idStr);
qDebug("s-task:%s level:%d wait for stream task:%s to be idle, check again in 100ms", id, pTask->info.taskLevel,
pStreamTask->id.idStr);
taosMsleep(100);
}
double el = (taosGetTimestampMs() - st) / 1000.0;
if (el > 0) {
qDebug("s-task:%s wait for stream task:%s for %.2fs to be idle", pTask->id.idStr,
pStreamTask->id.idStr, el);
qDebug("s-task:%s wait for stream task:%s for %.2fs to be idle", id, pStreamTask->id.idStr, el);
}
}
@ -335,7 +308,7 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) {
if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) {
ASSERT(status == TASK_STATUS__HALT || status == TASK_STATUS__DROPPING);
} else {
ASSERT(status == TASK_STATUS__SCAN_HISTORY);
ASSERT(status == TASK_STATUS__NORMAL);
pStreamTask->status.taskStatus = TASK_STATUS__HALT;
qDebug("s-task:%s halt by related fill-history task:%s", pStreamTask->id.idStr, pTask->id.idStr);
}
@ -388,7 +361,8 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) {
// 7. pause allowed.
streamTaskEnablePause(pStreamTask);
if (taosQueueEmpty(pStreamTask->inputQueue->queue)) {
SStreamRefDataBlock* pItem = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0);;
SStreamRefDataBlock* pItem = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0);
SSDataBlock* pDelBlock = createSpecialDataBlock(STREAM_DELETE_DATA);
pDelBlock->info.rows = 0;
pDelBlock->info.version = 0;
@ -419,89 +393,53 @@ int32_t streamTransferStateToStreamTask(SStreamTask* pTask) {
return code;
}
static int32_t extractBlocksFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks) {
int32_t retryTimes = 0;
int32_t MAX_RETRY_TIMES = 5;
const char* id = pTask->id.idStr;
// set input
static void doSetStreamInputBlock(SStreamTask* pTask, const void* pInput, int64_t* pVer, const char* id) {
void* pExecutor = pTask->exec.pExecutor;
if (pTask->info.taskLevel == TASK_LEVEL__SINK) { // extract block from inputQ, one-by-one
while (1) {
if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) {
qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks);
return TSDB_CODE_SUCCESS;
}
const SStreamQueueItem* pItem = pInput;
if (pItem->type == STREAM_INPUT__GET_RES) {
const SStreamTrigger* pTrigger = (const SStreamTrigger*)pInput;
qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK);
SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue);
if (qItem == NULL) {
qDebug("===stream===break batchSize:%d, %s", *numOfBlocks, id);
return TSDB_CODE_SUCCESS;
}
} else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) {
ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE);
const SStreamDataSubmit* pSubmit = (const SStreamDataSubmit*)pInput;
qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT);
qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, id, pSubmit,
pSubmit->submit.msgStr, pSubmit->submit.msgLen, pSubmit->submit.ver);
ASSERT((*pVer) <= pSubmit->submit.ver);
(*pVer) = pSubmit->submit.ver;
qDebug("s-task:%s sink task handle block one-by-one, type:%d", id, qItem->type);
} else if (pItem->type == STREAM_INPUT__DATA_BLOCK || pItem->type == STREAM_INPUT__DATA_RETRIEVE) {
const SStreamDataBlock* pBlock = (const SStreamDataBlock*)pInput;
*numOfBlocks = 1;
*pInput = qItem;
return TSDB_CODE_SUCCESS;
}
}
SArray* pBlockList = pBlock->blocks;
int32_t numOfBlocks = taosArrayGetSize(pBlockList);
qDebug("s-task:%s set sdata blocks as input num:%d, ver:%" PRId64, id, numOfBlocks, pBlock->sourceVer);
qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__DATA_BLOCK);
// non sink task
while (1) {
if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) {
qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks);
return TSDB_CODE_SUCCESS;
}
} else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) {
const SStreamMergedSubmit* pMerged = (const SStreamMergedSubmit*)pInput;
SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue);
if (qItem == NULL) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && (++retryTimes) < MAX_RETRY_TIMES) {
taosMsleep(10);
qDebug("try again batchSize:%d, retry:%d, %s", *numOfBlocks, retryTimes, id);
continue;
}
SArray* pBlockList = pMerged->submits;
int32_t numOfBlocks = taosArrayGetSize(pBlockList);
qDebug("s-task:%s %p set (merged) submit blocks as a batch, numOfBlocks:%d, ver:%" PRId64, id, pTask, numOfBlocks,
pMerged->ver);
qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__MERGED_SUBMIT);
ASSERT((*pVer) <= pMerged->ver);
(*pVer) = pMerged->ver;
qDebug("break batchSize:%d, %s", *numOfBlocks, id);
return TSDB_CODE_SUCCESS;
}
} else if (pItem->type == STREAM_INPUT__REF_DATA_BLOCK) {
const SStreamRefDataBlock* pRefBlock = (const SStreamRefDataBlock*)pInput;
qSetMultiStreamInput(pExecutor, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK);
} else if (pItem->type == STREAM_INPUT__CHECKPOINT || pItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER) {
const SStreamDataBlock* pCheckpoint = (const SStreamDataBlock*)pInput;
qSetMultiStreamInput(pExecutor, pCheckpoint->blocks, 1, pItem->type);
// do not merge blocks for sink node and check point data block
if (qItem->type == STREAM_INPUT__CHECKPOINT || qItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER ||
qItem->type == STREAM_INPUT__TRANS_STATE) {
if (*pInput == NULL) {
qDebug("s-task:%s checkpoint/transtate msg extracted, start to process immediately", id);
*numOfBlocks = 1;
*pInput = qItem;
return TSDB_CODE_SUCCESS;
} else {
// previous existed blocks needs to be handle, before handle the checkpoint msg block
qDebug("s-task:%s checkpoint/transtate msg extracted, handle previous blocks, numOfBlocks:%d", id, *numOfBlocks);
streamQueueProcessFail(pTask->inputQueue);
return TSDB_CODE_SUCCESS;
}
} else {
if (*pInput == NULL) {
ASSERT((*numOfBlocks) == 0);
*pInput = qItem;
} else {
// todo we need to sort the data block, instead of just appending into the array list.
void* newRet = streamMergeQueueItem(*pInput, qItem);
if (newRet == NULL) {
qError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d", id, *numOfBlocks);
streamQueueProcessFail(pTask->inputQueue);
return TSDB_CODE_SUCCESS;
}
*pInput = newRet;
}
*numOfBlocks += 1;
streamQueueProcessSuccess(pTask->inputQueue);
if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) {
qDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM);
return TSDB_CODE_SUCCESS;
}
}
ASSERT(0);
}
}
@ -569,20 +507,28 @@ int32_t streamExecForAll(SStreamTask* pTask) {
const char* id = pTask->id.idStr;
while (1) {
int32_t batchSize = 0;
int32_t numOfBlocks = 0;
SStreamQueueItem* pInput = NULL;
if (streamTaskShouldStop(&pTask->status)) {
qDebug("s-task:%s stream task stopped, abort", id);
qDebug("s-task:%s stream task is stopped", id);
break;
}
// merge multiple input data if possible in the input queue.
qDebug("s-task:%s start to extract data block from inputQ", id);
/*int32_t code = */extractBlocksFromInputQ(pTask, &pInput, &batchSize);
/*int32_t code = */ extractBlocksFromInputQ(pTask, &pInput, &numOfBlocks);
if (pInput == NULL) {
ASSERT(batchSize == 0);
break;
ASSERT(numOfBlocks == 0);
return 0;
}
int32_t type = pInput->type;
// dispatch checkpoint msg to all downstream tasks
if (type == STREAM_INPUT__CHECKPOINT_TRIGGER) {
streamProcessCheckpointBlock(pTask, (SStreamDataBlock*)pInput);
continue;
}
if (pInput->type == STREAM_INPUT__TRANS_STATE) {
@ -591,60 +537,52 @@ int32_t streamExecForAll(SStreamTask* pTask) {
}
if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
ASSERT(pInput->type == STREAM_INPUT__DATA_BLOCK);
qDebug("s-task:%s sink task start to sink %d blocks", id, batchSize);
ASSERT(type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__CHECKPOINT);
if (type == STREAM_INPUT__DATA_BLOCK) {
qDebug("s-task:%s sink task start to sink %d blocks", id, numOfBlocks);
streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pInput);
continue;
}
}
int64_t st = taosGetTimestampMs();
qDebug("s-task:%s start to process batch of blocks, num:%d", id, batchSize);
{
// set input
void* pExecutor = pTask->exec.pExecutor;
const SStreamQueueItem* pItem = pInput;
if (pItem->type == STREAM_INPUT__GET_RES) {
const SStreamTrigger* pTrigger = (const SStreamTrigger*)pInput;
qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK);
} else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) {
ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE);
const SStreamDataSubmit* pSubmit = (const SStreamDataSubmit*)pInput;
qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT);
qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, id, pSubmit,
pSubmit->submit.msgStr, pSubmit->submit.msgLen, pSubmit->submit.ver);
} else if (pItem->type == STREAM_INPUT__DATA_BLOCK || pItem->type == STREAM_INPUT__DATA_RETRIEVE) {
const SStreamDataBlock* pBlock = (const SStreamDataBlock*)pInput;
qDebug("s-task:%s start to process batch of blocks, num:%d, type:%d", id, numOfBlocks, pItem->type);
SArray* pBlockList = pBlock->blocks;
int32_t numOfBlocks = taosArrayGetSize(pBlockList);
qDebug("s-task:%s set sdata blocks as input num:%d, ver:%" PRId64, id, numOfBlocks, pBlock->sourceVer);
qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__DATA_BLOCK);
} else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) {
const SStreamMergedSubmit* pMerged = (const SStreamMergedSubmit*)pInput;
SArray* pBlockList = pMerged->submits;
int32_t numOfBlocks = taosArrayGetSize(pBlockList);
qDebug("s-task:%s %p set (merged) submit blocks as a batch, numOfBlocks:%d", id, pTask, numOfBlocks);
qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__MERGED_SUBMIT);
} else if (pItem->type == STREAM_INPUT__REF_DATA_BLOCK) {
const SStreamRefDataBlock* pRefBlock = (const SStreamRefDataBlock*)pInput;
qSetMultiStreamInput(pExecutor, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK);
} else {
ASSERT(0);
}
}
int64_t ver = pTask->chkInfo.checkpointVer;
doSetStreamInputBlock(pTask, pInput, &ver, id);
int64_t resSize = 0;
int32_t totalBlocks = 0;
streamTaskExecImpl(pTask, pInput, &resSize, &totalBlocks);
double el = (taosGetTimestampMs() - st) / 1000.0;
qDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d",
id, el, SIZE_IN_MB(resSize), totalBlocks);
qDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", id, el,
SIZE_IN_MB(resSize), totalBlocks);
// update the currentVer if processing the submit blocks.
ASSERT(pTask->chkInfo.checkpointVer <= pTask->chkInfo.currentVer && ver >= pTask->chkInfo.checkpointVer);
if (ver != pTask->chkInfo.checkpointVer) {
qDebug("s-task:%s update checkpointVer(unsaved) from %" PRId64 " to %" PRId64, pTask->id.idStr,
pTask->chkInfo.checkpointVer, ver);
pTask->chkInfo.checkpointVer = ver;
}
streamFreeQitem(pInput);
// todo other thread may change the status
// do nothing after sync executor state to storage backend, untill the vnode-level checkpoint is completed.
if (type == STREAM_INPUT__CHECKPOINT) {
// ASSERT(pTask->status.taskStatus == TASK_STATUS__CK);
// pTask->status.taskStatus = TASK_STATUS__CK_READY;
qDebug("s-task:%s checkpoint block received, set the status:%s", pTask->id.idStr,
streamGetTaskStatusStr(pTask->status.taskStatus));
streamTaskBuildCheckpoint(pTask);
return 0;
}
}
return 0;
@ -671,7 +609,8 @@ int32_t streamTryExec(SStreamTask* pTask) {
return -1;
}
// todo the task should be commit here
// streamTaskBuildCheckpoint(pTask);
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus),
pTask->status.schedStatus);
@ -711,7 +650,7 @@ int32_t streamTaskReloadState(SStreamTask* pTask) {
}
int32_t streamAlignTransferState(SStreamTask* pTask) {
int32_t numOfUpstream = taosArrayGetSize(pTask->pUpstreamEpInfoList);
int32_t numOfUpstream = taosArrayGetSize(pTask->pUpstreamInfoList);
int32_t old = atomic_val_compare_exchange_32(&pTask->transferStateAlignCnt, 0, numOfUpstream);
if (old == 0) {
qDebug("s-task:%s set the transfer state aligncnt %d", pTask->id.idStr, numOfUpstream);

View File

@ -16,49 +16,113 @@
#include "executor.h"
#include "streamBackendRocksdb.h"
#include "streamInt.h"
#include "tmisce.h"
#include "tref.h"
#include "tstream.h"
#include "ttimer.h"
#define META_HB_CHECK_INTERVAL 200
#define META_HB_SEND_IDLE_COUNTER 25 // send hb every 5 sec
#define STREAM_TASK_KEY_LEN ((sizeof(int64_t)) << 1)
static TdThreadOnce streamMetaModuleInit = PTHREAD_ONCE_INIT;
int32_t streamBackendId = 0;
int32_t streamBackendCfWrapperId = 0;
int32_t streamMetaId = 0;
static int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta);
static void metaHbToMnode(void* param, void* tmrId);
static void streamMetaClear(SStreamMeta* pMeta);
static int32_t streamMetaBegin(SStreamMeta* pMeta);
static void streamMetaCloseImpl(void* arg);
static void extractStreamTaskKey(int64_t* pKey, const SStreamTask* pTask);
typedef struct {
TdThreadMutex mutex;
SHashObj* pTable;
} SMetaRefMgt;
SMetaRefMgt gMetaRefMgt;
void metaRefMgtInit();
void metaRefMgtCleanup();
int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid);
static void streamMetaEnvInit() {
streamBackendId = taosOpenRef(64, streamBackendCleanup);
streamBackendCfWrapperId = taosOpenRef(64, streamBackendHandleCleanup);
streamMetaId = taosOpenRef(64, streamMetaCloseImpl);
metaRefMgtInit();
}
void streamMetaInit() { taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); }
void streamMetaCleanup() {
taosCloseRef(streamBackendId);
taosCloseRef(streamBackendCfWrapperId);
taosCloseRef(streamMetaId);
metaRefMgtCleanup();
}
SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId) {
void metaRefMgtInit() {
taosThreadMutexInit(&(gMetaRefMgt.mutex), NULL);
gMetaRefMgt.pTable = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK);
}
void metaRefMgtCleanup() {
void* pIter = taosHashIterate(gMetaRefMgt.pTable, NULL);
while (pIter) {
SArray* list = *(SArray**)pIter;
for (int i = 0; i < taosArrayGetSize(list); i++) {
void* rid = taosArrayGetP(list, i);
taosMemoryFree(rid);
}
taosArrayDestroy(list);
pIter = taosHashIterate(gMetaRefMgt.pTable, pIter);
}
taosHashCleanup(gMetaRefMgt.pTable);
taosThreadMutexDestroy(&gMetaRefMgt.mutex);
}
int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid) {
taosThreadMutexLock(&gMetaRefMgt.mutex);
void* p = taosHashGet(gMetaRefMgt.pTable, &vgId, sizeof(vgId));
if (p == NULL) {
SArray* list = taosArrayInit(8, sizeof(void*));
taosArrayPush(list, &rid);
taosHashPut(gMetaRefMgt.pTable, &vgId, sizeof(vgId), &list, sizeof(void*));
} else {
SArray* list = *(SArray**)p;
taosArrayPush(list, &rid);
}
taosThreadMutexUnlock(&gMetaRefMgt.mutex);
return 0;
}
SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage) {
int32_t code = -1;
SStreamMeta* pMeta = taosMemoryCalloc(1, sizeof(SStreamMeta));
if (pMeta == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
qError("vgId:%d failed to prepare stream meta, alloc size:%" PRIzu ", out of memory", vgId, sizeof(SStreamMeta));
return NULL;
}
int32_t len = strlen(path) + 20;
char* streamPath = taosMemoryCalloc(1, len);
sprintf(streamPath, "%s/%s", path, "stream");
pMeta->path = taosStrdup(streamPath);
int32_t len = strlen(path) + 64;
char* tpath = taosMemoryCalloc(1, len);
sprintf(tpath, "%s%s%s", path, TD_DIRSEP, "stream");
pMeta->path = tpath;
if (tdbOpen(pMeta->path, 16 * 1024, 1, &pMeta->db, 0) < 0) {
goto _err;
}
memset(streamPath, 0, len);
sprintf(streamPath, "%s/%s", pMeta->path, "checkpoints");
code = taosMulModeMkDir(streamPath, 0755, false);
if (code != 0) {
terrno = TAOS_SYSTEM_ERROR(code);
goto _err;
}
if (tdbTbOpen("task.db", sizeof(int32_t), -1, NULL, pMeta->db, &pMeta->pTaskDb, 0) < 0) {
if (tdbTbOpen("task.db", STREAM_TASK_KEY_LEN, -1, NULL, pMeta->db, &pMeta->pTaskDb, 0) < 0) {
goto _err;
}
@ -66,6 +130,10 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
goto _err;
}
if (streamMetaBegin(pMeta) < 0) {
goto _err;
}
_hash_fn_t fp = taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR);
pMeta->pTasks = taosHashInit(64, fp, true, HASH_NO_LOCK);
if (pMeta->pTasks == NULL) {
@ -73,80 +141,187 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
}
// task list
pMeta->pTaskList = taosArrayInit(4, sizeof(SStreamId));
pMeta->pTaskList = taosArrayInit(4, sizeof(SStreamTaskId));
if (pMeta->pTaskList == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
if (streamMetaBegin(pMeta) < 0) {
goto _err;
}
pMeta->walScanCounter = 0;
pMeta->vgId = vgId;
pMeta->ahandle = ahandle;
pMeta->expandFunc = expandFunc;
pMeta->stage = stage;
memset(streamPath, 0, len);
sprintf(streamPath, "%s/%s", pMeta->path, "state");
code = taosMulModeMkDir(streamPath, 0755, false);
if (code != 0) {
terrno = TAOS_SYSTEM_ERROR(code);
goto _err;
}
// send heartbeat every 5sec.
pMeta->rid = taosAddRef(streamMetaId, pMeta);
int64_t* pRid = taosMemoryMalloc(sizeof(int64_t));
*pRid = pMeta->rid;
metaRefMgtAdd(pMeta->vgId, pRid);
pMeta->hbInfo.hbTmr = taosTmrStart(metaHbToMnode, META_HB_CHECK_INTERVAL, pRid, streamEnv.timer);
pMeta->hbInfo.tickCounter = 0;
pMeta->hbInfo.stopFlag = 0;
pMeta->streamBackend = streamBackendInit(streamPath);
if (pMeta->streamBackend == NULL) {
goto _err;
}
pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend);
pMeta->pTaskBackendUnique =
taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK);
pMeta->chkpSaved = taosArrayInit(4, sizeof(int64_t));
pMeta->chkpInUse = taosArrayInit(4, sizeof(int64_t));
pMeta->chkpCap = 8;
taosInitRWLatch(&pMeta->chkpDirLock);
taosMemoryFree(streamPath);
pMeta->chkpId = streamGetLatestCheckpointId(pMeta);
pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId);
while (pMeta->streamBackend == NULL) {
taosMsleep(2 * 1000);
pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId);
if (pMeta->streamBackend == NULL) {
qError("vgId:%d failed to init stream backend", pMeta->vgId);
qInfo("vgId:%d retry to init stream backend", pMeta->vgId);
}
}
pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend);
code = streamBackendLoadCheckpointInfo(pMeta);
taosInitRWLatch(&pMeta->lock);
taosThreadMutexInit(&pMeta->backendMutex, NULL);
pMeta->pauseTaskNum = 0;
qInfo("vgId:%d open stream meta successfully, latest checkpoint:%" PRId64 ", stage:%" PRId64, vgId, pMeta->chkpId,
stage);
return pMeta;
_err:
taosMemoryFree(streamPath);
taosMemoryFree(pMeta->path);
if (pMeta->pTasks) taosHashCleanup(pMeta->pTasks);
if (pMeta->pTaskList) taosArrayDestroy(pMeta->pTaskList);
if (pMeta->pTaskDb) tdbTbClose(pMeta->pTaskDb);
if (pMeta->pCheckpointDb) tdbTbClose(pMeta->pCheckpointDb);
if (pMeta->db) tdbClose(pMeta->db);
// if (pMeta->streamBackend) streamBackendCleanup(pMeta->streamBackend);
// taosThreadMutexDestroy(&pMeta->backendMutex);
// taosThreadRwlockDestroy(&pMeta->lock);
taosMemoryFree(pMeta);
qError("failed to open stream meta");
return NULL;
}
int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId) {
streamMetaClear(pMeta);
pMeta->streamBackendRid = -1;
pMeta->streamBackend = NULL;
char* defaultPath = taosMemoryCalloc(1, strlen(pMeta->path) + 128);
sprintf(defaultPath, "%s%s%s", pMeta->path, TD_DIRSEP, "state");
taosRemoveDir(defaultPath);
char* newPath = taosMemoryCalloc(1, strlen(pMeta->path) + 128);
sprintf(newPath, "%s%s%s", pMeta->path, TD_DIRSEP, "received");
int32_t code = taosStatFile(newPath, NULL, NULL, NULL);
if (code == 0) {
// directory exists
code = taosRenameFile(newPath, defaultPath);
if (code != 0) {
terrno = TAOS_SYSTEM_ERROR(code);
qError("vgId:%d failed to rename file, from %s to %s, code:%s", pMeta->vgId, newPath, defaultPath,
tstrerror(terrno));
taosMemoryFree(defaultPath);
taosMemoryFree(newPath);
return -1;
}
}
pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId);
while (pMeta->streamBackend == NULL) {
taosMsleep(2 * 1000);
pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId);
if (pMeta->streamBackend == NULL) {
qError("vgId:%d failed to init stream backend", pMeta->vgId);
qInfo("vgId:%d retry to init stream backend", pMeta->vgId);
// return -1;
}
}
pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend);
streamBackendLoadCheckpointInfo(pMeta);
return 0;
}
void streamMetaClear(SStreamMeta* pMeta) {
void* pIter = NULL;
while ((pIter = taosHashIterate(pMeta->pTasks, pIter)) != NULL) {
SStreamTask* p = *(SStreamTask**)pIter;
// release the ref by timer
if (p->triggerParam != 0 && p->info.fillHistory == 0) { // one more ref in timer
qDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", p->id.idStr, p->refCnt);
taosTmrStop(p->schedTimer);
p->triggerParam = 0;
streamMetaReleaseTask(pMeta, p);
}
streamMetaReleaseTask(pMeta, p);
}
taosRemoveRef(streamBackendId, pMeta->streamBackendRid);
taosHashClear(pMeta->pTasks);
taosHashClear(pMeta->pTaskBackendUnique);
taosArrayClear(pMeta->pTaskList);
taosArrayClear(pMeta->chkpSaved);
taosArrayClear(pMeta->chkpInUse);
}
void streamMetaClose(SStreamMeta* pMeta) {
qDebug("start to close stream meta");
if (pMeta == NULL) {
return;
}
// int64_t rid = *(int64_t*)pMeta->pRid;
// if (taosTmrStop(pMeta->hbInfo.hbTmr)) {
// taosMemoryFree(pMeta->pRid);
// } else {
// // do nothing, stop by timer thread
// }
taosRemoveRef(streamMetaId, pMeta->rid);
}
void streamMetaCloseImpl(void* arg) {
SStreamMeta* pMeta = arg;
qDebug("start to do-close stream meta");
if (pMeta == NULL) {
return;
}
streamMetaClear(pMeta);
tdbAbort(pMeta->db, pMeta->txn);
tdbTbClose(pMeta->pTaskDb);
tdbTbClose(pMeta->pCheckpointDb);
tdbClose(pMeta->db);
void* pIter = NULL;
while (1) {
pIter = taosHashIterate(pMeta->pTasks, pIter);
if (pIter == NULL) {
break;
}
tFreeStreamTask(*(SStreamTask**)pIter);
}
taosArrayDestroy(pMeta->pTaskList);
taosArrayDestroy(pMeta->chkpSaved);
taosArrayDestroy(pMeta->chkpInUse);
taosHashCleanup(pMeta->pTasks);
taosRemoveRef(streamBackendId, pMeta->streamBackendRid);
pMeta->pTaskList = taosArrayDestroy(pMeta->pTaskList);
taosHashCleanup(pMeta->pTaskBackendUnique);
taosMemoryFree(pMeta->path);
taosThreadMutexDestroy(&pMeta->backendMutex);
taosHashCleanup(pMeta->pTaskBackendUnique);
taosMemoryFree(pMeta);
qDebug("end to close stream meta");
}
int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) {
@ -168,7 +343,10 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) {
tEncodeStreamTask(&encoder, pTask);
tEncoderClear(&encoder);
if (tdbTbUpsert(pMeta->pTaskDb, &pTask->id.taskId, sizeof(int32_t), buf, len, pMeta->txn) < 0) {
int64_t key[2] = {0};
extractStreamTaskKey(key, pTask);
if (tdbTbUpsert(pMeta->pTaskDb, key, STREAM_TASK_KEY_LEN, buf, len, pMeta->txn) < 0) {
qError("s-task:%s save to disk failed, code:%s", pTask->id.idStr, tstrerror(terrno));
return -1;
}
@ -177,12 +355,18 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) {
return 0;
}
int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) {
int32_t code = tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(taskId), pMeta->txn);
void extractStreamTaskKey(int64_t* pKey, const SStreamTask* pTask) {
pKey[0] = pTask->id.streamId;
pKey[1] = pTask->id.taskId;
}
int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int64_t* pKey) {
int32_t code = tdbTbDelete(pMeta->pTaskDb, pKey, STREAM_TASK_KEY_LEN, pMeta->txn);
if (code != 0) {
qError("vgId:%d failed to remove task:0x%x from metastore, code:%s", pMeta->vgId, taskId, tstrerror(terrno));
qError("vgId:%d failed to remove task:0x%x from metastore, code:%s", pMeta->vgId, (int32_t)pKey[1],
tstrerror(terrno));
} else {
qDebug("vgId:%d remove task:0x%x from metastore", pMeta->vgId, taskId);
qDebug("vgId:%d remove task:0x%x from metastore", pMeta->vgId, (int32_t)pKey[1]);
}
return code;
@ -226,6 +410,22 @@ int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta) {
return (int32_t)size;
}
int32_t streamMetaGetNumOfStreamTasks(SStreamMeta* pMeta) {
int32_t num = 0;
size_t size = taosArrayGetSize(pMeta->pTaskList);
for (int32_t i = 0; i < size; ++i) {
SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i);
int64_t keys[2] = {pId->streamId, pId->taskId};
SStreamTask** p = taosHashGet(pMeta->pTasks, keys, sizeof(keys));
if ((*p)->info.fillHistory == 0) {
num += 1;
}
}
return num;
}
SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) {
taosRLockLatch(&pMeta->lock);
@ -244,7 +444,7 @@ SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t
return NULL;
}
void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) {
void streamMetaReleaseTask(SStreamMeta* UNUSED_PARAM(pMeta), SStreamTask* pTask) {
int32_t ref = atomic_sub_fetch_32(&pTask->refCnt, 1);
if (ref > 0) {
qTrace("s-task:%s release task, ref:%d", pTask->id.idStr, ref);
@ -257,9 +457,9 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) {
}
}
static void doRemoveIdFromList(SStreamMeta* pMeta, int32_t num, SStreamId* id) {
static void doRemoveIdFromList(SStreamMeta* pMeta, int32_t num, SStreamTaskId* id) {
for (int32_t i = 0; i < num; ++i) {
SStreamId* pTaskId = taosArrayGet(pMeta->pTaskList, i);
SStreamTaskId* pTaskId = taosArrayGet(pMeta->pTaskList, i);
if (pTaskId->streamId == id->streamId && pTaskId->taskId == id->taskId) {
taosArrayRemove(pMeta->pTaskList, i);
break;
@ -277,6 +477,10 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t
SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys));
if (ppTask) {
pTask = *ppTask;
if (streamTaskShouldPause(&pTask->status)) {
int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1);
qInfo("vgId:%d s-task:%s drop stream task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num);
}
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING);
} else {
qDebug("vgId:%d failed to find the task:0x%x, it may be dropped already", pMeta->vgId, taskId);
@ -317,7 +521,7 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t
ASSERT(pTask->status.timerActive == 0);
doRemoveIdFromList(pMeta, (int32_t)taosArrayGetSize(pMeta->pTaskList), &pTask->id);
streamMetaRemoveTask(pMeta, taskId);
streamMetaRemoveTask(pMeta, keys);
streamMetaReleaseTask(pMeta, pTask);
} else {
qDebug("vgId:%d failed to find the task:0x%x, it may have been dropped already", pMeta->vgId, taskId);
@ -338,38 +542,73 @@ int32_t streamMetaBegin(SStreamMeta* pMeta) {
// todo add error log
int32_t streamMetaCommit(SStreamMeta* pMeta) {
if (tdbCommit(pMeta->db, pMeta->txn) < 0) {
qError("failed to commit stream meta");
qError("vgId:%d failed to commit stream meta", pMeta->vgId);
return -1;
}
if (tdbPostCommit(pMeta->db, pMeta->txn) < 0) {
qError("failed to commit stream meta");
qError("vgId:%d failed to do post-commit stream meta", pMeta->vgId);
return -1;
}
if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL,
TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
qError("vgId:%d failed to begin trans", pMeta->vgId);
return -1;
}
return 0;
}
int32_t streamMetaAbort(SStreamMeta* pMeta) {
if (tdbAbort(pMeta->db, pMeta->txn) < 0) {
return -1;
int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta) {
int64_t chkpId = 0;
TBC* pCur = NULL;
if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) {
return chkpId;
}
if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL,
TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
return -1;
void* pKey = NULL;
int32_t kLen = 0;
void* pVal = NULL;
int32_t vLen = 0;
SDecoder decoder;
tdbTbcMoveToFirst(pCur);
while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) {
if (pVal == NULL || vLen == 0) {
break;
}
return 0;
SCheckpointInfo info;
tDecoderInit(&decoder, (uint8_t*)pVal, vLen);
if (tDecodeStreamTaskChkInfo(&decoder, &info) < 0) {
continue;
}
tDecoderClear(&decoder);
chkpId = TMAX(chkpId, info.checkpointId);
}
qDebug("get max chkp id: %" PRId64 "", chkpId);
tdbFree(pKey);
tdbFree(pVal);
tdbTbcClose(pCur);
return chkpId;
}
int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) {
static void doClear(void* pKey, void* pVal, TBC* pCur, SArray* pRecycleList) {
tdbFree(pKey);
tdbFree(pVal);
tdbTbcClose(pCur);
taosArrayDestroy(pRecycleList);
}
int32_t streamLoadTasks(SStreamMeta* pMeta) {
TBC* pCur = NULL;
qInfo("vgId:%d load stream tasks from meta files", pMeta->vgId);
if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) {
qError("vgId:%d failed to open stream meta, code:%s", pMeta->vgId, tstrerror(terrno));
return -1;
@ -380,26 +619,20 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) {
void* pVal = NULL;
int32_t vLen = 0;
SDecoder decoder;
SArray* pRecycleList = taosArrayInit(4, sizeof(int32_t));
SArray* pRecycleList = taosArrayInit(4, STREAM_TASK_KEY_LEN);
tdbTbcMoveToFirst(pCur);
while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) {
SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask));
if (pTask == NULL) {
tdbFree(pKey);
tdbFree(pVal);
tdbTbcClose(pCur);
taosArrayDestroy(pRecycleList);
doClear(pKey, pVal, pCur, pRecycleList);
return -1;
}
tDecoderInit(&decoder, (uint8_t*)pVal, vLen);
if (tDecodeStreamTask(&decoder, pTask) < 0) {
tDecoderClear(&decoder);
tdbFree(pKey);
tdbFree(pVal);
tdbTbcClose(pCur);
taosArrayDestroy(pRecycleList);
doClear(pKey, pVal, pCur, pRecycleList);
tFreeStreamTask(pTask);
qError(
"stream read incompatible data, rm %s/vnode/vnode*/tq/stream if taosd cannot start, and rebuild stream "
@ -413,8 +646,10 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) {
int32_t taskId = pTask->id.taskId;
tFreeStreamTask(pTask);
taosArrayPush(pRecycleList, &taskId);
int64_t key[2] = {0};
extractStreamTaskKey(key, pTask);
taosArrayPush(pRecycleList, key);
int32_t total = taosArrayGetSize(pRecycleList);
qDebug("s-task:0x%x is already dropped, add into recycle list, total:%d", taskId, total);
continue;
@ -424,12 +659,9 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) {
int64_t keys[2] = {pTask->id.streamId, pTask->id.taskId};
void* p = taosHashGet(pMeta->pTasks, keys, sizeof(keys));
if (p == NULL) {
if (pMeta->expandFunc(pMeta->ahandle, pTask, pTask->chkInfo.version) < 0) {
tdbFree(pKey);
tdbFree(pVal);
tdbTbcClose(pCur);
if (pMeta->expandFunc(pMeta->ahandle, pTask, pTask->chkInfo.checkpointVer) < 0) {
doClear(pKey, pVal, pCur, pRecycleList);
tFreeStreamTask(pTask);
taosArrayDestroy(pRecycleList);
return -1;
}
@ -437,22 +669,24 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) {
} else {
tdbFree(pKey);
tdbFree(pVal);
tdbTbcClose(pCur);
taosMemoryFree(pTask);
continue;
}
streamTaskResetUpstreamStageInfo(pTask);
if (taosHashPut(pMeta->pTasks, keys, sizeof(keys), &pTask, sizeof(void*)) < 0) {
tdbFree(pKey);
tdbFree(pVal);
tdbTbcClose(pCur);
doClear(pKey, pVal, pCur, pRecycleList);
tFreeStreamTask(pTask);
taosArrayDestroy(pRecycleList);
return -1;
}
if (streamTaskShouldPause(&pTask->status)) {
atomic_add_fetch_32(&pMeta->pauseTaskNum, 1);
}
ASSERT(pTask->status.downstreamReady == 0);
}
qInfo("vgId:%d pause task num:%d", pMeta->vgId, pMeta->pauseTaskNum);
tdbFree(pKey);
tdbFree(pVal);
@ -463,12 +697,212 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) {
if (taosArrayGetSize(pRecycleList) > 0) {
for (int32_t i = 0; i < taosArrayGetSize(pRecycleList); ++i) {
int32_t taskId = *(int32_t*)taosArrayGet(pRecycleList, i);
streamMetaRemoveTask(pMeta, taskId);
int64_t* pId = taosArrayGet(pRecycleList, i);
streamMetaRemoveTask(pMeta, pId);
}
}
qDebug("vgId:%d load %d task from disk", pMeta->vgId, (int32_t)taosArrayGetSize(pMeta->pTaskList));
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
qDebug("vgId:%d load %d tasks into meta from disk completed", pMeta->vgId, numOfTasks);
taosArrayDestroy(pRecycleList);
return 0;
}
int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->vgId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->numOfTasks) < 0) return -1;
for (int32_t i = 0; i < pReq->numOfTasks; ++i) {
STaskStatusEntry* ps = taosArrayGet(pReq->pTaskStatus, i);
if (tEncodeI64(pEncoder, ps->streamId) < 0) return -1;
if (tEncodeI32(pEncoder, ps->taskId) < 0) return -1;
if (tEncodeI32(pEncoder, ps->status) < 0) return -1;
}
tEndEncode(pEncoder);
return pEncoder->pos;
}
int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->vgId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->numOfTasks) < 0) return -1;
pReq->pTaskStatus = taosArrayInit(pReq->numOfTasks, sizeof(STaskStatusEntry));
for (int32_t i = 0; i < pReq->numOfTasks; ++i) {
STaskStatusEntry hb = {0};
if (tDecodeI64(pDecoder, &hb.streamId) < 0) return -1;
if (tDecodeI32(pDecoder, &hb.taskId) < 0) return -1;
if (tDecodeI32(pDecoder, &hb.status) < 0) return -1;
taosArrayPush(pReq->pTaskStatus, &hb);
}
tEndDecode(pDecoder);
return 0;
}
static bool readyToSendHb(SMetaHbInfo* pInfo) {
if ((++pInfo->tickCounter) >= META_HB_SEND_IDLE_COUNTER) {
// reset the counter
pInfo->tickCounter = 0;
return true;
}
return false;
}
void metaHbToMnode(void* param, void* tmrId) {
int64_t rid = *(int64_t*)param;
SStreamHbMsg hbMsg = {0};
SStreamMeta* pMeta = taosAcquireRef(streamMetaId, rid);
if (pMeta == NULL) {
// taosMemoryFree(param);
return;
}
// need to stop, stop now
if (pMeta->hbInfo.stopFlag == STREAM_META_WILL_STOP) {
pMeta->hbInfo.stopFlag = STREAM_META_OK_TO_STOP;
qDebug("vgId:%d jump out of meta timer", pMeta->vgId);
taosReleaseRef(streamMetaId, rid);
return;
}
if (!readyToSendHb(&pMeta->hbInfo)) {
taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->hbInfo.hbTmr);
taosReleaseRef(streamMetaId, rid);
return;
}
taosRLockLatch(&pMeta->lock);
int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta);
SEpSet epset = {0};
hbMsg.vgId = pMeta->vgId;
hbMsg.pTaskStatus = taosArrayInit(numOfTasks, sizeof(STaskStatusEntry));
for (int32_t i = 0; i < numOfTasks; ++i) {
SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i);
int64_t keys[2] = {pId->streamId, pId->taskId};
SStreamTask** pTask = taosHashGet(pMeta->pTasks, keys, sizeof(keys));
if ((*pTask)->info.fillHistory == 1) {
continue;
}
STaskStatusEntry entry = {.streamId = pId->streamId, .taskId = pId->taskId, .status = (*pTask)->status.taskStatus};
taosArrayPush(hbMsg.pTaskStatus, &entry);
if (i == 0) {
epsetAssign(&epset, &(*pTask)->info.mnodeEpset);
}
}
hbMsg.numOfTasks = taosArrayGetSize(hbMsg.pTaskStatus);
taosRUnLockLatch(&pMeta->lock);
int32_t code = 0;
int32_t tlen = 0;
tEncodeSize(tEncodeStreamHbMsg, &hbMsg, tlen, code);
if (code < 0) {
qError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(code));
taosArrayDestroy(hbMsg.pTaskStatus);
taosReleaseRef(streamMetaId, rid);
return;
}
void* buf = rpcMallocCont(tlen);
if (buf == NULL) {
qError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(TSDB_CODE_OUT_OF_MEMORY));
taosArrayDestroy(hbMsg.pTaskStatus);
taosReleaseRef(streamMetaId, rid);
return;
}
SEncoder encoder;
tEncoderInit(&encoder, buf, tlen);
if ((code = tEncodeStreamHbMsg(&encoder, &hbMsg)) < 0) {
rpcFreeCont(buf);
qError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(code));
taosArrayDestroy(hbMsg.pTaskStatus);
taosReleaseRef(streamMetaId, rid);
return;
}
tEncoderClear(&encoder);
taosArrayDestroy(hbMsg.pTaskStatus);
SRpcMsg msg = {0};
initRpcMsg(&msg, TDMT_MND_STREAM_HEARTBEAT, buf, tlen);
msg.info.noResp = 1;
qDebug("vgId:%d, build and send hb to mnode", pMeta->vgId);
tmsgSendReq(&epset, &msg);
taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->hbInfo.hbTmr);
taosReleaseRef(streamMetaId, rid);
}
static bool hasStreamTaskInTimer(SStreamMeta* pMeta) {
bool inTimer = false;
taosWLockLatch(&pMeta->lock);
void* pIter = NULL;
while (1) {
pIter = taosHashIterate(pMeta->pTasks, pIter);
if (pIter == NULL) {
break;
}
SStreamTask* pTask = *(SStreamTask**)pIter;
if (pTask->status.timerActive >= 1) {
inTimer = true;
}
}
taosWUnLockLatch(&pMeta->lock);
return inTimer;
}
void streamMetaNotifyClose(SStreamMeta* pMeta) {
int32_t vgId = pMeta->vgId;
qDebug("vgId:%d notify all stream tasks that the vnode is closing", vgId);
taosWLockLatch(&pMeta->lock);
void* pIter = NULL;
while (1) {
pIter = taosHashIterate(pMeta->pTasks, pIter);
if (pIter == NULL) {
break;
}
SStreamTask* pTask = *(SStreamTask**)pIter;
qDebug("vgId:%d s-task:%s set closing flag", vgId, pTask->id.idStr);
streamTaskStop(pTask);
}
taosWUnLockLatch(&pMeta->lock);
// wait for the stream meta hb function stopping
pMeta->hbInfo.stopFlag = STREAM_META_WILL_STOP;
while (pMeta->hbInfo.stopFlag != STREAM_META_OK_TO_STOP) {
taosMsleep(100);
qDebug("vgId:%d wait for meta to stop timer", pMeta->vgId);
}
qDebug("vgId:%d start to check all tasks", vgId);
int64_t st = taosGetTimestampMs();
while (hasStreamTaskInTimer(pMeta)) {
qDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId);
taosMsleep(100);
}
int64_t el = taosGetTimestampMs() - st;
qDebug("vgId:%d all stream tasks are not in timer, continue close, elapsed time:%" PRId64 " ms", pMeta->vgId, el);
}

View File

@ -15,33 +15,49 @@
#include "streamInt.h"
#define MAX_STREAM_EXEC_BATCH_NUM 32
#define MIN_STREAM_EXEC_BATCH_NUM 4
SStreamQueue* streamQueueOpen(int64_t cap) {
SStreamQueue* pQueue = taosMemoryCalloc(1, sizeof(SStreamQueue));
if (pQueue == NULL) return NULL;
if (pQueue == NULL) {
return NULL;
}
pQueue->queue = taosOpenQueue();
pQueue->qall = taosAllocateQall();
if (pQueue->queue == NULL || pQueue->qall == NULL) {
goto FAIL;
}
pQueue->status = STREAM_QUEUE__SUCESS;
taosSetQueueCapacity(pQueue->queue, cap);
taosSetQueueMemoryCapacity(pQueue->queue, cap * 1024);
return pQueue;
FAIL:
if (pQueue->queue == NULL || pQueue->qall == NULL) {
if (pQueue->queue) taosCloseQueue(pQueue->queue);
if (pQueue->qall) taosFreeQall(pQueue->qall);
taosMemoryFree(pQueue);
return NULL;
}
pQueue->status = STREAM_QUEUE__SUCESS;
taosSetQueueCapacity(pQueue->queue, cap);
taosSetQueueMemoryCapacity(pQueue->queue, cap * 1024);
return pQueue;
}
void streamQueueClose(SStreamQueue* pQueue, int32_t taskId) {
qDebug("s-task:0x%x free the queue:%p, items in queue:%d", taskId, pQueue->queue, taosQueueItemSize(pQueue->queue));
void streamQueueCleanup(SStreamQueue* pQueue) {
void* qItem = NULL;
while ((qItem = streamQueueNextItem(pQueue)) != NULL) {
streamFreeQitem(qItem);
}
pQueue->status = STREAM_QUEUE__SUCESS;
}
// void streamQueueClose(SStreamQueue* pQueue) {
// streamQueueCleanup(pQueue);
// taosFreeQall(pQueue->qall);
// taosCloseQueue(pQueue->queue);
// taosMemoryFree(pQueue);
// }
void streamQueueClose(SStreamQueue* pQueue, int32_t taskId) {
qDebug("s-task:0x%x free the queue:%p, items in queue:%d", taskId, pQueue->queue, taosQueueItemSize(pQueue->queue));
streamQueueCleanup(pQueue);
taosFreeQall(pQueue->qall);
taosCloseQueue(pQueue->queue);
@ -106,9 +122,6 @@ SStreamQueueRes streamQueueGetRes(SStreamQueue1* pQueue) {
}
#endif
#define MAX_STREAM_EXEC_BATCH_NUM 128
#define MIN_STREAM_EXEC_BATCH_NUM 16
// todo refactor:
// read data from input queue
typedef struct SQueueReader {
@ -118,6 +131,7 @@ typedef struct SQueueReader {
int32_t waitDuration; // maximum wait time to format several block into a batch to process, unit: ms
} SQueueReader;
#if 0
SStreamQueueItem* doReadMultiBlocksFromQueue(SQueueReader* pReader, const char* idstr) {
int32_t numOfBlocks = 0;
int32_t tryCount = 0;
@ -163,3 +177,100 @@ SStreamQueueItem* doReadMultiBlocksFromQueue(SQueueReader* pReader, const char*
return pRet;
}
#endif
int32_t extractBlocksFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks) {
int32_t retryTimes = 0;
int32_t MAX_RETRY_TIMES = 5;
const char* id = pTask->id.idStr;
if (pTask->info.taskLevel == TASK_LEVEL__SINK) { // extract block from inputQ, one-by-one
while (1) {
if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) {
qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks);
return TSDB_CODE_SUCCESS;
}
SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue);
if (qItem == NULL) {
qDebug("===stream===break batchSize:%d, %s", *numOfBlocks, id);
return TSDB_CODE_SUCCESS;
}
qDebug("s-task:%s sink task handle block one-by-one, type:%d", id, qItem->type);
*numOfBlocks = 1;
*pInput = qItem;
return TSDB_CODE_SUCCESS;
}
}
// non sink task
while (1) {
if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) {
qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks);
return TSDB_CODE_SUCCESS;
}
SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue);
if (qItem == NULL) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && (++retryTimes) < MAX_RETRY_TIMES) {
taosMsleep(10);
qDebug("===stream===try again batchSize:%d, retry:%d, %s", *numOfBlocks, retryTimes, id);
continue;
}
qDebug("===stream===break batchSize:%d, %s", *numOfBlocks, id);
return TSDB_CODE_SUCCESS;
}
// do not merge blocks for sink node and check point data block
if (qItem->type == STREAM_INPUT__CHECKPOINT || qItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER ||
qItem->type == STREAM_INPUT__TRANS_STATE) {
if (*pInput == NULL) {
char* p = NULL;
if (qItem->type == STREAM_INPUT__CHECKPOINT) {
p = "checkpoint";
} else if (qItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER) {
p = "checkpoint-trigger";
} else {
p = "transtate";
}
qDebug("s-task:%s %s msg extracted, start to process immediately", id, p);
*numOfBlocks = 1;
*pInput = qItem;
return TSDB_CODE_SUCCESS;
} else {
// previous existed blocks needs to be handle, before handle the checkpoint msg block
qDebug("s-task:%s checkpoint/transtate msg extracted, handle previous blocks, numOfBlocks:%d", id,
*numOfBlocks);
streamQueueProcessFail(pTask->inputQueue);
return TSDB_CODE_SUCCESS;
}
} else {
if (*pInput == NULL) {
ASSERT((*numOfBlocks) == 0);
*pInput = qItem;
} else {
// todo we need to sort the data block, instead of just appending into the array list.
void* newRet = streamMergeQueueItem(*pInput, qItem);
if (newRet == NULL) {
qError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d", id, *numOfBlocks);
streamQueueProcessFail(pTask->inputQueue);
return TSDB_CODE_SUCCESS;
}
*pInput = newRet;
}
*numOfBlocks += 1;
streamQueueProcessSuccess(pTask->inputQueue);
if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) {
qDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM);
return TSDB_CODE_SUCCESS;
}
}
}
}

View File

@ -13,7 +13,9 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <tstream.h>
#include "streamInt.h"
#include "trpc.h"
#include "ttimer.h"
#include "wal.h"
@ -63,7 +65,10 @@ const char* streamGetTaskStatusStr(int32_t status) {
case TASK_STATUS__SCAN_HISTORY: return "scan-history";
case TASK_STATUS__HALT: return "halt";
case TASK_STATUS__PAUSE: return "paused";
case TASK_STATUS__CK: return "check-point";
case TASK_STATUS__CK_READY: return "check-point-ready";
case TASK_STATUS__DROPPING: return "dropping";
case TASK_STATUS__STOP: return "stop";
default:return "";
}
}
@ -92,8 +97,9 @@ int32_t streamTaskLaunchScanHistory(SStreamTask* pTask) {
} else if (pTask->info.taskLevel == TASK_LEVEL__AGG) {
if (pTask->info.fillHistory) {
streamSetParamForScanHistory(pTask);
}
streamTaskEnablePause(pTask);
}
streamTaskScanHistoryPrepare(pTask);
} else if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
qDebug("s-task:%s sink task do nothing to handle scan-history", pTask->id.idStr);
@ -112,6 +118,7 @@ int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask) {
.upstreamTaskId = pTask->id.taskId,
.upstreamNodeId = pTask->info.nodeId,
.childId = pTask->info.selfChildId,
.stage = pTask->pMeta->stage,
};
// serialize
@ -122,9 +129,9 @@ int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask) {
pTask->checkReqId = req.reqId;
qDebug("s-task:%s check single downstream task:0x%x(vgId:%d) ver:%" PRId64 "-%" PRId64 " window:%" PRId64
"-%" PRId64 ", req:0x%" PRIx64,
"-%" PRId64 ", stage:%"PRId64" req:0x%" PRIx64,
pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId, pRange->range.minVer, pRange->range.maxVer,
pWindow->skey, pWindow->ekey, req.reqId);
pWindow->skey, pWindow->ekey, req.stage, req.reqId);
streamDispatchCheckMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet);
} else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
@ -143,8 +150,8 @@ int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask) {
taosArrayPush(pTask->checkReqIds, &req.reqId);
req.downstreamNodeId = pVgInfo->vgId;
req.downstreamTaskId = pVgInfo->taskId;
qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (shuffle), idx:%d", pTask->id.idStr, pTask->info.nodeId,
req.downstreamTaskId, req.downstreamNodeId, i);
qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (shuffle), idx:%d, stage:%" PRId64,
pTask->id.idStr, pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId, i, req.stage);
streamDispatchCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
}
} else {
@ -169,12 +176,12 @@ int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* p
.downstreamTaskId = pRsp->downstreamTaskId,
.downstreamNodeId = pRsp->downstreamNodeId,
.childId = pRsp->childId,
.stage = pTask->pMeta->stage,
};
qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (recheck)", pTask->id.idStr, pTask->info.nodeId,
req.downstreamTaskId, req.downstreamNodeId);
if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) {
qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) stage:%" PRId64 " (recheck)", pTask->id.idStr,
pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId, req.stage);
streamDispatchCheckMsg(pTask, &req, pRsp->downstreamNodeId, &pTask->fixedEpDispatcher.epSet);
} else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
@ -183,6 +190,8 @@ int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* p
for (int32_t i = 0; i < numOfVgs; i++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
if (pVgInfo->taskId == req.downstreamTaskId) {
qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) stage:%" PRId64 " (recheck)", pTask->id.idStr,
pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId, req.stage);
streamDispatchCheckMsg(pTask, &req, pRsp->downstreamNodeId, &pVgInfo->epSet);
}
}
@ -191,8 +200,28 @@ int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* p
return 0;
}
int32_t streamTaskCheckStatus(SStreamTask* pTask) {
return (pTask->status.downstreamReady == 1)? 1:0;
int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage) {
SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId);
ASSERT(pInfo != NULL);
if (stage == -1) {
qDebug("s-task:%s receive check msg from upstream task:0x%x, invalid stageId:%" PRId64 ", not ready", pTask->id.idStr,
upstreamTaskId, stage);
return 0;
}
if (pInfo->stage == -1) {
pInfo->stage = stage;
qDebug("s-task:%s receive check msg from upstream task:0x%x, init stage value:%" PRId64, pTask->id.idStr,
upstreamTaskId, stage);
}
if (pInfo->stage < stage) {
qError("s-task:%s receive msg from upstream task:0x%x(vgId:%d), new stage received:%" PRId64 ", prev:%" PRId64,
pTask->id.idStr, upstreamTaskId, vgId, stage, pInfo->stage);
}
return ((pTask->status.downstreamReady == 1) && (pInfo->stage == stage))? 1:0;
}
static void doProcessDownstreamReadyRsp(SStreamTask* pTask, int32_t numOfReqs) {
@ -216,6 +245,7 @@ static void doProcessDownstreamReadyRsp(SStreamTask* pTask, int32_t numOfReqs) {
launchFillHistoryTask(pTask);
}
// todo handle error
int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) {
ASSERT(pTask->id.taskId == pRsp->upstreamTaskId);
const char* id = pTask->id.idStr;
@ -259,10 +289,9 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs
doProcessDownstreamReadyRsp(pTask, 1);
}
} else { // not ready, wait for 100ms and retry
qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, wait for 100ms and retry", id, pRsp->downstreamTaskId,
pRsp->downstreamNodeId);
qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, stage:%d, wait for 100ms and retry", id,
pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->oldStage);
taosMsleep(100);
streamRecheckDownstream(pTask, pRsp);
}
@ -339,39 +368,6 @@ int32_t streamSourceScanHistoryData(SStreamTask* pTask) {
return streamScanExec(pTask, 100);
}
int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) {
SStreamScanHistoryFinishReq req = {
.streamId = pTask->id.streamId,
.childId = pTask->info.selfChildId,
.upstreamTaskId = pTask->id.taskId,
.upstreamNodeId = pTask->pMeta->vgId,
};
// serialize
if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) {
req.downstreamTaskId = pTask->fixedEpDispatcher.taskId;
pTask->notReadyTasks = 1;
streamDoDispatchScanHistoryFinishMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet);
} else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t numOfVgs = taosArrayGetSize(vgInfo);
pTask->notReadyTasks = numOfVgs;
qDebug("s-task:%s send scan-history data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr,
numOfVgs, streamGetTaskStatusStr(pTask->status.taskStatus));
for (int32_t i = 0; i < numOfVgs; i++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
req.downstreamTaskId = pVgInfo->taskId;
streamDoDispatchScanHistoryFinishMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
}
} else {
qDebug("s-task:%s no downstream tasks, invoke scan-history finish rsp directly", pTask->id.idStr);
streamProcessScanHistoryFinishRsp(pTask);
}
return 0;
}
int32_t appendTranstateIntoInputQ(SStreamTask* pTask) {
SStreamDataBlock* pTranstate = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock));
if (pTranstate == NULL) {
@ -405,7 +401,7 @@ int32_t appendTranstateIntoInputQ(SStreamTask* pTask) {
// agg
int32_t streamTaskScanHistoryPrepare(SStreamTask* pTask) {
pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->pUpstreamEpInfoList);
pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->pUpstreamInfoList);
qDebug("s-task:%s level:%d task wait for %d upstream tasks complete scan-history procedure, status:%s",
pTask->id.idStr, pTask->info.taskLevel, pTask->numOfWaitingUpstream,
streamGetTaskStatusStr(pTask->status.taskStatus));
@ -429,14 +425,31 @@ int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistory
int32_t taskLevel = pTask->info.taskLevel;
ASSERT(taskLevel == TASK_LEVEL__AGG || taskLevel == TASK_LEVEL__SINK);
// sink node do not send end of scan history msg to its upstream, which is agg task.
if (pTask->status.taskStatus != TASK_STATUS__SCAN_HISTORY) {
qError("s-task:%s not in scan-history status, status:%s return upstream:0x%x scan-history finish directly",
pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), pReq->upstreamTaskId);
void* pBuf = NULL;
int32_t len = 0;
streamTaskBuildScanhistoryRspMsg(pTask, pReq, &pBuf, &len);
SRpcMsg msg = {.info = *pRpcInfo};
initRpcMsg(&msg, 0, pBuf, sizeof(SMsgHead) + len);
tmsgSendRsp(&msg);
qDebug("s-task:%s level:%d notify upstream:0x%x(vgId:%d) to continue process data from WAL", pTask->id.idStr,
pTask->info.taskLevel, pReq->upstreamTaskId, pReq->upstreamNodeId);
return 0;
}
// sink tasks do not send end of scan history msg to its upstream, which is agg task.
streamAddEndScanHistoryMsg(pTask, pRpcInfo, pReq);
int32_t left = atomic_sub_fetch_32(&pTask->numOfWaitingUpstream, 1);
ASSERT(left >= 0);
if (left == 0) {
int32_t numOfTasks = taosArrayGetSize(pTask->pUpstreamEpInfoList);
int32_t numOfTasks = taosArrayGetSize(pTask->pUpstreamInfoList);
qDebug(
"s-task:%s all %d upstream tasks finish scan-history data, set param for agg task for stream data and send "
"rsp to all upstream tasks",
@ -446,11 +459,18 @@ int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistory
streamAggUpstreamScanHistoryFinish(pTask);
}
// all upstream tasks have completed the scan-history task in the stream time window, let's start to extract data
// from the WAL files, which contains the real time stream data.
streamNotifyUpstreamContinue(pTask);
// sink node does not receive the pause msg from mnode, so does not need enable it
if (pTask->info.taskLevel == TASK_LEVEL__AGG) {
streamTaskEnablePause(pTask);
// mnode will not send the pause/resume message to the sink task, so no need to enable the pause for sink tasks.
if (taskLevel == TASK_LEVEL__AGG) {
/*int32_t code = */streamTaskScanHistoryDataComplete(pTask);
} else { // for sink task, set normal
if (pTask->status.taskStatus != TASK_STATUS__PAUSE && pTask->status.taskStatus != TASK_STATUS__STOP &&
pTask->status.taskStatus != TASK_STATUS__DROPPING) {
streamSetStatusNormal(pTask);
}
}
} else {
qDebug("s-task:%s receive scan-history data finish msg from upstream:0x%x(index:%d), unfinished:%d",
@ -659,6 +679,7 @@ int32_t tEncodeStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq*
if (tEncodeI32(pEncoder, pReq->downstreamNodeId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->downstreamTaskId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->childId) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->stage) < 0) return -1;
tEndEncode(pEncoder);
return pEncoder->pos;
}
@ -672,6 +693,7 @@ int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq)
if (tDecodeI32(pDecoder, &pReq->downstreamNodeId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->downstreamTaskId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->childId) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->stage) < 0) return -1;
tEndDecode(pDecoder);
return 0;
}
@ -685,6 +707,7 @@ int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp*
if (tEncodeI32(pEncoder, pRsp->downstreamNodeId) < 0) return -1;
if (tEncodeI32(pEncoder, pRsp->downstreamTaskId) < 0) return -1;
if (tEncodeI32(pEncoder, pRsp->childId) < 0) return -1;
if (tEncodeI32(pEncoder, pRsp->oldStage) < 0) return -1;
if (tEncodeI8(pEncoder, pRsp->status) < 0) return -1;
tEndEncode(pEncoder);
return pEncoder->pos;
@ -699,6 +722,7 @@ int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp)
if (tDecodeI32(pDecoder, &pRsp->downstreamNodeId) < 0) return -1;
if (tDecodeI32(pDecoder, &pRsp->downstreamTaskId) < 0) return -1;
if (tDecodeI32(pDecoder, &pRsp->childId) < 0) return -1;
if (tDecodeI32(pDecoder, &pRsp->oldStage) < 0) return -1;
if (tDecodeI8(pDecoder, &pRsp->status) < 0) return -1;
tEndDecode(pDecoder);
return 0;
@ -776,22 +800,18 @@ void launchFillHistoryTask(SStreamTask* pTask) {
streamLaunchFillHistoryTask(pTask);
}
// only the downstream tasks are ready, set the task to be ready to work.
void streamTaskCheckDownstreamTasks(SStreamTask* pTask) {
if (pTask->info.fillHistory) {
qDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr);
return;
}
ASSERT(pTask->status.downstreamReady == 0);
// check downstream tasks for itself
streamTaskDoCheckDownstreamTasks(pTask);
}
// normal -> pause, pause/stop/dropping -> pause, halt -> pause, scan-history -> pause
void streamTaskPause(SStreamTask* pTask) {
SStreamMeta* pMeta = pTask->pMeta;
void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta) {
int64_t st = taosGetTimestampMs();
int8_t status = pTask->status.taskStatus;
@ -806,6 +826,12 @@ void streamTaskPause(SStreamTask* pTask) {
return;
}
if(pTask->info.taskLevel == TASK_LEVEL__SINK) {
int32_t num = atomic_add_fetch_32(&pMeta->pauseTaskNum, 1);
qInfo("vgId:%d s-task:%s pause stream sink task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num);
return;
}
while (!pTask->status.pauseAllowed || (pTask->status.taskStatus == TASK_STATUS__HALT)) {
status = pTask->status.taskStatus;
if (status == TASK_STATUS__DROPPING) {
@ -835,6 +861,8 @@ void streamTaskPause(SStreamTask* pTask) {
atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus);
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE);
int32_t num = atomic_add_fetch_32(&pMeta->pauseTaskNum, 1);
qInfo("vgId:%d s-task:%s pause stream task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num);
taosWUnLockLatch(&pMeta->lock);
// in case of fill-history task, stop the tsdb file scan operation.
@ -844,16 +872,20 @@ void streamTaskPause(SStreamTask* pTask) {
}
int64_t el = taosGetTimestampMs() - st;
qDebug("vgId:%d s-task:%s set pause flag, prev:%s, elapsed time:%dms", pMeta->vgId, pTask->id.idStr,
qDebug("vgId:%d s-task:%s set pause flag, prev:%s, pause elapsed time:%dms", pMeta->vgId, pTask->id.idStr,
streamGetTaskStatusStr(pTask->status.keepTaskStatus), (int32_t)el);
}
void streamTaskResume(SStreamTask* pTask) {
void streamTaskResume(SStreamTask* pTask, SStreamMeta* pMeta) {
int8_t status = pTask->status.taskStatus;
if (status == TASK_STATUS__PAUSE) {
pTask->status.taskStatus = pTask->status.keepTaskStatus;
pTask->status.keepTaskStatus = TASK_STATUS__NORMAL;
qDebug("s-task:%s resume from pause", pTask->id.idStr);
int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1);
qInfo("vgId:%d s-task:%s resume from pause, status%s. pause task num:%d", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(status), num);
} else if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1);
qInfo("vgId:%d s-task:%s sink task.resume from pause, status%s. pause task num:%d", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(status), num);
} else {
qError("s-task:%s not in pause, failed to resume, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status));
}

View File

@ -0,0 +1,499 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "streamSnapshot.h"
#include "query.h"
#include "rocksdb/c.h"
#include "streamBackendRocksdb.h"
#include "tcommon.h"
enum SBackendFileType {
ROCKSDB_OPTIONS_TYPE = 1,
ROCKSDB_MAINFEST_TYPE = 2,
ROCKSDB_SST_TYPE = 3,
ROCKSDB_CURRENT_TYPE = 4,
ROCKSDB_CHECKPOINT_META_TYPE = 5,
};
typedef struct SBackendFileItem {
char* name;
int8_t type;
int64_t size;
} SBackendFileItem;
typedef struct SBackendFile {
char* pCurrent;
char* pMainfest;
char* pOptions;
SArray* pSst;
char* pCheckpointMeta;
char* path;
} SBanckendFile;
struct SStreamSnapHandle {
void* handle;
SBanckendFile* pBackendFile;
int64_t checkpointId;
int64_t seraial;
int64_t offset;
TdFilePtr fd;
int8_t filetype;
SArray* pFileList;
int32_t currFileIdx;
};
struct SStreamSnapBlockHdr {
int8_t type;
int8_t flag;
int64_t index;
char name[128];
int64_t totalSize;
int64_t size;
uint8_t data[];
};
struct SStreamSnapReader {
void* pMeta;
int64_t sver;
int64_t ever;
SStreamSnapHandle handle;
int64_t checkpointId;
};
struct SStreamSnapWriter {
void* pMeta;
int64_t sver;
int64_t ever;
SStreamSnapHandle handle;
};
const char* ROCKSDB_OPTIONS = "OPTIONS";
const char* ROCKSDB_MAINFEST = "MANIFEST";
const char* ROCKSDB_SST = "sst";
const char* ROCKSDB_CURRENT = "CURRENT";
const char* ROCKSDB_CHECKPOINT_META = "CHECKPOINT";
static int64_t kBlockSize = 64 * 1024;
int32_t streamSnapHandleInit(SStreamSnapHandle* handle, char* path, int64_t chkpId, void* pMeta);
void streamSnapHandleDestroy(SStreamSnapHandle* handle);
// static void streamBuildFname(char* path, char* file, char* fullname)
#define STREAM_ROCKSDB_BUILD_FULLNAME(path, file, fullname) \
do { \
sprintf(fullname, "%s%s%s", path, TD_DIRSEP, file); \
} while (0)
int32_t streamGetFileSize(char* path, char* name, int64_t* sz) {
int ret = 0;
char* fullname = taosMemoryCalloc(1, strlen(path) + 32);
sprintf(fullname, "%s%s%s", path, TD_DIRSEP, name);
ret = taosStatFile(fullname, sz, NULL, NULL);
taosMemoryFree(fullname);
return ret;
}
TdFilePtr streamOpenFile(char* path, char* name, int32_t opt) {
char fullname[256] = {0};
STREAM_ROCKSDB_BUILD_FULLNAME(path, name, fullname);
return taosOpenFile(fullname, opt);
}
int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chkpId, void* pMeta) {
// impl later
int len = strlen(path);
char* tdir = taosMemoryCalloc(1, len + 256);
memcpy(tdir, path, len);
int32_t code = 0;
int8_t validChkp = 0;
if (chkpId != 0) {
sprintf(tdir, "%s%s%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "stream", TD_DIRSEP, "checkpoints", TD_DIRSEP,
chkpId);
if (taosIsDir(tdir)) {
validChkp = 1;
qInfo("%s start to read snap %s", STREAM_STATE_TRANSFER, tdir);
streamBackendAddInUseChkp(pMeta, chkpId);
} else {
qWarn("%s failed to read from %s, reason: dir not exist,retry to default state dir", STREAM_STATE_TRANSFER, tdir);
}
}
// no checkpoint specified or not exists invalid checkpoint, do checkpoint at default path and translate it
if (validChkp == 0) {
sprintf(tdir, "%s%s%s%s%s", path, TD_DIRSEP, "stream", TD_DIRSEP, "state");
char* chkpdir = taosMemoryCalloc(1, len + 256);
sprintf(chkpdir, "%s%s%s", tdir, TD_DIRSEP, "tmp");
taosMemoryFree(tdir);
tdir = chkpdir;
qInfo("%s start to trigger checkpoint on %s", STREAM_STATE_TRANSFER, tdir);
code = streamBackendTriggerChkp(pMeta, tdir);
if (code != 0) {
qError("%s failed to trigger chekckpoint at %s", STREAM_STATE_TRANSFER, tdir);
taosMemoryFree(tdir);
return code;
}
chkpId = 0;
}
qInfo("%s start to read dir: %s", STREAM_STATE_TRANSFER, tdir);
TdDirPtr pDir = taosOpenDir(tdir);
if (NULL == pDir) {
qError("%s failed to open %s", STREAM_STATE_TRANSFER, tdir);
goto _err;
}
SBanckendFile* pFile = taosMemoryCalloc(1, sizeof(SBanckendFile));
pHandle->pBackendFile = pFile;
pHandle->checkpointId = chkpId;
pHandle->seraial = 0;
pFile->path = tdir;
pFile->pSst = taosArrayInit(16, sizeof(void*));
TdDirEntryPtr pDirEntry;
while ((pDirEntry = taosReadDir(pDir)) != NULL) {
char* name = taosGetDirEntryName(pDirEntry);
if (strlen(name) >= strlen(ROCKSDB_CURRENT) && 0 == strncmp(name, ROCKSDB_CURRENT, strlen(ROCKSDB_CURRENT))) {
pFile->pCurrent = taosStrdup(name);
continue;
}
if (strlen(name) >= strlen(ROCKSDB_MAINFEST) && 0 == strncmp(name, ROCKSDB_MAINFEST, strlen(ROCKSDB_MAINFEST))) {
pFile->pMainfest = taosStrdup(name);
continue;
}
if (strlen(name) >= strlen(ROCKSDB_OPTIONS) && 0 == strncmp(name, ROCKSDB_OPTIONS, strlen(ROCKSDB_OPTIONS))) {
pFile->pOptions = taosStrdup(name);
continue;
}
if (strlen(name) >= strlen(ROCKSDB_CHECKPOINT_META) &&
0 == strncmp(name, ROCKSDB_CHECKPOINT_META, strlen(ROCKSDB_CHECKPOINT_META))) {
pFile->pCheckpointMeta = taosStrdup(name);
continue;
}
if (strlen(name) >= strlen(ROCKSDB_SST) &&
0 == strncmp(name + strlen(name) - strlen(ROCKSDB_SST), ROCKSDB_SST, strlen(ROCKSDB_SST))) {
char* sst = taosStrdup(name);
taosArrayPush(pFile->pSst, &sst);
}
}
{
char* buf = taosMemoryCalloc(1, 512);
sprintf(buf, "[current: %s,", pFile->pCurrent);
sprintf(buf + strlen(buf), "MANIFEST: %s,", pFile->pMainfest);
sprintf(buf + strlen(buf), "options: %s,", pFile->pOptions);
for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) {
char* name = taosArrayGetP(pFile->pSst, i);
sprintf(buf + strlen(buf), "%s,", name);
}
sprintf(buf + strlen(buf) - 1, "]");
qInfo("%s get file list: %s", STREAM_STATE_TRANSFER, buf);
taosMemoryFree(buf);
}
taosCloseDir(&pDir);
if (pFile->pCurrent == NULL) {
qError("%s failed to open %s, reason: no valid file", STREAM_STATE_TRANSFER, tdir);
code = -1;
tdir = NULL;
goto _err;
}
SArray* list = taosArrayInit(64, sizeof(SBackendFileItem));
SBackendFileItem item;
// current
item.name = pFile->pCurrent;
item.type = ROCKSDB_CURRENT_TYPE;
streamGetFileSize(pFile->path, item.name, &item.size);
taosArrayPush(list, &item);
// mainfest
item.name = pFile->pMainfest;
item.type = ROCKSDB_MAINFEST_TYPE;
streamGetFileSize(pFile->path, item.name, &item.size);
taosArrayPush(list, &item);
// options
item.name = pFile->pOptions;
item.type = ROCKSDB_OPTIONS_TYPE;
streamGetFileSize(pFile->path, item.name, &item.size);
taosArrayPush(list, &item);
// sst
for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) {
char* sst = taosArrayGetP(pFile->pSst, i);
item.name = sst;
item.type = ROCKSDB_SST_TYPE;
streamGetFileSize(pFile->path, item.name, &item.size);
taosArrayPush(list, &item);
}
// meta
item.name = pFile->pCheckpointMeta;
item.type = ROCKSDB_CHECKPOINT_META_TYPE;
if (streamGetFileSize(pFile->path, item.name, &item.size) == 0) {
taosArrayPush(list, &item);
}
pHandle->pBackendFile = pFile;
pHandle->currFileIdx = 0;
pHandle->pFileList = list;
pHandle->seraial = 0;
pHandle->offset = 0;
pHandle->handle = pMeta;
return 0;
_err:
streamSnapHandleDestroy(pHandle);
taosMemoryFreeClear(tdir);
code = -1;
return code;
}
void streamSnapHandleDestroy(SStreamSnapHandle* handle) {
SBanckendFile* pFile = handle->pBackendFile;
if (handle->checkpointId == 0) {
// del tmp dir
if (taosIsDir(pFile->path)) {
taosRemoveDir(pFile->path);
}
} else {
streamBackendDelInUseChkp(handle->handle, handle->checkpointId);
}
if (pFile) {
taosMemoryFree(pFile->pCheckpointMeta);
taosMemoryFree(pFile->pCurrent);
taosMemoryFree(pFile->pMainfest);
taosMemoryFree(pFile->pOptions);
taosMemoryFree(pFile->path);
for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) {
char* sst = taosArrayGetP(pFile->pSst, i);
taosMemoryFree(sst);
}
taosArrayDestroy(pFile->pSst);
taosMemoryFree(pFile);
}
taosArrayDestroy(handle->pFileList);
taosCloseFile(&handle->fd);
return;
}
int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t chkpId, char* path, SStreamSnapReader** ppReader) {
// impl later
SStreamSnapReader* pReader = taosMemoryCalloc(1, sizeof(SStreamSnapReader));
if (pReader == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
if (streamSnapHandleInit(&pReader->handle, (char*)path, chkpId, pMeta) < 0) {
taosMemoryFree(pReader);
return -1;
}
*ppReader = pReader;
return 0;
}
int32_t streamSnapReaderClose(SStreamSnapReader* pReader) {
if (pReader == NULL) return 0;
streamSnapHandleDestroy(&pReader->handle);
taosMemoryFree(pReader);
return 0;
}
int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* size) {
// impl later
int32_t code = 0;
SStreamSnapHandle* pHandle = &pReader->handle;
SBanckendFile* pFile = pHandle->pBackendFile;
SBackendFileItem* item = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx);
if (pHandle->fd == NULL) {
if (pHandle->currFileIdx >= taosArrayGetSize(pHandle->pFileList)) {
// finish
*ppData = NULL;
*size = 0;
return 0;
} else {
pHandle->fd = streamOpenFile(pFile->path, item->name, TD_FILE_READ);
qDebug("%s open file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER,
item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx);
}
}
qDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64 ", file no.%d", STREAM_STATE_TRANSFER,
item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx);
uint8_t* buf = taosMemoryCalloc(1, sizeof(SStreamSnapBlockHdr) + kBlockSize);
int64_t nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset);
if (nread == -1) {
code = TAOS_SYSTEM_ERROR(terrno);
qError("%s snap failed to read snap, file name:%s, type:%d,reason:%s", STREAM_STATE_TRANSFER, item->name,
item->type, tstrerror(code));
return -1;
} else if (nread > 0 && nread <= kBlockSize) {
// left bytes less than kBlockSize
qDebug("%s read file %s, current offset:%" PRId64 ",size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER,
item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx);
pHandle->offset += nread;
if (pHandle->offset >= item->size || nread < kBlockSize) {
taosCloseFile(&pHandle->fd);
pHandle->offset = 0;
pHandle->currFileIdx += 1;
}
} else {
qDebug("%s no data read, close file no.%d, move to next file, open and read", STREAM_STATE_TRANSFER,
pHandle->currFileIdx);
taosCloseFile(&pHandle->fd);
pHandle->offset = 0;
pHandle->currFileIdx += 1;
if (pHandle->currFileIdx >= taosArrayGetSize(pHandle->pFileList)) {
// finish
*ppData = NULL;
*size = 0;
return 0;
}
item = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx);
pHandle->fd = streamOpenFile(pFile->path, item->name, TD_FILE_READ);
nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset);
pHandle->offset += nread;
qDebug("%s open file and read file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d",
STREAM_STATE_TRANSFER, item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx);
}
SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)buf;
pHdr->size = nread;
pHdr->type = item->type;
pHdr->totalSize = item->size;
memcpy(pHdr->name, item->name, strlen(item->name));
pHandle->seraial += nread;
*ppData = buf;
*size = sizeof(SStreamSnapBlockHdr) + nread;
return 0;
}
// SMetaSnapWriter ========================================
int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path, SStreamSnapWriter** ppWriter) {
// impl later
SStreamSnapWriter* pWriter = taosMemoryCalloc(1, sizeof(SStreamSnapWriter));
if (pWriter == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
SStreamSnapHandle* pHandle = &pWriter->handle;
SBanckendFile* pFile = taosMemoryCalloc(1, sizeof(SBanckendFile));
pFile->path = taosStrdup(path);
SArray* list = taosArrayInit(64, sizeof(SBackendFileItem));
SBackendFileItem item;
item.name = taosStrdup((char*)ROCKSDB_CURRENT);
item.type = ROCKSDB_CURRENT_TYPE;
taosArrayPush(list, &item);
pHandle->pBackendFile = pFile;
pHandle->pFileList = list;
pHandle->currFileIdx = 0;
pHandle->offset = 0;
*ppWriter = pWriter;
return 0;
}
int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nData) {
int32_t code = 0;
SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)pData;
SStreamSnapHandle* pHandle = &pWriter->handle;
SBanckendFile* pFile = pHandle->pBackendFile;
SBackendFileItem* pItem = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx);
if (pHandle->fd == NULL) {
pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND);
if (pHandle->fd == NULL) {
code = TAOS_SYSTEM_ERROR(terrno);
qError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, pHdr->name,
tstrerror(code));
}
}
if (strlen(pHdr->name) == strlen(pItem->name) && strcmp(pHdr->name, pItem->name) == 0) {
int64_t bytes = taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset);
if (bytes != pHdr->size) {
code = TAOS_SYSTEM_ERROR(terrno);
qError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code));
return code;
}
pHandle->offset += bytes;
} else {
taosCloseFile(&pHandle->fd);
pHandle->offset = 0;
pHandle->currFileIdx += 1;
SBackendFileItem item;
item.name = taosStrdup(pHdr->name);
item.type = pHdr->type;
taosArrayPush(pHandle->pFileList, &item);
SBackendFileItem* pItem = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx);
pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND);
if (pHandle->fd == NULL) {
code = TAOS_SYSTEM_ERROR(terrno);
qError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, pHdr->name,
tstrerror(code));
}
taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset);
pHandle->offset += pHdr->size;
}
// impl later
return 0;
}
int32_t streamSnapWriterClose(SStreamSnapWriter* pWriter, int8_t rollback) {
SStreamSnapHandle* handle = &pWriter->handle;
if (qDebugFlag & DEBUG_DEBUG) {
char* buf = (char*)taosMemoryMalloc(1024);
int n = sprintf(buf, "[");
for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) {
SBackendFileItem* item = taosArrayGet(handle->pFileList, i);
if (i != taosArrayGetSize(handle->pFileList) - 1) {
n += sprintf(buf + n, "%s %" PRId64 ",", item->name, item->size);
} else {
n += sprintf(buf + n, "%s %" PRId64 "]", item->name, item->size);
}
}
qDebug("%s snap get file list, %s", STREAM_STATE_TRANSFER, buf);
taosMemoryFree(buf);
}
for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) {
SBackendFileItem* item = taosArrayGet(handle->pFileList, i);
taosMemoryFree(item->name);
}
streamSnapHandleDestroy(handle);
taosMemoryFree(pWriter);
return 0;
}

View File

@ -108,7 +108,7 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz
SStreamTask* pStreamTask = pTask;
char statePath[1024];
if (!specPath) {
sprintf(statePath, "%s/%d", path, pStreamTask->id.taskId);
sprintf(statePath, "%s%s%d", path, TD_DIRSEP, pStreamTask->id.taskId);
} else {
memset(statePath, 0, 1024);
tstrncpy(statePath, path, 1024);
@ -128,7 +128,6 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz
if (uniqueId == NULL) {
int code = streamStateOpenBackend(pMeta->streamBackend, pState);
if (code == -1) {
taosReleaseRef(streamBackendId, pState->streamBackendRid);
taosThreadMutexUnlock(&pMeta->backendMutex);
taosMemoryFree(pState);
return NULL;
@ -139,8 +138,9 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz
int64_t id = *(int64_t*)uniqueId;
pState->pTdbState->backendCfWrapperId = id;
pState->pTdbState->pBackendCfWrapper = taosAcquireRef(streamBackendCfWrapperId, id);
taosAcquireRef(streamBackendId, pState->streamBackendRid);
// already exist stream task for
qInfo("already exist stream-state for %s", pState->pTdbState->idstr);
// taosAcquireRef(streamBackendId, pState->streamBackendRid);
}
taosThreadMutexUnlock(&pMeta->backendMutex);
@ -149,6 +149,8 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz
_hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT);
pState->parNameMap = tSimpleHashInit(1024, hashFn);
qInfo("succ to open state %p on backend %p 0x%" PRIx64 "-%d", pState, pMeta->streamBackend, pState->streamId,
pState->taskId);
return pState;
#else
@ -424,10 +426,15 @@ int32_t streamStateSaveInfo(SStreamState* pState, void* pKey, int32_t keyLen, vo
code = streamStatePutBatch(pState, "default", batch, pKey, pVal, vLen, 0);
if (code != 0) {
streamStateDestroyBatch(batch);
return code;
}
code = streamStatePutBatch_rocksdb(pState, batch);
streamStateDestroyBatch(batch);
// code = streamDefaultPut_rocksdb(pState, pKey, pVal, vLen);
// char* Val = NULL;
// int32_t len = 0;
// code = streamDefaultGet_rocksdb(pState, pKey, (void**)&Val, &len);
return code;
#else
return 0;
@ -729,7 +736,8 @@ void streamStateFreeVal(void* val) {
int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, const void* value, int32_t vLen) {
#ifdef USE_ROCKSDB
qDebug("===stream===save skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey,key->win.ekey, key->groupId);
qDebug("===stream===save skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey,
key->groupId);
return streamStateSessionPut_rocksdb(pState, key, value, vLen);
#else
SStateSessionKey sKey = {.key = *key, .opNum = pState->number};
@ -763,7 +771,8 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa
int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) {
#ifdef USE_ROCKSDB
qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey,key->win.ekey, key->groupId);
qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey,
key->groupId);
return streamStateSessionDel_rocksdb(pState, key);
#else
SStateSessionKey sKey = {.key = *key, .opNum = pState->number};

View File

@ -13,11 +13,12 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "streamInt.h"
#include "executor.h"
#include "streamInt.h"
#include "tmisce.h"
#include "tstream.h"
#include "wal.h"
#include "ttimer.h"
#include "wal.h"
static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) {
int32_t childId = taosArrayGetSize(pArray);
@ -59,6 +60,7 @@ int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo)
if (tEncodeI32(pEncoder, pInfo->childId) < 0) return -1;
/*if (tEncodeI64(pEncoder, pInfo->processedVer) < 0) return -1;*/
if (tEncodeSEpSet(pEncoder, &pInfo->epSet) < 0) return -1;
if (tEncodeI64(pEncoder, pInfo->stage) < 0) return -1;
return 0;
}
@ -68,6 +70,7 @@ int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo) {
if (tDecodeI32(pDecoder, &pInfo->childId) < 0) return -1;
/*if (tDecodeI64(pDecoder, &pInfo->processedVer) < 0) return -1;*/
if (tDecodeSEpSet(pDecoder, &pInfo->epSet) < 0) return -1;
if (tDecodeI64(pDecoder, &pInfo->stage) < 0) return -1;
return 0;
}
@ -87,9 +90,10 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) {
if (tEncodeI32(pEncoder, pTask->info.selfChildId) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->info.nodeId) < 0) return -1;
if (tEncodeSEpSet(pEncoder, &pTask->info.epSet) < 0) return -1;
if (tEncodeSEpSet(pEncoder, &pTask->info.mnodeEpset) < 0) return -1;
if (tEncodeI64(pEncoder, pTask->chkInfo.id) < 0) return -1;
if (tEncodeI64(pEncoder, pTask->chkInfo.version) < 0) return -1;
if (tEncodeI64(pEncoder, pTask->chkInfo.checkpointId) < 0) return -1;
if (tEncodeI64(pEncoder, pTask->chkInfo.checkpointVer) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->info.fillHistory) < 0) return -1;
if (tEncodeI64(pEncoder, pTask->historyTaskId.streamId)) return -1;
@ -102,10 +106,10 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) {
if (tEncodeI64(pEncoder, pTask->dataRange.window.skey)) return -1;
if (tEncodeI64(pEncoder, pTask->dataRange.window.ekey)) return -1;
int32_t epSz = taosArrayGetSize(pTask->pUpstreamEpInfoList);
int32_t epSz = taosArrayGetSize(pTask->pUpstreamInfoList);
if (tEncodeI32(pEncoder, epSz) < 0) return -1;
for (int32_t i = 0; i < epSz; i++) {
SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i);
SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i);
if (tEncodeStreamEpInfo(pEncoder, pInfo) < 0) return -1;
}
@ -135,6 +139,41 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) {
return pEncoder->pos;
}
int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo) {
int64_t ver;
int64_t skip64;
int8_t skip8;
int32_t skip32;
int16_t skip16;
SEpSet epSet;
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &ver) < 0) return -1;
if (ver != SSTREAM_TASK_VER) return -1;
if (tDecodeI64(pDecoder, &skip64) < 0) return -1;
if (tDecodeI32(pDecoder, &skip32) < 0) return -1;
if (tDecodeI32(pDecoder, &skip32) < 0) return -1;
if (tDecodeI8(pDecoder, &skip8) < 0) return -1;
if (tDecodeI8(pDecoder, &skip8) < 0) return -1;
if (tDecodeI16(pDecoder, &skip16) < 0) return -1;
if (tDecodeI8(pDecoder, &skip8) < 0) return -1;
if (tDecodeI8(pDecoder, &skip8) < 0) return -1;
if (tDecodeI32(pDecoder, &skip32) < 0) return -1;
if (tDecodeI32(pDecoder, &skip32) < 0) return -1;
if (tDecodeSEpSet(pDecoder, &epSet) < 0) return -1;
if (tDecodeSEpSet(pDecoder, &epSet) < 0) return -1;
if (tDecodeI64(pDecoder, &pChkpInfo->checkpointId) < 0) return -1;
if (tDecodeI64(pDecoder, &pChkpInfo->checkpointVer) < 0) return -1;
tEndDecode(pDecoder);
return 0;
}
int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->ver) < 0) return -1;
@ -153,9 +192,10 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) {
if (tDecodeI32(pDecoder, &pTask->info.selfChildId) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->info.nodeId) < 0) return -1;
if (tDecodeSEpSet(pDecoder, &pTask->info.epSet) < 0) return -1;
if (tDecodeSEpSet(pDecoder, &pTask->info.mnodeEpset) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->chkInfo.id) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->chkInfo.version) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->chkInfo.checkpointId) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->chkInfo.checkpointVer) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->info.fillHistory) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->historyTaskId.streamId)) return -1;
@ -171,7 +211,7 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) {
int32_t epSz = -1;
if (tDecodeI32(pDecoder, &epSz) < 0) return -1;
pTask->pUpstreamEpInfoList = taosArrayInit(epSz, POINTER_BYTES);
pTask->pUpstreamInfoList = taosArrayInit(epSz, POINTER_BYTES);
for (int32_t i = 0; i < epSz; i++) {
SStreamChildEpInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamChildEpInfo));
if (pInfo == NULL) return -1;
@ -179,7 +219,7 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) {
taosMemoryFreeClear(pInfo);
return -1;
}
taosArrayPush(pTask->pUpstreamEpInfoList, &pInfo);
taosArrayPush(pTask->pUpstreamInfoList, &pInfo);
}
if (pTask->info.taskLevel != TASK_LEVEL__SINK) {
@ -215,11 +255,18 @@ static void freeItem(void* p) {
rpcFreeCont(pInfo->msg.pCont);
}
static void freeUpstreamItem(void* p) {
SStreamChildEpInfo** pInfo = p;
taosMemoryFree(*pInfo);
}
void tFreeStreamTask(SStreamTask* pTask) {
qDebug("free s-task:0x%x, %p", pTask->id.taskId, pTask);
int32_t taskId = pTask->id.taskId;
qDebug("free s-task:0x%x, %p, state:%p", taskId, pTask, pTask->pState);
// remove the ref by timer
while(pTask->status.timerActive > 0) {
while (pTask->status.timerActive > 0) {
qDebug("s-task:%s wait for task stop timer activities", pTask->id.idStr);
taosMsleep(10);
}
@ -256,21 +303,22 @@ void tFreeStreamTask(SStreamTask* pTask) {
walCloseReader(pTask->exec.pWalReader);
}
taosArrayDestroyP(pTask->pUpstreamEpInfoList, taosMemoryFree);
if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) {
tDeleteSchemaWrapper(pTask->tbSink.pSchemaWrapper);
taosMemoryFree(pTask->tbSink.pTSchema);
tSimpleHashCleanup(pTask->tbSink.pTblInfo);
} else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
taosArrayDestroy(pTask->shuffleDispatcher.dbInfo.pVgroupInfos);
taosArrayDestroy(pTask->checkReqIds);
pTask->checkReqIds = NULL;
pTask->checkReqIds = taosArrayDestroy(pTask->checkReqIds);
}
if (pTask->pState) {
qDebug("s-task:0x%x start to free task state", taskId);
streamStateClose(pTask->pState, status == TASK_STATUS__DROPPING);
}
pTask->pReadyMsgList = taosArrayDestroy(pTask->pReadyMsgList);
taosThreadMutexDestroy(&pTask->lock);
if (pTask->msgInfo.pData != NULL) {
destroyStreamDataBlock(pTask->msgInfo.pData);
pTask->msgInfo.pData = NULL;
@ -289,6 +337,203 @@ void tFreeStreamTask(SStreamTask* pTask) {
pTask->pRspMsgList = NULL;
}
if (pTask->pUpstreamInfoList != NULL) {
taosArrayDestroyEx(pTask->pUpstreamInfoList, freeUpstreamItem);
pTask->pUpstreamInfoList = NULL;
}
taosThreadMutexDestroy(&pTask->lock);
taosMemoryFree(pTask);
qDebug("s-task:0x%x free task completed", taskId);
}
int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, int64_t ver) {
pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId);
pTask->refCnt = 1;
pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
pTask->status.timerActive = 0;
pTask->inputQueue = streamQueueOpen(512 << 10);
pTask->outputInfo.queue = streamQueueOpen(512 << 10);
if (pTask->inputQueue == NULL || pTask->outputInfo.queue == NULL) {
qError("s-task:%s failed to prepare the input/output queue, initialize task failed", pTask->id.idStr);
return -1;
}
pTask->tsInfo.init = taosGetTimestampMs();
pTask->inputStatus = TASK_INPUT_STATUS__NORMAL;
pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL;
pTask->pMeta = pMeta;
pTask->chkInfo.currentVer = ver;
pTask->dataRange.range.maxVer = ver;
pTask->dataRange.range.minVer = ver;
pTask->pMsgCb = pMsgCb;
taosThreadMutexInit(&pTask->lock, NULL);
streamTaskOpenAllUpstreamInput(pTask);
return TSDB_CODE_SUCCESS;
}
int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask) {
if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
return 0;
} else {
int32_t type = pTask->outputInfo.type;
if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__TABLE) {
return 1;
} else {
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
return taosArrayGetSize(vgInfo);
}
}
}
static SStreamChildEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) {
SStreamChildEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamChildEpInfo));
if (pEpInfo == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
pEpInfo->childId = pTask->info.selfChildId;
pEpInfo->epSet = pTask->info.epSet;
pEpInfo->nodeId = pTask->info.nodeId;
pEpInfo->taskId = pTask->id.taskId;
pEpInfo->stage = -1;
return pEpInfo;
}
int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask) {
SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pUpstreamTask);
if (pEpInfo == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
if (pTask->pUpstreamInfoList == NULL) {
pTask->pUpstreamInfoList = taosArrayInit(4, POINTER_BYTES);
}
taosArrayPush(pTask->pUpstreamInfoList, &pEpInfo);
return TSDB_CODE_SUCCESS;
}
void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet) {
char buf[512] = {0};
EPSET_TO_STR(pEpSet, buf);
int32_t numOfUpstream = taosArrayGetSize(pTask->pUpstreamInfoList);
for (int32_t i = 0; i < numOfUpstream; ++i) {
SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i);
if (pInfo->nodeId == nodeId) {
epsetAssign(&pInfo->epSet, pEpSet);
qDebug("s-task:0x%x update the upstreamInfo, nodeId:%d newEpset:%s", pTask->id.taskId, nodeId, buf);
break;
}
}
}
void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDownstreamTask) {
STaskDispatcherFixedEp* pDispatcher = &pTask->fixedEpDispatcher;
pDispatcher->taskId = pDownstreamTask->id.taskId;
pDispatcher->nodeId = pDownstreamTask->info.nodeId;
pDispatcher->epSet = pDownstreamTask->info.epSet;
pTask->outputInfo.type = TASK_OUTPUT__FIXED_DISPATCH;
pTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH;
}
void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet) {
char buf[512] = {0};
EPSET_TO_STR(pEpSet, buf);
int8_t type = pTask->outputInfo.type;
if (type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
SArray* pVgs = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t numOfVgroups = taosArrayGetSize(pVgs);
for (int32_t i = 0; i < numOfVgroups; i++) {
SVgroupInfo* pVgInfo = taosArrayGet(pVgs, i);
if (pVgInfo->vgId == nodeId) {
epsetAssign(&pVgInfo->epSet, pEpSet);
qDebug("s-task:0x%x update the dispatch info, nodeId:%d newEpset:%s", pTask->id.taskId, nodeId, buf);
break;
}
}
} else if (type == TASK_OUTPUT__FIXED_DISPATCH) {
STaskDispatcherFixedEp* pDispatcher = &pTask->fixedEpDispatcher;
if (pDispatcher->nodeId == nodeId) {
epsetAssign(&pDispatcher->epSet, pEpSet);
qDebug("s-task:0x%x update the dispatch info, nodeId:%d newEpSet:%s", pTask->id.taskId, nodeId, buf);
}
} else {
// do nothing
}
}
int32_t streamTaskStop(SStreamTask* pTask) {
SStreamMeta* pMeta = pTask->pMeta;
int64_t st = taosGetTimestampMs();
const char* id = pTask->id.idStr;
pTask->status.taskStatus = TASK_STATUS__STOP;
qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS);
while (/*pTask->status.schedStatus != TASK_SCHED_STATUS__INACTIVE */!streamTaskIsIdle(pTask)) {
qDebug("s-task:%s level:%d wait for task to be idle, check again in 100ms", id, pTask->info.taskLevel);
taosMsleep(100);
}
int64_t el = taosGetTimestampMs() - st;
qDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", pMeta->vgId, pTask->id.idStr, el);
return 0;
}
int32_t doUpdateTaskEpset(SStreamTask* pTask, int32_t nodeId, SEpSet* pEpSet) {
char buf[512] = {0};
if (pTask->info.nodeId == nodeId) { // execution task should be moved away
epsetAssign(&pTask->info.epSet, pEpSet);
EPSET_TO_STR(pEpSet, buf)
qDebug("s-task:0x%x (vgId:%d) self node epset is updated %s", pTask->id.taskId, nodeId, buf);
}
// check for the dispath info and the upstream task info
int32_t level = pTask->info.taskLevel;
if (level == TASK_LEVEL__SOURCE) {
streamTaskUpdateDownstreamInfo(pTask, nodeId, pEpSet);
} else if (level == TASK_LEVEL__AGG) {
streamTaskUpdateUpstreamInfo(pTask, nodeId, pEpSet);
streamTaskUpdateDownstreamInfo(pTask, nodeId, pEpSet);
} else { // TASK_LEVEL__SINK
streamTaskUpdateUpstreamInfo(pTask, nodeId, pEpSet);
}
return 0;
}
int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList) {
for (int32_t i = 0; i < taosArrayGetSize(pNodeList); ++i) {
SNodeUpdateInfo* pInfo = taosArrayGet(pNodeList, i);
doUpdateTaskEpset(pTask, pInfo->nodeId, &pInfo->newEp);
}
return 0;
}
void streamTaskResetUpstreamStageInfo(SStreamTask* pTask) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
return;
}
int32_t size = taosArrayGetSize(pTask->pUpstreamInfoList);
for (int32_t i = 0; i < size; ++i) {
SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i);
pInfo->stage = -1;
}
qDebug("s-task:%s reset all upstream tasks stage info", pTask->id.idStr);
}

View File

@ -89,11 +89,11 @@ static int64_t adjustWatermark(int64_t adjInterval, int64_t originInt, int64_t w
return watermark;
}
SUpdateInfo *updateInfoInitP(SInterval *pInterval, int64_t watermark) {
return updateInfoInit(pInterval->interval, pInterval->precision, watermark);
SUpdateInfo *updateInfoInitP(SInterval *pInterval, int64_t watermark, bool igUp) {
return updateInfoInit(pInterval->interval, pInterval->precision, watermark, igUp);
}
SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t watermark) {
SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t watermark, bool igUp) {
SUpdateInfo *pInfo = taosMemoryCalloc(1, sizeof(SUpdateInfo));
if (pInfo == NULL) {
return NULL;
@ -104,14 +104,15 @@ SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t waterma
pInfo->interval = adjustInterval(interval, precision);
pInfo->watermark = adjustWatermark(pInfo->interval, interval, watermark);
uint64_t bfSize = (uint64_t)(pInfo->watermark / pInfo->interval);
uint64_t bfSize = 0;
if (!igUp) {
bfSize = (uint64_t)(pInfo->watermark / pInfo->interval);
pInfo->pTsSBFs = taosArrayInit(bfSize, sizeof(void *));
if (pInfo->pTsSBFs == NULL) {
updateInfoDestroy(pInfo);
return NULL;
}
pInfo->numSBFs = bfSize;
windowSBfAdd(pInfo, bfSize);
pInfo->pTsBuckets = taosArrayInit(DEFAULT_BUCKET_SIZE, sizeof(TSKEY));
@ -128,6 +129,8 @@ SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t waterma
pInfo->pCloseWinSBF = NULL;
_hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT);
pInfo->pMap = taosHashInit(DEFAULT_MAP_CAPACITY, hashFn, true, HASH_NO_LOCK);
}
pInfo->numSBFs = bfSize;
pInfo->maxDataVersion = 0;
return pInfo;
}

View File

@ -49,7 +49,8 @@ struct SStreamFileState {
typedef SRowBuffPos SRowBuffInfo;
SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize,
GetTsFun fp, void* pFile, TSKEY delMark, const char* idstr) {
GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId,
int64_t checkpointId) {
if (memSize <= 0) {
memSize = DEFAULT_MAX_STREAM_BUFFER_SIZE;
}
@ -83,9 +84,9 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_
pFileState->deleteMark = delMark;
pFileState->flushMark = INT64_MIN;
pFileState->maxTs = INT64_MIN;
pFileState->id = taosStrdup(idstr);
pFileState->id = taosStrdup(taskId);
recoverSnapshot(pFileState);
recoverSnapshot(pFileState, checkpointId);
return pFileState;
_error:
@ -385,7 +386,7 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
0, buf);
// todo handle failure
memset(buf, 0, len);
// qDebug("===stream===put %" PRId64 " to disc, res %d", sKey.key.ts, code);
// qDebug("===stream===put %" PRId64 " to disc, res %d", sKey.key.ts, code);
}
taosMemoryFree(buf);
@ -396,8 +397,8 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
streamStateClearBatch(batch);
int64_t elapsed = taosGetTimestampMs() - st;
qDebug("%s flush to disk in batch model completed, rows:%d, batch size:%d, elapsed time:%"PRId64"ms", pFileState->id, numOfElems,
BATCH_LIMIT, elapsed);
qDebug("%s flush to disk in batch model completed, rows:%d, batch size:%d, elapsed time:%" PRId64 "ms",
pFileState->id, numOfElems, BATCH_LIMIT, elapsed);
if (flushState) {
const char* taskKey = "streamFileState";
@ -479,7 +480,7 @@ int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark) {
return code;
}
int32_t recoverSnapshot(SStreamFileState* pFileState) {
int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) {
int32_t code = TSDB_CODE_SUCCESS;
if (pFileState->maxTs != INT64_MIN) {
int64_t mark = (INT64_MIN + pFileState->deleteMark >= pFileState->maxTs)
@ -487,8 +488,6 @@ int32_t recoverSnapshot(SStreamFileState* pFileState) {
: pFileState->maxTs - pFileState->deleteMark;
deleteExpiredCheckPoint(pFileState, mark);
}
void* pStVal = NULL;
int32_t len = 0;
SWinKey key = {.groupId = 0, .ts = 0};
SStreamStateCur* pCur = streamStateSeekToLast_rocksdb(pFileState->pFileStore, &key);
@ -508,9 +507,12 @@ int32_t recoverSnapshot(SStreamFileState* pFileState) {
destroyRowBuffPos(pNewPos);
SListNode* pNode = tdListPopTail(pFileState->usedBuffs);
taosMemoryFreeClear(pNode);
taosMemoryFreeClear(pVal);
break;
}
ASSERT(pVLen == pFileState->rowSize);
memcpy(pNewPos->pRowBuff, pVal, pVLen);
taosMemoryFreeClear(pVal);
code = tSimpleHashPut(pFileState->rowBuffMap, pNewPos->pKey, pFileState->keyLen, &pNewPos, POINTER_BYTES);
if (code != TSDB_CODE_SUCCESS) {
destroyRowBuffPos(pNewPos);

View File

@ -12,7 +12,7 @@ class StreamStateEnv : public ::testing::Test {
protected:
virtual void SetUp() {
streamMetaInit();
backend = streamBackendInit(path);
backend = streamBackendInit(path, 0);
}
virtual void TearDown() {
streamMetaCleanup();

View File

@ -804,7 +804,7 @@ TEST(TdbPageRecycleTest, recycly_delete_interior_ofp_nocommit) {
// sprintf(&key[count - 2], "%c", i);
key[count - 2] = '0' + i;
ret = tdbTbInsert(pDb, key, count, NULL, NULL, txn);
ret = tdbTbInsert(pDb, key, count, NULL, 0, txn);
GTEST_ASSERT_EQ(ret, 0);
}
}

View File

@ -262,7 +262,6 @@ bool transAsyncPoolIsEmpty(SAsyncPool* pool);
#define ASYNC_CHECK_HANDLE(exh1, id) \
do { \
if (id > 0) { \
tTrace("handle step1"); \
SExHandle* exh2 = transAcquireExHandle(transGetRefMgt(), id); \
if (exh2 == NULL || id != exh2->refId) { \
tTrace("handle %p except, may already freed, ignore msg, ref1:%" PRIu64 ", ref2:%" PRIu64, exh1, \

View File

@ -12,7 +12,10 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// clang-format off
#include "transComm.h"
#include "tmisce.h"
// clang-format on
typedef struct {
int32_t numOfConn;
@ -308,19 +311,6 @@ static void cliWalkCb(uv_handle_t* handle, void* arg);
} \
} while (0)
#define EPSET_DEBUG_STR(epSet, tbuf) \
do { \
int len = snprintf(tbuf, sizeof(tbuf), "epset:{"); \
for (int i = 0; i < (epSet)->numOfEps; i++) { \
if (i == (epSet)->numOfEps - 1) { \
len += snprintf(tbuf + len, sizeof(tbuf) - len, "%d. %s:%d", i, (epSet)->eps[i].fqdn, (epSet)->eps[i].port); \
} else { \
len += snprintf(tbuf + len, sizeof(tbuf) - len, "%d. %s:%d, ", i, (epSet)->eps[i].fqdn, (epSet)->eps[i].port); \
} \
} \
len += snprintf(tbuf + len, sizeof(tbuf) - len, "}, inUse:%d", (epSet)->inUse); \
} while (0);
static void* cliWorkThread(void* arg);
static void cliReleaseUnfinishedMsg(SCliConn* conn) {
@ -1268,7 +1258,7 @@ static void cliHandleFastFail(SCliConn* pConn, int status) {
SCliThrd* pThrd = pConn->hostThrd;
STrans* pTransInst = pThrd->pTransInst;
if (status == -1) status = ENETUNREACH;
if (status == -1) status = UV_EADDRNOTAVAIL;
if (pConn->pBatch == NULL) {
SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0);
@ -2167,7 +2157,7 @@ static void cliSchedMsgToNextNode(SCliMsg* pMsg, SCliThrd* pThrd) {
if (rpcDebugFlag & DEBUG_DEBUG) {
STraceId* trace = &pMsg->msg.info.traceId;
char tbuf[256] = {0};
EPSET_DEBUG_STR(&pCtx->epSet, tbuf);
EPSET_TO_STR(&pCtx->epSet, tbuf);
tGDebug("%s retry on next node,use:%s, step: %d,timeout:%" PRId64 "", transLabel(pThrd->pTransInst), tbuf,
pCtx->retryStep, pCtx->retryNextInterval);
}
@ -2396,7 +2386,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) {
if (hasEpSet) {
if (rpcDebugFlag & DEBUG_TRACE) {
char tbuf[256] = {0};
EPSET_DEBUG_STR(&pCtx->epSet, tbuf);
EPSET_TO_STR(&pCtx->epSet, tbuf);
tGTrace("%s conn %p extract epset from msg", CONN_GET_INST_LABEL(pConn), pConn);
}
}

View File

@ -196,8 +196,6 @@ static bool uvHandleReq(SSvrConn* pConn) {
tError("%s conn %p recv invalid packet, failed to decompress", transLabel(pTransInst), pConn);
return false;
}
tDebug("head version: %d 2", pHead->version);
pHead->code = htonl(pHead->code);
pHead->msgLen = htonl(pHead->msgLen);
@ -727,7 +725,6 @@ void uvOnAcceptCb(uv_stream_t* stream, int status) {
}
}
void uvOnConnectionCb(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) {
tTrace("connection coming");
if (nread < 0) {
if (nread != UV_EOF) {
tError("read error %s", uv_err_name(nread));

View File

@ -360,6 +360,23 @@ void taosArrayClearEx(SArray* pArray, void (*fp)(void*)) {
pArray->size = 0;
}
void taosArrayClearP(SArray* pArray, void (*fp)(void*)) {
// if (pArray == NULL) return;
// if (fp == NULL) {
// pArray->size = 0;
// return;
// }
// for (int32_t i = 0; i < pArray->size; ++i) {
// fp(TARRAY_GET_ELEM(pArray, i));
// }
if (pArray) {
for (int32_t i = 0; i < pArray->size; i++) {
fp(*(void**)TARRAY_GET_ELEM(pArray, i));
}
}
taosArrayClear(pArray);
}
void* taosArrayDestroy(SArray* pArray) {
if (pArray) {

View File

@ -181,7 +181,8 @@ int64_t taosAddRef(int32_t rsetId, void *p) {
if (pSet->nodeList[hash]) pSet->nodeList[hash]->prev = pNode;
pSet->nodeList[hash] = pNode;
uTrace("rsetId:%d p:%p rid:%" PRId64 " is added, count:%d", rsetId, p, rid, pSet->count);
uTrace("rsetId:%d p:%p rid:%" PRId64 " is added, count:%d, remain count:%d", rsetId, p, rid, pSet->count,
pNode->count);
taosUnlockList(pSet->lockedBy + hash);
@ -235,7 +236,7 @@ void *taosAcquireRef(int32_t rsetId, int64_t rid) {
if (pNode->removed == 0) {
pNode->count++;
p = pNode->p;
uTrace("rsetId:%d p:%p rid:%" PRId64 " is acquired", rsetId, pNode->p, rid);
uTrace("rsetId:%d p:%p rid:%" PRId64 " is acquired, remain count:%d", rsetId, pNode->p, rid, pNode->count);
} else {
terrno = TSDB_CODE_REF_NOT_EXIST;
uTrace("rsetId:%d p:%p rid:%" PRId64 " is already removed, failed to acquire", rsetId, pNode->p, rid);

1137
stream

File diff suppressed because it is too large Load Diff

View File

@ -1028,6 +1028,7 @@
,,y,script,./test.sh -f tsim/stream/basic2.sim
,,y,script,./test.sh -f tsim/stream/basic3.sim
,,y,script,./test.sh -f tsim/stream/basic4.sim
,,y,script,./test.sh -f tsim/stream/checkpointInterval0.sim
,,y,script,./test.sh -f tsim/stream/checkStreamSTable1.sim
,,y,script,./test.sh -f tsim/stream/checkStreamSTable.sim
,,y,script,./test.sh -f tsim/stream/deleteInterval.sim

103
tests/script/log Normal file
View File

@ -0,0 +1,103 @@
------------------------------------------------------------------------
Start TDengine Testing Case ...
BUILD_DIR: /root/yihao/work/TDengine/debug
SIM_DIR : /root/yihao/work/TDengine/sim
CODE_DIR : /root/yihao/work/TDengine/tests/script
CFG_DIR : /root/yihao/work/TDengine/sim/tsim/cfg
ASAN_DIR : /root/yihao/work/TDengine/sim/asan
------------------------------------------------------------------------
ExcuteCmd: /root/yihao/work/TDengine/debug/build/bin/tsim -c /root/yihao/work/TDengine/sim/tsim/cfg -f tsim/stream/state0.sim
AsanDir: /root/yihao/work/TDengine/sim/asan/tsim.asan
08/18 17:00:43.118420 00438970 SIM simulator is running ...
Executing deploy.sh
SCRIPT_DIR: /root/yihao/work/TDengine/tests/script
------------ start dnode1
nohup /root/yihao/work/TDengine/debug/build/bin/taosd -c /root/yihao/work/TDengine/sim/dnode1/cfg > /dev/null 2>&1 &
08/18 17:00:43.288417 00438970 SIM script:tsim/stream/state0.sim, sleep 50ms begin
08/18 17:00:43.338486 00438970 SIM script:tsim/stream/state0.sim, sleep 50ms finished
08/18 17:00:43.679227 00438970 SIM script:tsim/stream/state0.sim, =============== create database
08/18 17:00:45.450379 00438970 SIM script:tsim/stream/state0.sim, information_schema 23-08-18 17:00:43.491 NULL
08/18 17:00:45.454189 00438970 SIM script:tsim/stream/state0.sim, create stream streams1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt1 as select _wstart, count(*) c1, count(d) c2 , sum(a) c3 , max(a) c4, min(c) c5, max(id) c from t1 state_window(a);
08/18 17:00:45.610789 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:00:46.610955 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:00:46.611228 00438970 SIM script:tsim/stream/state0.sim, =====rows=0
08/18 17:00:46.627080 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:00:47.627254 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:00:47.637364 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:00:48.637446 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:00:48.684845 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:00:49.685055 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:00:49.685390 00438970 SIM script:tsim/stream/state0.sim, =====rows=0
08/18 17:00:49.712001 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:00:50.712137 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:00:50.713185 00438970 SIM script:tsim/stream/state0.sim, loop1 end
08/18 17:00:50.737435 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:00:51.737618 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:00:51.738003 00438970 SIM script:tsim/stream/state0.sim, =====data21=null
08/18 17:00:51.764834 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:00:52.764971 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:00:52.805578 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:00:53.805742 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:00:53.806044 00438970 SIM script:tsim/stream/state0.sim, =====data26=null
08/18 17:00:53.828622 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:00:54.828756 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:00:54.890423 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:00:55.890533 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:00:55.890839 00438970 SIM script:tsim/stream/state0.sim, =====data21=null
08/18 17:00:55.917818 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:00:56.918008 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:00:56.966614 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:00:57.966809 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:00:57.967085 00438970 SIM script:tsim/stream/state0.sim, ====loop4=rows=0
08/18 17:00:57.993757 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:00:58.993936 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:00:58.994839 00438970 SIM script:tsim/stream/state0.sim, loop4 end
08/18 17:00:59.678915 00438970 SIM script:tsim/stream/state0.sim, information_schema 23-08-18 17:00:43.491 NULL
08/18 17:00:59.698124 00438970 SIM script:tsim/stream/state0.sim, create stream streams2 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt1 as select _wstart, count(*) c1, count(d) c2 , sum(a) c3 , max(a) c4, min(c) c5, max(id) c from t1 state_window(a);
08/18 17:00:59.862389 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:01:00.862605 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:01:01.584856 00438970 SIM script:tsim/stream/state0.sim, create stream streams3 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt3 as select _wstart, count(*) c1, sum(b) c3 from t1 state_window(a);
08/18 17:01:01.778514 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:01:02.778699 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:01:09.536746 00438970 SIM script:tsim/stream/state0.sim, create stream if not exists streams4 trigger window_close IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt4 as select _wstart AS startts, min(c1),count(c1) from t1 state_window(c1);
08/18 17:01:09.789267 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:01:10.789450 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:01:10.863370 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:01:11.863556 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:01:11.909184 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:01:12.909288 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:01:12.936362 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:01:13.936494 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:01:15.301064 00438970 SIM script:tsim/stream/state0.sim, create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart c1, count(*) c2, max(a) c3 from tb state_window(a);
08/18 17:01:15.466754 00438970 SIM script:tsim/stream/state0.sim, data00:null
08/18 17:01:15.466768 00438970 SIM script:tsim/stream/state0.sim, data01:null
08/18 17:01:15.489373 00438970 SIM script:tsim/stream/state0.sim, data00:null
08/18 17:01:15.489392 00438970 SIM script:tsim/stream/state0.sim, data01:null
08/18 17:01:15.511826 00438970 SIM script:tsim/stream/state0.sim, data00:null
08/18 17:01:15.511841 00438970 SIM script:tsim/stream/state0.sim, data01:null
08/18 17:01:15.522397 00438970 SIM script:tsim/stream/state0.sim, data00:null
08/18 17:01:15.522416 00438970 SIM script:tsim/stream/state0.sim, data01:null
08/18 17:01:15.526939 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms begin
08/18 17:01:16.527119 00438970 SIM script:tsim/stream/state0.sim, sleep 1000ms finished
08/18 17:01:16.542936 00438970 SIM script:tsim/stream/state0.sim, state0 end
------------ stop dnode1
try to kill by signal SIGINT
try to kill by signal SIGINT
try to kill by signal SIGINT
try to kill by signal SIGINT
08/18 17:01:20.674086 00438970 SIM script:tsim/stream/state0.sim, return cmd execute with:1
08/18 17:01:20.674103 00438970 SIM script:tsim/stream/state0.sim, success
08/18 17:01:20.674165 00438970 SIM script:tsim/stream/state0.sim, background script num:0, stop them
08/18 17:01:20.674521 00438970 SIM ----------------------------------------------------------------------
08/18 17:01:20.674528 00438970 SIM Simulation Test Done, 1 Passed:
08/18 17:01:20.674534 00438970 SIM thread is stopped
08/18 17:01:20.674537 00438970 SIM execute result 0
Execute result: 0
Killing taosd processes
asan error_num: 0
asan memory_leak: 0
asan indirect_leak: 0
asan runtime error: 0
asan python error: 0
no asan errors

View File

@ -118,7 +118,7 @@ echo "statusInterval 1" >> $TAOS_CFG
echo "dataDir $DATA_DIR" >> $TAOS_CFG
echo "logDir $LOG_DIR" >> $TAOS_CFG
echo "debugFlag 0" >> $TAOS_CFG
echo "tmrDebugFlag 143" >> $TAOS_CFG
echo "tmrDebugFlag 131" >> $TAOS_CFG
echo "uDebugFlag 143" >> $TAOS_CFG
echo "rpcDebugFlag 143" >> $TAOS_CFG
echo "jniDebugFlag 143" >> $TAOS_CFG

View File

@ -0,0 +1,255 @@
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135
system sh/exec.sh -n dnode1 -s start
sleep 50
sql connect
print step 1
print =============== create database
sql create database test vgroups 1;
sql use test;
sql create table t1(ts timestamp, a int, b int , c int, d double);
sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from t1 interval(10s);
sql create stream streams1 trigger window_close IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt1 as select _wstart, count(*) c1, sum(a) from t1 interval(10s);
sql insert into t1 values(1648791213000,1,2,3,1.0);
sql insert into t1 values(1648791213001,2,2,3,1.1);
$loop_count = 0
loop0:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 1 then
print =====rows=$rows expect 1
goto loop0
endi
# row 0
if $data01 != 2 then
print =====data01=$data01
goto loop0
endi
if $data02 != 3 then
print =====data02=$data02
goto loop0
endi
$loop_count = 0
loop01:
sleep 1000
sql select * from streamt1;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 0 then
print =====rows=$rows expect 1
goto loop01
endi
print waiting for checkpoint generation 1 ......
sleep 25000
print restart taosd 01 ......
system sh/stop_dnodes.sh
system sh/exec.sh -n dnode1 -s start
sql insert into t1 values(1648791213002,3,2,3,1.1);
$loop_count = 0
loop1:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 1 then
print =====rows=$rows expect 1
goto loop1
endi
# row 0
if $data01 != 3 then
print =====data01=$data01
goto loop1
endi
if $data02 != 6 then
print =====data02=$data02
goto loop1
endi
sql insert into t1 values(1648791223003,4,2,3,1.1);
$loop_count = 0
loop2:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 2 then
print =====rows=$rows expect 2
goto loop2
endi
# row 0
if $data01 != 3 then
print =====data01=$data01
goto loop2
endi
if $data02 != 6 then
print =====data02=$data02
goto loop2
endi
# row 1
if $data11 != 1 then
print =====data11=$data11
goto loop2
endi
if $data12 != 4 then
print =====data12=$data12
goto loop2
endi
$loop_count = 0
loop3:
sleep 1000
print select * from streamt1;
sql select * from streamt1;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 1 then
print =====rows=$rows expect 2
goto loop3
endi
# row 0
if $data01 != 3 then
print =====data01=$data01
goto loop3
endi
if $data02 != 6 then
print =====data02=$data02
goto loop3
endi
print step 2
print restart taosd 02 ......
system sh/stop_dnodes.sh
system sh/exec.sh -n dnode1 -s start
sql insert into t1 values(1648791223004,5,2,3,1.1);
loop4:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 2 then
print =====rows=$rows expect 2
goto loop4
endi
# row 0
if $data01 != 3 then
print =====data01=$data01
goto loop4
endi
if $data02 != 6 then
print =====data02=$data02
goto loop4
endi
# row 1
if $data11 != 2 then
print =====data11=$data11
goto loop4
endi
if $data12 != 9 then
print =====data12=$data12
goto loop4
endi
$loop_count = 0
loop5:
sleep 1000
print select * from streamt1;
sql select * from streamt1;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 1 then
print =====rows=$rows expect 2
goto loop5
endi
# row 0
if $data01 != 3 then
print =====data01=$data01
goto loop5
endi
if $data02 != 6 then
print =====data02=$data02
goto loop5
endi
print end---------------------------------
system sh/exec.sh -n dnode1 -s stop -x SIGINT

View File

@ -0,0 +1,104 @@
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135
system sh/exec.sh -n dnode1 -s start
sleep 50
sql connect
print step 1
sql create database test vgroups 4;
sql use test;
sql create stable st(ts timestamp,a int,b int,c int, d double) tags(ta int,tb int,tc int);
sql create table t1 using st tags(1,1,1);
sql create table t2 using st tags(2,2,2);
sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from st interval(10s);
sql insert into t1 values(1648791213000,1,2,3,1.0);
sql insert into t2 values(1648791213001,2,2,3,1.1);
$loop_count = 0
loop0:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 1 then
print =====rows=$rows expect 1
goto loop0
endi
# row 0
if $data01 != 2 then
print =====data01=$data01
goto loop0
endi
if $data02 != 3 then
print =====data02=$data02
goto loop0
endi
print waiting for checkpoint generation 1 ......
sleep 25000
print restart taosd
system sh/stop_dnodes.sh
system sh/exec.sh -n dnode1 -s start
sql insert into t1 values(1648791213002,3,2,3,1.1);
sql insert into t2 values(1648791223003,4,2,3,1.1);
$loop_count = 0
loop1:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 2 then
print =====rows=$rows expect 2
goto loop1
endi
# row 0
if $data01 != 3 then
print =====data01=$data01
goto loop1
endi
if $data02 != 6 then
print =====data02=$data02
goto loop1
endi
# row 1
if $data11 != 1 then
print =====data11=$data11
goto loop1
endi
if $data12 != 4 then
print =====data12=$data12
goto loop1
endi
print end---------------------------------
system sh/exec.sh -n dnode1 -s stop -x SIGINT

View File

@ -0,0 +1,178 @@
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135
system sh/exec.sh -n dnode1 -s start
sleep 50
sql connect
print step 1
print =============== create database
sql create database test vgroups 1;
sql use test;
sql create table t1(ts timestamp, a int, b int , c int, d double);
sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from t1 session(ts, 10s);
sql insert into t1 values(1648791213000,1,2,3,1.0);
sql insert into t1 values(1648791213001,2,2,3,1.1);
$loop_count = 0
loop0:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 1 then
print =====rows=$rows expect 1
goto loop0
endi
# row 0
if $data01 != 2 then
print =====data01=$data01
goto loop0
endi
if $data02 != 3 then
print =====data02=$data02
goto loop0
endi
print waiting for checkpoint generation 1 ......
sleep 25000
print restart taosd 01 ......
system sh/stop_dnodes.sh
system sh/exec.sh -n dnode1 -s start
sql insert into t1 values(1648791213002,3,2,3,1.1);
$loop_count = 0
loop1:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 1 then
print =====rows=$rows expect 1
goto loop1
endi
# row 0
if $data01 != 3 then
print =====data01=$data01
goto loop1
endi
if $data02 != 6 then
print =====data02=$data02
goto loop1
endi
sql insert into t1 values(1648791233003,4,2,3,1.1);
$loop_count = 0
loop2:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 2 then
print =====rows=$rows expect 2
goto loop2
endi
# row 0
if $data01 != 3 then
print =====data01=$data01
goto loop2
endi
if $data02 != 6 then
print =====data02=$data02
goto loop2
endi
# row 1
if $data11 != 1 then
print =====data11=$data11
goto loop2
endi
if $data12 != 4 then
print =====data12=$data12
goto loop2
endi
print step 2
print restart taosd 02 ......
system sh/stop_dnodes.sh
system sh/exec.sh -n dnode1 -s start
sql insert into t1 values(1648791233004,5,2,3,1.1);
loop20:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 2 then
print =====rows=$rows expect 2
goto loop20
endi
# row 0
if $data01 != 3 then
print =====data01=$data01
goto loop20
endi
if $data02 != 6 then
print =====data02=$data02
goto loop20
endi
# row 1
if $data11 != 2 then
print =====data11=$data11
goto loop20
endi
if $data12 != 9 then
print =====data12=$data12
goto loop20
endi
print end---------------------------------
system sh/exec.sh -n dnode1 -s stop -x SIGINT

View File

@ -0,0 +1,104 @@
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135
system sh/exec.sh -n dnode1 -s start
sleep 50
sql connect
print step 1
sql create database test vgroups 4;
sql use test;
sql create stable st(ts timestamp,a int,b int,c int, d double) tags(ta int,tb int,tc int);
sql create table t1 using st tags(1,1,1);
sql create table t2 using st tags(2,2,2);
sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from st session(ts, 10s);
sql insert into t1 values(1648791213000,1,2,3,1.0);
sql insert into t2 values(1648791213001,2,2,3,1.1);
$loop_count = 0
loop0:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 1 then
print =====rows=$rows expect 1
goto loop0
endi
# row 0
if $data01 != 2 then
print =====data01=$data01
goto loop0
endi
if $data02 != 3 then
print =====data02=$data02
goto loop0
endi
print waiting for checkpoint generation 1 ......
sleep 25000
print restart taosd
system sh/stop_dnodes.sh
system sh/exec.sh -n dnode1 -s start
sql insert into t1 values(1648791213002,3,2,3,1.1);
sql insert into t2 values(1648791233003,4,2,3,1.1);
$loop_count = 0
loop1:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 2 then
print =====rows=$rows expect 2
goto loop1
endi
# row 0
if $data01 != 3 then
print =====data01=$data01
goto loop1
endi
if $data02 != 6 then
print =====data02=$data02
goto loop1
endi
# row 1
if $data11 != 1 then
print =====data11=$data11
goto loop1
endi
if $data12 != 4 then
print =====data12=$data12
goto loop1
endi
print end---------------------------------
system sh/exec.sh -n dnode1 -s stop -x SIGINT

View File

@ -0,0 +1,178 @@
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135
system sh/exec.sh -n dnode1 -s start
sleep 50
sql connect
print step 1
print =============== create database
sql create database test vgroups 1;
sql use test;
sql create table t1(ts timestamp, a int, b int , c int, d double);
sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from t1 state_window(b);
sql insert into t1 values(1648791213000,1,2,3,1.0);
sql insert into t1 values(1648791213001,2,2,3,1.1);
$loop_count = 0
loop0:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 1 then
print =====rows=$rows expect 1
goto loop0
endi
# row 0
if $data01 != 2 then
print =====data01=$data01
goto loop0
endi
if $data02 != 3 then
print =====data02=$data02
goto loop0
endi
print waiting for checkpoint generation 1 ......
sleep 25000
print restart taosd 01 ......
system sh/stop_dnodes.sh
system sh/exec.sh -n dnode1 -s start
sql insert into t1 values(1648791213002,3,2,3,1.1);
$loop_count = 0
loop1:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 1 then
print =====rows=$rows expect 1
goto loop1
endi
# row 0
if $data01 != 3 then
print =====data01=$data01
goto loop1
endi
if $data02 != 6 then
print =====data02=$data02
goto loop1
endi
sql insert into t1 values(1648791233003,4,3,3,1.1);
$loop_count = 0
loop2:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 2 then
print =====rows=$rows expect 2
goto loop2
endi
# row 0
if $data01 != 3 then
print =====data01=$data01
goto loop2
endi
if $data02 != 6 then
print =====data02=$data02
goto loop2
endi
# row 1
if $data11 != 1 then
print =====data11=$data11
goto loop2
endi
if $data12 != 4 then
print =====data12=$data12
goto loop2
endi
print step 2
print restart taosd 02 ......
system sh/stop_dnodes.sh
system sh/exec.sh -n dnode1 -s start
sql insert into t1 values(1648791233004,5,3,3,1.1);
loop20:
sleep 1000
sql select * from streamt;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 2 then
print =====rows=$rows expect 2
goto loop20
endi
# row 0
if $data01 != 3 then
print =====data01=$data01
goto loop20
endi
if $data02 != 6 then
print =====data02=$data02
goto loop20
endi
# row 1
if $data11 != 2 then
print =====data11=$data11
goto loop20
endi
if $data12 != 9 then
print =====data12=$data12
goto loop20
endi
print end---------------------------------
system sh/exec.sh -n dnode1 -s stop -x SIGINT

View File

@ -237,6 +237,52 @@
./test.sh -f tsim/table/table.sim
./test.sh -f tsim/table/tinyint.sim
./test.sh -f tsim/table/vgroup.sim
./test.sh -f tsim/stream/basic0.sim -g
./test.sh -f tsim/stream/basic1.sim
./test.sh -f tsim/stream/basic2.sim
./test.sh -f tsim/stream/basic3.sim
./test.sh -f tsim/stream/basic4.sim
./test.sh -f tsim/stream/checkpointInterval0.sim
./test.sh -f tsim/stream/checkStreamSTable1.sim
./test.sh -f tsim/stream/checkStreamSTable.sim
./test.sh -f tsim/stream/deleteInterval.sim
./test.sh -f tsim/stream/deleteSession.sim
./test.sh -f tsim/stream/deleteState.sim
./test.sh -f tsim/stream/distributeInterval0.sim
./test.sh -f tsim/stream/distributeIntervalRetrive0.sim
./test.sh -f tsim/stream/distributeSession0.sim
./test.sh -f tsim/stream/drop_stream.sim
./test.sh -f tsim/stream/fillHistoryBasic1.sim
./test.sh -f tsim/stream/fillHistoryBasic2.sim
./test.sh -f tsim/stream/fillHistoryBasic3.sim
./test.sh -f tsim/stream/fillIntervalDelete0.sim
./test.sh -f tsim/stream/fillIntervalDelete1.sim
./test.sh -f tsim/stream/fillIntervalLinear.sim
./test.sh -f tsim/stream/fillIntervalPartitionBy.sim
./test.sh -f tsim/stream/fillIntervalPrevNext1.sim
./test.sh -f tsim/stream/fillIntervalPrevNext.sim
./test.sh -f tsim/stream/fillIntervalRange.sim
./test.sh -f tsim/stream/fillIntervalValue.sim
./test.sh -f tsim/stream/ignoreCheckUpdate.sim
./test.sh -f tsim/stream/ignoreExpiredData.sim
./test.sh -f tsim/stream/partitionby1.sim
./test.sh -f tsim/stream/partitionbyColumnInterval.sim
./test.sh -f tsim/stream/partitionbyColumnSession.sim
./test.sh -f tsim/stream/partitionbyColumnState.sim
./test.sh -f tsim/stream/partitionby.sim
./test.sh -f tsim/stream/pauseAndResume.sim
./test.sh -f tsim/stream/schedSnode.sim
./test.sh -f tsim/stream/session0.sim
./test.sh -f tsim/stream/session1.sim
./test.sh -f tsim/stream/sliding.sim
./test.sh -f tsim/stream/state0.sim
./test.sh -f tsim/stream/state1.sim
./test.sh -f tsim/stream/triggerInterval0.sim
./test.sh -f tsim/stream/triggerSession0.sim
./test.sh -f tsim/stream/udTableAndTag0.sim
./test.sh -f tsim/stream/udTableAndTag1.sim
./test.sh -f tsim/stream/udTableAndTag2.sim
./test.sh -f tsim/stream/windowClose.sim
./test.sh -f tsim/trans/lossdata1.sim
./test.sh -f tsim/tmq/basic1.sim
./test.sh -f tsim/tmq/basic2.sim

View File

@ -1,6 +1,6 @@
{
"filetype": "insert",
"cfgdir": "/home/chr/TDengine/debug/../sim/dnode1/cfg/",
"cfgdir": "/home/lisa/Documents/workspace/tdengine/debug/../sim/dnode1/cfg/",
"host": "localhost",
"port": 6030,
"rest_port": 6041,

View File

@ -1,6 +1,6 @@
{
"filetype": "insert",
"cfgdir": "/home/chr/TDengine/debug/../sim/dnode1/cfg/",
"cfgdir": "/home/lisa/Documents/workspace/tdengine/debug/../sim/dnode1/cfg/",
"host": "localhost",
"port": 6030,
"rest_port": 6041,