Merge pull request #21735 from taosdata/refact/fillhistory

refactor: refactor the fill history operation
This commit is contained in:
Haojun Liao 2023-07-05 10:48:51 +08:00 committed by GitHub
commit 9b3e34d589
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
88 changed files with 4057 additions and 2310 deletions

View File

@ -5,8 +5,8 @@ if (${BUILD_CONTRIB})
URL https://github.com/facebook/rocksdb/archive/refs/tags/v8.1.1.tar.gz URL https://github.com/facebook/rocksdb/archive/refs/tags/v8.1.1.tar.gz
URL_HASH MD5=3b4c97ee45df9c8a5517308d31ab008b URL_HASH MD5=3b4c97ee45df9c8a5517308d31ab008b
DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_NO_PROGRESS 1
DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download" DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download"
SOURCE_DIR "${TD_CONTRIB_DIR}/rocksdb" SOURCE_DIR "${TD_CONTRIB_DIR}/rocksdb"
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
BUILD_COMMAND "" BUILD_COMMAND ""
INSTALL_COMMAND "" INSTALL_COMMAND ""
@ -18,8 +18,8 @@ else()
URL https://github.com/facebook/rocksdb/archive/refs/tags/v8.1.1.tar.gz URL https://github.com/facebook/rocksdb/archive/refs/tags/v8.1.1.tar.gz
URL_HASH MD5=3b4c97ee45df9c8a5517308d31ab008b URL_HASH MD5=3b4c97ee45df9c8a5517308d31ab008b
DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_NO_PROGRESS 1
DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download" DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download"
SOURCE_DIR "${TD_CONTRIB_DIR}/rocksdb" SOURCE_DIR "${TD_CONTRIB_DIR}/rocksdb"
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
BUILD_COMMAND "" BUILD_COMMAND ""
INSTALL_COMMAND "" INSTALL_COMMAND ""

View File

@ -54,6 +54,11 @@ typedef struct SSessionKey {
uint64_t groupId; uint64_t groupId;
} SSessionKey; } SSessionKey;
typedef struct SVersionRange {
uint64_t minVer;
uint64_t maxVer;
} SVersionRange;
static inline int winKeyCmprImpl(const void* pKey1, const void* pKey2) { static inline int winKeyCmprImpl(const void* pKey1, const void* pKey2) {
SWinKey* pWin1 = (SWinKey*)pKey1; SWinKey* pWin1 = (SWinKey*)pKey1;
SWinKey* pWin2 = (SWinKey*)pKey2; SWinKey* pWin2 = (SWinKey*)pKey2;

View File

@ -177,7 +177,6 @@ static FORCE_INLINE void colDataSetDouble(SColumnInfoData* pColumnInfoData, uint
int32_t getJsonValueLen(const char* data); int32_t getJsonValueLen(const char* data);
int32_t colDataSetVal(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull); int32_t colDataSetVal(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull);
int32_t colDataAppend(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull);
int32_t colDataReassignVal(SColumnInfoData* pColumnInfoData, uint32_t dstRowIdx, uint32_t srcRowIdx, const char* pData); int32_t colDataReassignVal(SColumnInfoData* pColumnInfoData, uint32_t dstRowIdx, uint32_t srcRowIdx, const char* pData);
int32_t colDataSetNItems(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, uint32_t numOfRows, bool trimValue); int32_t colDataSetNItems(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, uint32_t numOfRows, bool trimValue);
int32_t colDataMergeCol(SColumnInfoData* pColumnInfoData, int32_t numOfRow1, int32_t* capacity, int32_t colDataMergeCol(SColumnInfoData* pColumnInfoData, int32_t numOfRow1, int32_t* capacity,
@ -187,6 +186,7 @@ int32_t colDataAssign(SColumnInfoData* pColumnInfoData, const SColumnInfoData* p
int32_t blockDataUpdateTsWindow(SSDataBlock* pDataBlock, int32_t tsColumnIndex); int32_t blockDataUpdateTsWindow(SSDataBlock* pDataBlock, int32_t tsColumnIndex);
int32_t colDataGetLength(const SColumnInfoData* pColumnInfoData, int32_t numOfRows); int32_t colDataGetLength(const SColumnInfoData* pColumnInfoData, int32_t numOfRows);
int32_t colDataGetRowLength(const SColumnInfoData* pColumnInfoData, int32_t rowIdx); int32_t colDataGetRowLength(const SColumnInfoData* pColumnInfoData, int32_t rowIdx);
void colDataTrim(SColumnInfoData* pColumnInfoData); void colDataTrim(SColumnInfoData* pColumnInfoData);
@ -208,7 +208,6 @@ double blockDataGetSerialRowSize(const SSDataBlock* pBlock);
size_t blockDataGetSerialMetaSize(uint32_t numOfCols); size_t blockDataGetSerialMetaSize(uint32_t numOfCols);
int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo); int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo);
int32_t blockDataSort_rv(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst);
int32_t colInfoDataEnsureCapacity(SColumnInfoData* pColumn, uint32_t numOfRows, bool clearPayload); int32_t colInfoDataEnsureCapacity(SColumnInfoData* pColumn, uint32_t numOfRows, bool clearPayload);
int32_t blockDataEnsureCapacity(SSDataBlock* pDataBlock, uint32_t numOfRows); int32_t blockDataEnsureCapacity(SSDataBlock* pDataBlock, uint32_t numOfRows);
@ -237,11 +236,10 @@ int32_t blockDataAppendColInfo(SSDataBlock* pBlock, SColumnInfoData* pColIn
SColumnInfoData createColumnInfoData(int16_t type, int32_t bytes, int16_t colId); SColumnInfoData createColumnInfoData(int16_t type, int32_t bytes, int16_t colId);
SColumnInfoData* bdGetColumnInfoData(const SSDataBlock* pBlock, int32_t index); SColumnInfoData* bdGetColumnInfoData(const SSDataBlock* pBlock, int32_t index);
int32_t blockGetEncodeSize(const SSDataBlock* pBlock);
int32_t blockEncode(const SSDataBlock* pBlock, char* data, int32_t numOfCols); int32_t blockEncode(const SSDataBlock* pBlock, char* data, int32_t numOfCols);
const char* blockDecode(SSDataBlock* pBlock, const char* pData); const char* blockDecode(SSDataBlock* pBlock, const char* pData);
void blockDebugShowDataBlock(SSDataBlock* pBlock, const char* flag);
void blockDebugShowDataBlocks(const SArray* dataBlocks, const char* flag);
// for debug // for debug
char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** dumpBuf); char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** dumpBuf);
@ -251,9 +249,7 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq2** pReq, const SSDataBlock* pData
char* buildCtbNameByGroupId(const char* stbName, uint64_t groupId); char* buildCtbNameByGroupId(const char* stbName, uint64_t groupId);
int32_t buildCtbNameByGroupIdImpl(const char* stbName, uint64_t groupId, char* pBuf); int32_t buildCtbNameByGroupIdImpl(const char* stbName, uint64_t groupId, char* pBuf);
static FORCE_INLINE int32_t blockGetEncodeSize(const SSDataBlock* pBlock) { void trimDataBlock(SSDataBlock* pBlock, int32_t totalRows, const bool* pBoolList);
return blockDataGetSerialMetaSize(taosArrayGetSize(pBlock->pDataBlock)) + blockDataGetSize(pBlock);
}
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -252,7 +252,9 @@ enum {
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DISPATCH, "stream-task-dispatch", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DISPATCH, "stream-task-dispatch", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_UNUSED1, "stream-unused1", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_UNUSED1, "stream-unused1", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_RECOVER_FINISH, "stream-recover-finish", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY, "stream-scan-history", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY_FINISH, "stream-scan-history-finish", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TRANSFER_STATE, "stream-transfer-state", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECK, "stream-task-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECK, "stream-task-check", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT, "stream-checkpoint", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT, "stream-checkpoint", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_REPORT_CHECKPOINT, "stream-report-checkpoint", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_REPORT_CHECKPOINT, "stream-report-checkpoint", NULL, NULL)
@ -297,8 +299,7 @@ enum {
TD_NEW_MSG_SEG(TDMT_VND_STREAM_MSG) TD_NEW_MSG_SEG(TDMT_VND_STREAM_MSG)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE, "vnode-stream-recover1", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_SCAN_HISTORY, "vnode-stream-scan-history", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE, "vnode-stream-recover2", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_CHECK_POINT_SOURCE, "vnode-stream-checkpoint-source", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_CHECK_POINT_SOURCE, "vnode-stream-checkpoint-source", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_MAX_MSG, "vnd-stream-max", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_MAX_MSG, "vnd-stream-max", NULL, NULL)

View File

@ -55,6 +55,9 @@ typedef struct {
void* pStateBackend; void* pStateBackend;
struct SStorageAPI api; struct SStorageAPI api;
int8_t fillHistory;
STimeWindow winRange;
} SReadHandle; } SReadHandle;
// in queue mode, data streams are seperated by msg // in queue mode, data streams are seperated by msg
@ -193,14 +196,6 @@ int32_t qDeserializeTaskStatus(qTaskInfo_t tinfo, const char* pInput, int32_t le
void getNextTimeWindow(const SInterval* pInterval, STimeWindow* tw, int32_t order); void getNextTimeWindow(const SInterval* pInterval, STimeWindow* tw, int32_t order);
void getInitialStartTimeWindow(SInterval* pInterval, TSKEY ts, STimeWindow* w, bool ascQuery); void getInitialStartTimeWindow(SInterval* pInterval, TSKEY ts, STimeWindow* w, bool ascQuery);
STimeWindow getAlignQueryTimeWindow(const SInterval* pInterval, int64_t key); STimeWindow getAlignQueryTimeWindow(const SInterval* pInterval, int64_t key);
/**
* return the scan info, in the form of tuple of two items, including table uid and current timestamp
* @param tinfo
* @param uid
* @param ts
* @return
*/
int32_t qGetStreamScanStatus(qTaskInfo_t tinfo, uint64_t* uid, int64_t* ts);
SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo); SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo);
@ -220,15 +215,22 @@ void* qExtractReaderFromStreamScanner(void* scanner);
int32_t qExtractStreamScanner(qTaskInfo_t tinfo, void** scanner); int32_t qExtractStreamScanner(qTaskInfo_t tinfo, void** scanner);
int32_t qStreamSetParamForRecover(qTaskInfo_t tinfo); int32_t qSetStreamOperatorOptionForScanHistory(qTaskInfo_t tinfo);
int32_t qStreamSourceRecoverStep1(qTaskInfo_t tinfo, int64_t ver); int32_t qStreamSourceScanParamForHistoryScanStep1(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow);
int32_t qStreamSourceRecoverStep2(qTaskInfo_t tinfo, int64_t ver); int32_t qStreamSourceScanParamForHistoryScanStep2(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow);
int32_t qStreamRecoverFinish(qTaskInfo_t tinfo); int32_t qStreamRecoverFinish(qTaskInfo_t tinfo);
int32_t qStreamRestoreParam(qTaskInfo_t tinfo); int32_t qRestoreStreamOperatorOption(qTaskInfo_t tinfo);
bool qStreamRecoverScanFinished(qTaskInfo_t tinfo); bool qStreamRecoverScanFinished(qTaskInfo_t tinfo);
void qStreamCloseTsdbReader(void* task); bool qStreamRecoverScanStep1Finished(qTaskInfo_t tinfo);
bool qStreamRecoverScanStep2Finished(qTaskInfo_t tinfo);
int32_t qStreamRecoverSetAllStepFinished(qTaskInfo_t tinfo);
void resetTaskInfo(qTaskInfo_t tinfo); void resetTaskInfo(qTaskInfo_t tinfo);
void qResetStreamInfoTimeWindow(qTaskInfo_t tinfo);
int32_t qStreamOperatorReleaseState(qTaskInfo_t tInfo);
int32_t qStreamOperatorReloadState(qTaskInfo_t tInfo);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -234,29 +234,6 @@ typedef struct SStoreSnapshotFn {
int32_t (*getTableInfoFromSnapshot)(SSnapContext* ctx, void** pBuf, int32_t* contLen, int16_t* type, int64_t* uid); int32_t (*getTableInfoFromSnapshot)(SSnapContext* ctx, void** pBuf, int32_t* contLen, int16_t* type, int64_t* uid);
} SStoreSnapshotFn; } SStoreSnapshotFn;
/**
void metaReaderInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags);
void metaReaderReleaseLock(SMetaReader *pReader);
void metaReaderClear(SMetaReader *pReader);
int32_t metaReaderGetTableEntryByUid(SMetaReader *pReader, tb_uid_t uid);
int32_t metaReaderGetTableEntryByUidCache(SMetaReader *pReader, tb_uid_t uid);
int32_t metaGetTableTags(SMeta *pMeta, uint64_t suid, SArray *uidList);
const void *metaGetTableTagVal(void *tag, int16_t type, STagVal *tagVal);
int metaGetTableNameByUid(void *meta, uint64_t uid, char *tbName);
int metaGetTableUidByName(void *meta, char *tbName, uint64_t *uid);
int metaGetTableTypeByName(void *meta, char *tbName, ETableType *tbType);
bool metaIsTableExist(SMeta *pMeta, tb_uid_t uid);
int32_t metaGetCachedTableUidList(SMeta *pMeta, tb_uid_t suid, const uint8_t *key, int32_t keyLen, SArray *pList,
bool *acquired);
int32_t metaUidFilterCachePut(SMeta *pMeta, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload,
int32_t payloadLen, double selectivityRatio);
tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name);
int32_t metaGetCachedTbGroup(SMeta* pMeta, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray** pList);
int32_t metaPutTbGroupToCache(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, int32_t
payloadLen);
*/
typedef struct SStoreMeta { typedef struct SStoreMeta {
SMTbCursor* (*openTableMetaCursor)(void* pVnode); // metaOpenTbCursor SMTbCursor* (*openTableMetaCursor)(void* pVnode); // metaOpenTbCursor
void (*closeTableMetaCursor)(SMTbCursor* pTbCur); // metaCloseTbCursor void (*closeTableMetaCursor)(SMTbCursor* pTbCur); // metaCloseTbCursor
@ -403,7 +380,7 @@ typedef struct SStateStore {
SStreamStateCur* (*streamStateSessionSeekKeyCurrentNext)(SStreamState* pState, const SSessionKey* key); SStreamStateCur* (*streamStateSessionSeekKeyCurrentNext)(SStreamState* pState, const SSessionKey* key);
struct SStreamFileState* (*streamFileStateInit)(int64_t memSize, uint32_t keySize, uint32_t rowSize, struct SStreamFileState* (*streamFileStateInit)(int64_t memSize, uint32_t keySize, uint32_t rowSize,
uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark); uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark, const char*id);
void (*streamFileStateDestroy)(struct SStreamFileState* pFileState); void (*streamFileStateDestroy)(struct SStreamFileState* pFileState);
void (*streamFileStateClear)(struct SStreamFileState* pFileState); void (*streamFileStateClear)(struct SStreamFileState* pFileState);
@ -415,6 +392,7 @@ typedef struct SStateStore {
int32_t (*streamStateCommit)(SStreamState* pState); int32_t (*streamStateCommit)(SStreamState* pState);
void (*streamStateDestroy)(SStreamState* pState, bool remove); void (*streamStateDestroy)(SStreamState* pState, bool remove);
int32_t (*streamStateDeleteCheckPoint)(SStreamState* pState, TSKEY mark); int32_t (*streamStateDeleteCheckPoint)(SStreamState* pState, TSKEY mark);
void (*streamStateReloadInfo)(SStreamState* pState, TSKEY ts);
} SStateStore; } SStateStore;
typedef struct SStorageAPI { typedef struct SStorageAPI {

View File

@ -129,30 +129,38 @@ typedef struct SSerializeDataHandle {
} SSerializeDataHandle; } SSerializeDataHandle;
// incremental state storage // incremental state storage
typedef struct STdbState {
void *rocksdb;
void **pHandle;
void *writeOpts;
void *readOpts;
void **cfOpts;
void *dbOpt;
struct SStreamTask *pOwner;
void *param;
void *env;
SListNode *pComparNode;
void *pBackend;
char idstr[64];
void *compactFactory;
TdThreadRwlock rwLock;
void *db; typedef struct SBackendCfWrapper {
void *pStateDb; void *rocksdb;
void *pFuncStateDb; void **pHandle;
void *pFillStateDb; // todo refactor void *writeOpts;
void *pSessionStateDb; void *readOpts;
void *pParNameDb; void **cfOpts;
void *pParTagDb; void *dbOpt;
void *txn; void *param;
void *env;
SListNode *pComparNode;
void *pBackend;
void *compactFactory;
TdThreadRwlock rwLock;
bool remove;
int64_t backendId;
char idstr[64];
} SBackendCfWrapper;
typedef struct STdbState {
SBackendCfWrapper *pBackendCfWrapper;
int64_t backendCfWrapperId;
char idstr[64];
struct SStreamTask *pOwner;
void *db;
void *pStateDb;
void *pFuncStateDb;
void *pFillStateDb; // todo refactor
void *pSessionStateDb;
void *pParNameDb;
void *pParTagDb;
void *txn;
} STdbState; } STdbState;
typedef struct { typedef struct {

View File

@ -138,6 +138,8 @@ int32_t streamStateCurPrev(SStreamState* pState, SStreamStateCur* pCur);
int32_t streamStatePutParName(SStreamState* pState, int64_t groupId, const char* tbname); int32_t streamStatePutParName(SStreamState* pState, int64_t groupId, const char* tbname);
int32_t streamStateGetParName(SStreamState* pState, int64_t groupId, void** pVal); int32_t streamStateGetParName(SStreamState* pState, int64_t groupId, void** pVal);
void streamStateReloadInfo(SStreamState* pState, TSKEY ts);
/***compare func **/ /***compare func **/
typedef struct SStateChekpoint { typedef struct SStateChekpoint {

View File

@ -44,10 +44,8 @@ enum {
TASK_STATUS__DROPPING, TASK_STATUS__DROPPING,
TASK_STATUS__FAIL, TASK_STATUS__FAIL,
TASK_STATUS__STOP, TASK_STATUS__STOP,
TASK_STATUS__WAIT_DOWNSTREAM, TASK_STATUS__SCAN_HISTORY, // stream task scan history data by using tsdbread in the stream scanner
TASK_STATUS__RECOVER_PREPARE, TASK_STATUS__HALT, // stream task will handle all data in the input queue, and then paused
TASK_STATUS__RECOVER1,
TASK_STATUS__RECOVER2,
TASK_STATUS__PAUSE, TASK_STATUS__PAUSE,
}; };
@ -133,7 +131,6 @@ typedef struct {
// ref data block, for delete // ref data block, for delete
typedef struct { typedef struct {
int8_t type; int8_t type;
int64_t ver;
SSDataBlock* pBlock; SSDataBlock* pBlock;
} SStreamRefDataBlock; } SStreamRefDataBlock;
@ -203,13 +200,11 @@ static FORCE_INLINE void streamQueueProcessFail(SStreamQueue* queue) {
atomic_store_8(&queue->status, STREAM_QUEUE__FAILED); atomic_store_8(&queue->status, STREAM_QUEUE__FAILED);
} }
void* streamQueueNextItem(SStreamQueue* queue); void* streamQueueNextItem(SStreamQueue* pQueue);
SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type); SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type);
void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit); void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit);
SStreamDataSubmit* streamSubmitBlockClone(SStreamDataSubmit* pSubmit);
typedef struct { typedef struct {
char* qmsg; char* qmsg;
void* pExecutor; // not applicable to encoder and decoder void* pExecutor; // not applicable to encoder and decoder
@ -251,7 +246,7 @@ typedef struct {
int8_t reserved; int8_t reserved;
} STaskSinkFetch; } STaskSinkFetch;
typedef struct { typedef struct SStreamChildEpInfo {
int32_t nodeId; int32_t nodeId;
int32_t childId; int32_t childId;
int32_t taskId; int32_t taskId;
@ -271,31 +266,55 @@ typedef struct SCheckpointInfo {
} SCheckpointInfo; } SCheckpointInfo;
typedef struct SStreamStatus { typedef struct SStreamStatus {
int8_t taskStatus; int8_t taskStatus;
int8_t schedStatus; int8_t downstreamReady; // downstream tasks are all ready now, if this flag is set
int8_t keepTaskStatus; int8_t schedStatus;
int8_t keepTaskStatus;
bool transferState;
int8_t timerActive; // timer is active
} SStreamStatus; } SStreamStatus;
struct SStreamTask { typedef struct SHistDataRange {
SStreamId id; SVersionRange range;
int32_t totalLevel; STimeWindow window;
int8_t taskLevel; } SHistDataRange;
int8_t outputType;
int16_t dispatchMsgType;
SStreamStatus status;
int32_t selfChildId;
int32_t nodeId; // vgroup id
SEpSet epSet;
SCheckpointInfo chkInfo;
STaskExec exec;
int8_t fillHistory; // fill history
int64_t ekey; // end ts key
int64_t endVer; // end version
// children info typedef struct SSTaskBasicInfo {
SArray* childEpInfo; // SArray<SStreamChildEpInfo*> int32_t nodeId; // vgroup id or snode id
int32_t nextCheckId; SEpSet epSet;
SArray* checkpointInfo; // SArray<SStreamCheckpointInfo> int32_t selfChildId;
int32_t totalLevel;
int8_t taskLevel;
int8_t fillHistory; // is fill history task or not
} SSTaskBasicInfo;
typedef struct SDispatchMsgInfo {
void* pData; // current dispatch data
int16_t msgType; // dispatch msg type
int32_t retryCount; // retry send data count
int64_t blockingTs; // output blocking timestamp
} SDispatchMsgInfo;
typedef struct {
int8_t outputType;
int8_t outputStatus;
SStreamQueue* outputQueue;
} SSTaskOutputInfo;
struct SStreamTask {
SStreamId id;
SSTaskBasicInfo info;
int8_t outputType;
SDispatchMsgInfo msgInfo;
SStreamStatus status;
SCheckpointInfo chkInfo;
STaskExec exec;
SHistDataRange dataRange;
SStreamId historyTaskId;
SStreamId streamTaskId;
SArray* pUpstreamEpInfoList; // SArray<SStreamChildEpInfo*>, // children info
int32_t nextCheckId;
SArray* checkpointInfo; // SArray<SStreamCheckpointInfo>
// output // output
union { union {
@ -314,13 +333,14 @@ struct SStreamTask {
// trigger // trigger
int8_t triggerStatus; int8_t triggerStatus;
int64_t triggerParam; int64_t triggerParam;
void* timer; void* schedTimer;
void* launchTaskTimer;
SMsgCb* pMsgCb; // msg handle SMsgCb* pMsgCb; // msg handle
SStreamState* pState; // state backend SStreamState* pState; // state backend
// the followings attributes don't be serialized // the followings attributes don't be serialized
int32_t recoverTryingDownstream; int32_t notReadyTasks;
int32_t recoverWaitingUpstream; int32_t numOfWaitingUpstream;
int64_t checkReqId; int64_t checkReqId;
SArray* checkReqIds; // shuffle SArray* checkReqIds; // shuffle
int32_t refCnt; int32_t refCnt;
@ -332,21 +352,22 @@ struct SStreamTask {
// meta // meta
typedef struct SStreamMeta { typedef struct SStreamMeta {
char* path; char* path;
TDB* db; TDB* db;
TTB* pTaskDb; TTB* pTaskDb;
TTB* pCheckpointDb; TTB* pCheckpointDb;
SHashObj* pTasks; SHashObj* pTasks;
SArray* pTaskList; // SArray<task_id*> SArray* pTaskList; // SArray<task_id*>
void* ahandle; void* ahandle;
TXN* txn; TXN* txn;
FTaskExpand* expandFunc; FTaskExpand* expandFunc;
int32_t vgId; int32_t vgId;
SRWLatch lock; SRWLatch lock;
int32_t walScanCounter; int32_t walScanCounter;
void* streamBackend; void* streamBackend;
int64_t streamBackendRid; int64_t streamBackendRid;
SHashObj* pTaskBackendUnique; SHashObj* pTaskBackendUnique;
TdThreadMutex backendMutex;
} SStreamMeta; } SStreamMeta;
int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo);
@ -431,16 +452,17 @@ typedef struct {
SMsgHead msgHead; SMsgHead msgHead;
int64_t streamId; int64_t streamId;
int32_t taskId; int32_t taskId;
} SStreamRecoverStep1Req, SStreamRecoverStep2Req; int8_t igUntreated;
} SStreamScanHistoryReq;
typedef struct { typedef struct {
int64_t streamId; int64_t streamId;
int32_t taskId; int32_t taskId;
int32_t childId; int32_t childId;
} SStreamRecoverFinishReq; } SStreamRecoverFinishReq, SStreamTransferReq;
int32_t tEncodeSStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq); int32_t tEncodeStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq);
int32_t tDecodeSStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq); int32_t tDecodeStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq);
typedef struct { typedef struct {
int64_t streamId; int64_t streamId;
@ -509,11 +531,11 @@ typedef struct {
SArray* checkpointVer; // SArray<SStreamCheckpointInfo> SArray* checkpointVer; // SArray<SStreamCheckpointInfo>
} SStreamRecoverDownstreamRsp; } SStreamRecoverDownstreamRsp;
int32_t tEncodeSStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* pReq); int32_t tEncodeStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* pReq);
int32_t tDecodeSStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq); int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq);
int32_t tEncodeSStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp); int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp);
int32_t tDecodeSStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp); int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp);
int32_t tEncodeSStreamTaskRecoverReq(SEncoder* pEncoder, const SStreamRecoverDownstreamReq* pReq); int32_t tEncodeSStreamTaskRecoverReq(SEncoder* pEncoder, const SStreamRecoverDownstreamReq* pReq);
int32_t tDecodeSStreamTaskRecoverReq(SDecoder* pDecoder, SStreamRecoverDownstreamReq* pReq); int32_t tDecodeSStreamTaskRecoverReq(SDecoder* pDecoder, SStreamRecoverDownstreamReq* pReq);
@ -525,9 +547,11 @@ int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq);
int32_t tDecodeStreamRetrieveReq(SDecoder* pDecoder, SStreamRetrieveReq* pReq); int32_t tDecodeStreamRetrieveReq(SDecoder* pDecoder, SStreamRetrieveReq* pReq);
void tDeleteStreamRetrieveReq(SStreamRetrieveReq* pReq); void tDeleteStreamRetrieveReq(SStreamRetrieveReq* pReq);
void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq); int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, int32_t numOfBlocks,
int64_t dstTaskId);
void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq);
int32_t streamSetupTrigger(SStreamTask* pTask); int32_t streamSetupScheduleTrigger(SStreamTask* pTask);
int32_t streamProcessRunReq(SStreamTask* pTask); int32_t streamProcessRunReq(SStreamTask* pTask);
int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pMsg, bool exec); int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pMsg, bool exec);
@ -542,30 +566,44 @@ int32_t streamSchedExec(SStreamTask* pTask);
int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock);
bool streamTaskShouldStop(const SStreamStatus* pStatus); bool streamTaskShouldStop(const SStreamStatus* pStatus);
bool streamTaskShouldPause(const SStreamStatus* pStatus); bool streamTaskShouldPause(const SStreamStatus* pStatus);
bool streamTaskIsIdle(const SStreamTask* pTask);
int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz); int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz);
char* createStreamTaskIdStr(int64_t streamId, int32_t taskId);
// recover and fill history // recover and fill history
int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version); void streamPrepareNdoCheckDownstream(SStreamTask* pTask);
int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version); int32_t streamTaskCheckDownstreamTasks(SStreamTask* pTask);
int32_t streamTaskLaunchScanHistory(SStreamTask* pTask);
int32_t streamTaskCheckStatus(SStreamTask* pTask); int32_t streamTaskCheckStatus(SStreamTask* pTask);
int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp, int64_t version); int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp);
int32_t streamCheckHistoryTaskDownstrem(SStreamTask* pTask);
int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask);
int32_t streamStartRecoverTask(SStreamTask* pTask, int8_t igUntreated);
bool streamTaskRecoverScanStep1Finished(SStreamTask* pTask);
bool streamTaskRecoverScanStep2Finished(SStreamTask* pTask);
int32_t streamTaskRecoverSetAllStepFinished(SStreamTask* pTask);
// common // common
int32_t streamSetParamForRecover(SStreamTask* pTask); int32_t streamSetParamForScanHistoryData(SStreamTask* pTask);
int32_t streamRestoreParam(SStreamTask* pTask); int32_t streamRestoreParam(SStreamTask* pTask);
int32_t streamSetStatusNormal(SStreamTask* pTask); int32_t streamSetStatusNormal(SStreamTask* pTask);
const char* streamGetTaskStatusStr(int32_t status);
// source level // source level
int32_t streamSourceRecoverPrepareStep1(SStreamTask* pTask, int64_t ver); int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow);
int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamRecoverStep1Req* pReq); int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow);
int32_t streamSourceRecoverScanStep1(SStreamTask* pTask); int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated);
int32_t streamBuildSourceRecover2Req(SStreamTask* pTask, SStreamRecoverStep2Req* pReq); int32_t streamSourceScanHistoryData(SStreamTask* pTask);
int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver); // int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver);
int32_t streamDispatchRecoverFinishReq(SStreamTask* pTask); int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask);
int32_t streamDispatchTransferStateMsg(SStreamTask* pTask);
// agg level // agg level
int32_t streamAggRecoverPrepare(SStreamTask* pTask); int32_t streamAggRecoverPrepare(SStreamTask* pTask);
// int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask); int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t taskId, int32_t childId);
int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t childId);
void streamMetaInit(); void streamMetaInit();
void streamMetaCleanup(); void streamMetaCleanup();
@ -591,6 +629,9 @@ int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask,
int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq); int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq);
int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp); int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp);
int32_t streamTaskReleaseState(SStreamTask* pTask);
int32_t streamTaskReloadState(SStreamTask* pTask);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -28,11 +28,10 @@ extern "C" {
#endif #endif
typedef struct SStreamFileState SStreamFileState; typedef struct SStreamFileState SStreamFileState;
typedef SList SStreamSnapshot; typedef SList SStreamSnapshot;
SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize,
GetTsFun fp, void* pFile, TSKEY delMark); GetTsFun fp, void* pFile, TSKEY delMark, const char* id);
void streamFileStateDestroy(SStreamFileState* pFileState); void streamFileStateDestroy(SStreamFileState* pFileState);
void streamFileStateClear(SStreamFileState* pFileState); void streamFileStateClear(SStreamFileState* pFileState);
bool needClearDiskBuff(SStreamFileState* pFileState); bool needClearDiskBuff(SStreamFileState* pFileState);
@ -50,6 +49,7 @@ int32_t recoverSnapshot(SStreamFileState* pFileState);
int32_t getSnapshotIdList(SStreamFileState* pFileState, SArray* list); int32_t getSnapshotIdList(SStreamFileState* pFileState, SArray* list);
int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark); int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark);
int32_t streamFileStateGeSelectRowSize(SStreamFileState* pFileState); int32_t streamFileStateGeSelectRowSize(SStreamFileState* pFileState);
void streamFileStateReloadInfo(SStreamFileState* pFileState, TSKEY ts);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -160,7 +160,7 @@ static const SSysDbTableSchema streamSchema[] = {
static const SSysDbTableSchema streamTaskSchema[] = { static const SSysDbTableSchema streamTaskSchema[] = {
{.name = "stream_name", .bytes = SYSTABLE_SCH_DB_NAME_LEN, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "stream_name", .bytes = SYSTABLE_SCH_DB_NAME_LEN, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
{.name = "task_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, {.name = "task_id", .bytes = 32, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
{.name = "node_type", .bytes = SYSTABLE_SCH_DB_NAME_LEN, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "node_type", .bytes = SYSTABLE_SCH_DB_NAME_LEN, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
{.name = "node_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, {.name = "node_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false},
{.name = "level", .bytes = 20 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "level", .bytes = 20 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},

View File

@ -47,7 +47,6 @@ int32_t colDataGetLength(const SColumnInfoData* pColumnInfoData, int32_t numOfRo
} }
} }
int32_t colDataGetRowLength(const SColumnInfoData* pColumnInfoData, int32_t rowIdx) { int32_t colDataGetRowLength(const SColumnInfoData* pColumnInfoData, int32_t rowIdx) {
if (colDataIsNull_s(pColumnInfoData, rowIdx)) return 0; if (colDataIsNull_s(pColumnInfoData, rowIdx)) return 0;
@ -67,10 +66,6 @@ int32_t colDataGetFullLength(const SColumnInfoData* pColumnInfoData, int32_t num
} }
} }
void colDataTrim(SColumnInfoData* pColumnInfoData) {
// TODO
}
int32_t getJsonValueLen(const char* data) { int32_t getJsonValueLen(const char* data) {
int32_t dataLen = 0; int32_t dataLen = 0;
if (*data == TSDB_DATA_TYPE_NULL) { if (*data == TSDB_DATA_TYPE_NULL) {
@ -89,10 +84,6 @@ int32_t getJsonValueLen(const char* data) {
return dataLen; return dataLen;
} }
int32_t colDataAppend(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull) {
return colDataSetVal(pColumnInfoData, rowIndex, pData, isNull);
}
int32_t colDataSetVal(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull) { int32_t colDataSetVal(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull) {
if (isNull) { if (isNull) {
// There is a placehold for each NULL value of binary or nchar type. // There is a placehold for each NULL value of binary or nchar type.
@ -174,7 +165,7 @@ int32_t colDataReassignVal(SColumnInfoData* pColumnInfoData, uint32_t dstRowIdx,
} }
int32_t colDataReserve(SColumnInfoData* pColumnInfoData, size_t newSize) { static int32_t colDataReserve(SColumnInfoData* pColumnInfoData, size_t newSize) {
if (!IS_VAR_DATA_TYPE(pColumnInfoData->info.type)) { if (!IS_VAR_DATA_TYPE(pColumnInfoData->info.type)) {
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
@ -643,7 +634,7 @@ int32_t blockDataToBuf(char* buf, const SSDataBlock* pBlock) {
} else { } else {
memcpy(pStart, pCol->pData, dataSize); memcpy(pStart, pCol->pData, dataSize);
pStart += dataSize; pStart += dataSize;
} }
} }
return 0; return 0;
@ -882,41 +873,8 @@ int32_t dataBlockCompar(const void* p1, const void* p2, const void* param) {
return 0; return 0;
} }
static int32_t doAssignOneTuple(SColumnInfoData* pDstCols, int32_t numOfRows, const SSDataBlock* pSrcBlock,
int32_t tupleIndex) {
int32_t code = 0;
size_t numOfCols = taosArrayGetSize(pSrcBlock->pDataBlock);
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pDst = &pDstCols[i];
SColumnInfoData* pSrc = taosArrayGet(pSrcBlock->pDataBlock, i);
if (pSrc->hasNull && colDataIsNull(pSrc, pSrcBlock->info.rows, tupleIndex, pSrcBlock->pBlockAgg[i])) {
code = colDataSetVal(pDst, numOfRows, NULL, true);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
} else {
char* p = colDataGetData(pSrc, tupleIndex);
code = colDataSetVal(pDst, numOfRows, p, false);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t blockDataAssign(SColumnInfoData* pCols, const SSDataBlock* pDataBlock, const int32_t* index) { static int32_t blockDataAssign(SColumnInfoData* pCols, const SSDataBlock* pDataBlock, const int32_t* index) {
#if 0
for (int32_t i = 0; i < pDataBlock->info.rows; ++i) {
int32_t code = doAssignOneTuple(pCols, i, pDataBlock, index[i]);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
}
#else
size_t numOfCols = taosArrayGetSize(pDataBlock->pDataBlock); size_t numOfCols = taosArrayGetSize(pDataBlock->pDataBlock);
for (int32_t i = 0; i < numOfCols; ++i) { for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pDst = &pCols[i]; SColumnInfoData* pDst = &pCols[i];
@ -941,7 +899,7 @@ static int32_t blockDataAssign(SColumnInfoData* pCols, const SSDataBlock* pDataB
} }
} }
} }
#endif
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
@ -1101,114 +1059,6 @@ int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo) {
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
#if 0
typedef struct SHelper {
int32_t index;
union {
char* pData;
int64_t i64;
double d64;
};
} SHelper;
SHelper* createTupleIndex_rv(int32_t numOfRows, SArray* pOrderInfo, SSDataBlock* pBlock) {
int32_t sortValLengthPerRow = 0;
int32_t numOfCols = taosArrayGetSize(pOrderInfo);
for (int32_t i = 0; i < numOfCols; ++i) {
SBlockOrderInfo* pInfo = taosArrayGet(pOrderInfo, i);
SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, pInfo->slotId);
pInfo->pColData = pColInfo;
sortValLengthPerRow += pColInfo->info.bytes;
}
size_t len = sortValLengthPerRow * pBlock->info.rows;
char* buf = taosMemoryCalloc(1, len);
SHelper* phelper = taosMemoryCalloc(numOfRows, sizeof(SHelper));
for (int32_t i = 0; i < numOfRows; ++i) {
phelper[i].index = i;
phelper[i].pData = buf + sortValLengthPerRow * i;
}
int32_t offset = 0;
for (int32_t i = 0; i < numOfCols; ++i) {
SBlockOrderInfo* pInfo = taosArrayGet(pOrderInfo, i);
for (int32_t j = 0; j < numOfRows; ++j) {
phelper[j].i64 = *(int32_t*)pInfo->pColData->pData + pInfo->pColData->info.bytes * j;
// memcpy(phelper[j].pData + offset, pInfo->pColData->pData + pInfo->pColData->info.bytes * j,
// pInfo->pColData->info.bytes);
}
offset += pInfo->pColData->info.bytes;
}
taosMemoryFree(buf);
return phelper;
}
int32_t dataBlockCompar_rv(const void* p1, const void* p2, const void* param) {
const SSDataBlockSortHelper* pHelper = (const SSDataBlockSortHelper*)param;
SHelper* left = (SHelper*)p1;
SHelper* right = (SHelper*)p2;
SArray* pInfo = pHelper->orderInfo;
int32_t offset = 0;
int32_t leftx = *(int32_t*)left->pData; //*(int32_t*)(left->pData + offset);
int32_t rightx = *(int32_t*)right->pData; //*(int32_t*)(right->pData + offset);
if (leftx == rightx) {
return 0;
} else {
return (leftx < rightx) ? -1 : 1;
}
return 0;
}
int32_t blockDataSort_rv(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst) {
// Allocate the additional buffer.
int64_t p0 = taosGetTimestampUs();
SSDataBlockSortHelper helper = {.pDataBlock = pDataBlock, .orderInfo = pOrderInfo};
uint32_t rows = pDataBlock->info.rows;
SHelper* index = createTupleIndex_rv(rows, helper.orderInfo, pDataBlock);
if (index == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return terrno;
}
taosqsort(index, rows, sizeof(SHelper), &helper, dataBlockCompar_rv);
int64_t p1 = taosGetTimestampUs();
SColumnInfoData* pCols = createHelpColInfoData(pDataBlock);
if (pCols == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return terrno;
}
int64_t p2 = taosGetTimestampUs();
// int32_t code = blockDataAssign(pCols, pDataBlock, index);
// if (code != TSDB_CODE_SUCCESS) {
// terrno = code;
// return code;
// }
int64_t p3 = taosGetTimestampUs();
copyBackToBlock(pDataBlock, pCols);
int64_t p4 = taosGetTimestampUs();
printf("sort:%" PRId64 ", create:%" PRId64 ", assign:%" PRId64 ", copyback:%" PRId64 ", rows:%d\n", p1 - p0, p2 - p1,
p3 - p2, p4 - p3, rows);
// destroyTupleIndex(index);
return 0;
}
#endif
void blockDataCleanup(SSDataBlock* pDataBlock) { void blockDataCleanup(SSDataBlock* pDataBlock) {
blockDataEmpty(pDataBlock); blockDataEmpty(pDataBlock);
SDataBlockInfo* pInfo = &pDataBlock->info; SDataBlockInfo* pInfo = &pDataBlock->info;
@ -1345,8 +1195,7 @@ void blockDataFreeRes(SSDataBlock* pBlock) {
colDataDestroy(pColInfoData); colDataDestroy(pColInfoData);
} }
taosArrayDestroy(pBlock->pDataBlock); pBlock->pDataBlock = taosArrayDestroy(pBlock->pDataBlock);
pBlock->pDataBlock = NULL;
taosMemoryFreeClear(pBlock->pBlockAgg); taosMemoryFreeClear(pBlock->pBlockAgg);
memset(&pBlock->info, 0, sizeof(SDataBlockInfo)); memset(&pBlock->info, 0, sizeof(SDataBlockInfo));
} }
@ -1361,6 +1210,7 @@ void* blockDataDestroy(SSDataBlock* pBlock) {
return NULL; return NULL;
} }
// todo remove it
int32_t assignOneDataBlock(SSDataBlock* dst, const SSDataBlock* src) { int32_t assignOneDataBlock(SSDataBlock* dst, const SSDataBlock* src) {
dst->info = src->info; dst->info = src->info;
dst->info.rows = 0; dst->info.rows = 0;
@ -1759,16 +1609,6 @@ static void colDataKeepFirstNRows(SColumnInfoData* pColInfoData, size_t n, size_
if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) { if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) {
// pColInfoData->varmeta.length = colDataMoveVarData(pColInfoData, 0, n); // pColInfoData->varmeta.length = colDataMoveVarData(pColInfoData, 0, n);
memset(&pColInfoData->varmeta.offset[n], 0, total - n); memset(&pColInfoData->varmeta.offset[n], 0, total - n);
} else { // reset the bitmap value
/*int32_t stopIndex = BitmapLen(n) * 8;
for(int32_t i = n; i < stopIndex; ++i) {
colDataClearNull_f(pColInfoData->nullbitmap, i);
}
int32_t remain = BitmapLen(total) - BitmapLen(n);
if (remain > 0) {
memset(pColInfoData->nullbitmap+BitmapLen(n), 0, remain);
}*/
} }
} }
@ -1875,32 +1715,6 @@ void* tDecodeDataBlock(const void* buf, SSDataBlock* pBlock) {
return (void*)buf; return (void*)buf;
} }
int32_t tEncodeDataBlocks(void** buf, const SArray* blocks) {
int32_t tlen = 0;
int32_t sz = taosArrayGetSize(blocks);
tlen += taosEncodeFixedI32(buf, sz);
for (int32_t i = 0; i < sz; i++) {
SSDataBlock* pBlock = taosArrayGet(blocks, i);
tlen += tEncodeDataBlock(buf, pBlock);
}
return tlen;
}
void* tDecodeDataBlocks(const void* buf, SArray** blocks) {
int32_t sz;
buf = taosDecodeFixedI32(buf, &sz);
*blocks = taosArrayInit(sz, sizeof(SSDataBlock));
for (int32_t i = 0; i < sz; i++) {
SSDataBlock pBlock = {0};
buf = tDecodeDataBlock(buf, &pBlock);
taosArrayPush(*blocks, &pBlock);
}
return (void*)buf;
}
static char* formatTimestamp(char* buf, int64_t val, int precision) { static char* formatTimestamp(char* buf, int64_t val, int precision) {
time_t tt; time_t tt;
int32_t ms = 0; int32_t ms = 0;
@ -1950,101 +1764,6 @@ static char* formatTimestamp(char* buf, int64_t val, int precision) {
return buf; return buf;
} }
#if 0
void blockDebugShowDataBlock(SSDataBlock* pBlock, const char* flag) {
SArray* dataBlocks = taosArrayInit(1, sizeof(SSDataBlock*));
taosArrayPush(dataBlocks, &pBlock);
blockDebugShowDataBlocks(dataBlocks, flag);
taosArrayDestroy(dataBlocks);
}
void blockDebugShowDataBlocks(const SArray* dataBlocks, const char* flag) {
char pBuf[128] = {0};
int32_t sz = taosArrayGetSize(dataBlocks);
for (int32_t i = 0; i < sz; i++) {
SSDataBlock* pDataBlock = taosArrayGet(dataBlocks, i);
size_t numOfCols = taosArrayGetSize(pDataBlock->pDataBlock);
int32_t rows = pDataBlock->info.rows;
printf("%s |block ver %" PRIi64 " |block type %d |child id %d|group id %" PRIu64 "\n", flag,
pDataBlock->info.version, (int32_t)pDataBlock->info.type, pDataBlock->info.childId,
pDataBlock->info.id.groupId);
for (int32_t j = 0; j < rows; j++) {
printf("%s |", flag);
for (int32_t k = 0; k < numOfCols; k++) {
SColumnInfoData* pColInfoData = taosArrayGet(pDataBlock->pDataBlock, k);
void* var = POINTER_SHIFT(pColInfoData->pData, j * pColInfoData->info.bytes);
if (k == 0) {
printf("cols:%d |", (int32_t)numOfCols);
}
if (colDataIsNull(pColInfoData, rows, j, NULL)) {
printf(" %15s |", "NULL");
continue;
}
switch (pColInfoData->info.type) {
case TSDB_DATA_TYPE_TIMESTAMP:
formatTimestamp(pBuf, *(uint64_t*)var, TSDB_TIME_PRECISION_MILLI);
printf(" %25s |", pBuf);
break;
case TSDB_DATA_TYPE_BOOL:
printf(" %15" PRIi8 " |", *(int8_t*)var);
break;
case TSDB_DATA_TYPE_TINYINT:
printf(" %15" PRIi8 " |", *(int8_t*)var);
break;
case TSDB_DATA_TYPE_SMALLINT:
printf(" %15" PRIi16 " |", *(int16_t*)var);
break;
case TSDB_DATA_TYPE_INT:
printf(" %15d |", *(int32_t*)var);
break;
case TSDB_DATA_TYPE_UTINYINT:
printf(" %15" PRIu8 " |", *(uint8_t*)var);
break;
case TSDB_DATA_TYPE_USMALLINT:
printf(" %15" PRIu16 " |", *(uint16_t*)var);
break;
case TSDB_DATA_TYPE_UINT:
printf(" %15u |", *(uint32_t*)var);
break;
case TSDB_DATA_TYPE_BIGINT:
printf(" %15" PRId64 " |", *(int64_t*)var);
break;
case TSDB_DATA_TYPE_UBIGINT:
printf(" %15" PRIu64 " |", *(uint64_t*)var);
break;
case TSDB_DATA_TYPE_FLOAT:
printf(" %15f |", *(float*)var);
break;
case TSDB_DATA_TYPE_DOUBLE:
printf(" %15lf |", *(double*)var);
break;
case TSDB_DATA_TYPE_VARCHAR:
case TSDB_DATA_TYPE_GEOMETRY: {
char* pData = colDataGetVarData(pColInfoData, j);
int32_t dataSize = TMIN(sizeof(pBuf) - 1, varDataLen(pData));
memset(pBuf, 0, dataSize + 1);
strncpy(pBuf, varDataVal(pData), dataSize);
printf(" %15s |", pBuf);
} break;
case TSDB_DATA_TYPE_NCHAR: {
char* pData = colDataGetVarData(pColInfoData, j);
int32_t dataSize = TMIN(sizeof(pBuf), varDataLen(pData));
memset(pBuf, 0, dataSize);
(void)taosUcs4ToMbs((TdUcs4*)varDataVal(pData), dataSize, pBuf);
printf(" %15s |", pBuf);
} break;
default:
break;
}
}
printf("\n");
}
}
}
#endif
// for debug // for debug
char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf) { char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf) {
int32_t size = 2048*1024; int32_t size = 2048*1024;
@ -2153,182 +1872,6 @@ char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf)
return dumpBuf; return dumpBuf;
} }
/**
* @brief TODO: Assume that the final generated result it less than 3M
*
* @param pReq
* @param pDataBlocks
* @param vgId
* @param suid
*
*/
#if 0
int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SSDataBlock* pDataBlock, STSchema* pTSchema, int32_t vgId,
tb_uid_t suid) {
int32_t bufSize = sizeof(SSubmitReq);
int32_t sz = 1;
for (int32_t i = 0; i < sz; ++i) {
const SDataBlockInfo* pBlkInfo = &pDataBlock->info;
int32_t colNum = taosArrayGetSize(pDataBlock->pDataBlock);
bufSize += pBlkInfo->rows * (TD_ROW_HEAD_LEN + pBlkInfo->rowSize + BitmapLen(colNum));
bufSize += sizeof(SSubmitBlk);
}
*pReq = taosMemoryCalloc(1, bufSize);
if (!(*pReq)) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return TSDB_CODE_FAILED;
}
void* pDataBuf = *pReq;
int32_t msgLen = sizeof(SSubmitReq);
int32_t numOfBlks = 0;
SRowBuilder rb = {0};
tdSRowInit(&rb, pTSchema->version);
for (int32_t i = 0; i < sz; ++i) {
int32_t colNum = taosArrayGetSize(pDataBlock->pDataBlock);
int32_t rows = pDataBlock->info.rows;
if (colNum <= 1) {
// invalid if only with TS col
continue;
}
if (rb.nCols != colNum) {
tdSRowSetTpInfo(&rb, colNum, pTSchema->flen);
}
SSubmitBlk* pSubmitBlk = POINTER_SHIFT(pDataBuf, msgLen);
pSubmitBlk->suid = suid;
pSubmitBlk->uid = pDataBlock->info.id.groupId;
pSubmitBlk->numOfRows = rows;
pSubmitBlk->sversion = pTSchema->version;
msgLen += sizeof(SSubmitBlk);
int32_t dataLen = 0;
for (int32_t j = 0; j < rows; ++j) { // iterate by row
tdSRowResetBuf(&rb, POINTER_SHIFT(pDataBuf, msgLen + dataLen)); // set row buf
bool isStartKey = false;
int32_t offset = 0;
for (int32_t k = 0; k < colNum; ++k) { // iterate by column
SColumnInfoData* pColInfoData = taosArrayGet(pDataBlock->pDataBlock, k);
STColumn* pCol = &pTSchema->columns[k];
void* var = POINTER_SHIFT(pColInfoData->pData, j * pColInfoData->info.bytes);
switch (pColInfoData->info.type) {
case TSDB_DATA_TYPE_TIMESTAMP:
if (!isStartKey) {
isStartKey = true;
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID, TSDB_DATA_TYPE_TIMESTAMP, TD_VTYPE_NORM, var, true,
offset, k);
continue; // offset should keep 0 for next column
} else if (colDataIsNull_s(pColInfoData, j)) {
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, TSDB_DATA_TYPE_TIMESTAMP, TD_VTYPE_NULL, NULL,
false, offset, k);
} else {
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, TSDB_DATA_TYPE_TIMESTAMP, TD_VTYPE_NORM, var,
true, offset, k);
}
break;
case TSDB_DATA_TYPE_NCHAR:
case TSDB_DATA_TYPE_VARCHAR: // TSDB_DATA_TYPE_BINARY
case TSDB_DATA_TYPE_GEOMETRY: {
if (colDataIsNull_s(pColInfoData, j)) {
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pColInfoData->info.type, TD_VTYPE_NULL, NULL,
false, offset, k);
} else {
void* data = colDataGetData(pColInfoData, j);
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pColInfoData->info.type, TD_VTYPE_NORM, data,
true, offset, k);
}
break;
}
case TSDB_DATA_TYPE_VARBINARY:
case TSDB_DATA_TYPE_DECIMAL:
case TSDB_DATA_TYPE_BLOB:
case TSDB_DATA_TYPE_JSON:
case TSDB_DATA_TYPE_MEDIUMBLOB:
uError("the column type %" PRIi16 " is defined but not implemented yet", pColInfoData->info.type);
break;
default:
if (pColInfoData->info.type < TSDB_DATA_TYPE_MAX && pColInfoData->info.type > TSDB_DATA_TYPE_NULL) {
if (colDataIsNull_s(pColInfoData, j)) {
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NULL, NULL, false,
offset, k);
} else if (pCol->type == pColInfoData->info.type) {
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NORM, var, true, offset,
k);
} else {
char tv[8] = {0};
if (pColInfoData->info.type == TSDB_DATA_TYPE_FLOAT) {
float v = 0;
GET_TYPED_DATA(v, float, pColInfoData->info.type, var);
SET_TYPED_DATA(&tv, pCol->type, v);
} else if (pColInfoData->info.type == TSDB_DATA_TYPE_DOUBLE) {
double v = 0;
GET_TYPED_DATA(v, double, pColInfoData->info.type, var);
SET_TYPED_DATA(&tv, pCol->type, v);
} else if (IS_SIGNED_NUMERIC_TYPE(pColInfoData->info.type)) {
int64_t v = 0;
GET_TYPED_DATA(v, int64_t, pColInfoData->info.type, var);
SET_TYPED_DATA(&tv, pCol->type, v);
} else {
uint64_t v = 0;
GET_TYPED_DATA(v, uint64_t, pColInfoData->info.type, var);
SET_TYPED_DATA(&tv, pCol->type, v);
}
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NORM, tv, true, offset,
k);
}
} else {
uError("the column type %" PRIi16 " is undefined\n", pColInfoData->info.type);
}
break;
}
offset += TYPE_BYTES[pCol->type]; // sum/avg would convert to int64_t/uint64_t/double during aggregation
}
tdSRowEnd(&rb);
dataLen += TD_ROW_LEN(rb.pBuf);
#ifdef TD_DEBUG_PRINT_ROW
tdSRowPrint(rb.pBuf, pTSchema, __func__);
#endif
}
++numOfBlks;
pSubmitBlk->dataLen = dataLen;
msgLen += pSubmitBlk->dataLen;
}
if (numOfBlks > 0) {
(*pReq)->length = msgLen;
(*pReq)->header.vgId = htonl(vgId);
(*pReq)->header.contLen = htonl(msgLen);
(*pReq)->length = (*pReq)->header.contLen;
(*pReq)->numOfBlocks = htonl(numOfBlks);
SSubmitBlk* blk = (SSubmitBlk*)((*pReq) + 1);
while (numOfBlks--) {
int32_t dataLen = blk->dataLen;
blk->uid = htobe64(blk->uid);
blk->suid = htobe64(blk->suid);
blk->sversion = htonl(blk->sversion);
blk->dataLen = htonl(blk->dataLen);
blk->schemaLen = htonl(blk->schemaLen);
blk->numOfRows = htonl(blk->numOfRows);
blk = (SSubmitBlk*)(blk->data + dataLen);
}
} else {
// no valid rows
taosMemoryFreeClear(*pReq);
}
return TSDB_CODE_SUCCESS;
}
#endif
int32_t buildSubmitReqFromDataBlock(SSubmitReq2** ppReq, const SSDataBlock* pDataBlock, const STSchema* pTSchema, int32_t buildSubmitReqFromDataBlock(SSubmitReq2** ppReq, const SSDataBlock* pDataBlock, const STSchema* pTSchema,
int64_t uid, int32_t vgId, tb_uid_t suid) { int64_t uid, int32_t vgId, tb_uid_t suid) {
SSubmitReq2* pReq = *ppReq; SSubmitReq2* pReq = *ppReq;
@ -2732,3 +2275,149 @@ const char* blockDecode(SSDataBlock* pBlock, const char* pData) {
ASSERT(pStart - pData == dataLen); ASSERT(pStart - pData == dataLen);
return pStart; return pStart;
} }
void trimDataBlock(SSDataBlock* pBlock, int32_t totalRows, const bool* pBoolList) {
// int32_t totalRows = pBlock->info.rows;
int32_t bmLen = BitmapLen(totalRows);
char* pBitmap = NULL;
int32_t maxRows = 0;
size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock);
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i);
// it is a reserved column for scalar function, and no data in this column yet.
if (pDst->pData == NULL) {
continue;
}
int32_t numOfRows = 0;
if (IS_VAR_DATA_TYPE(pDst->info.type)) {
int32_t j = 0;
pDst->varmeta.length = 0;
while (j < totalRows) {
if (pBoolList[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_var(pDst, j)) {
colDataSetNull_var(pDst, numOfRows);
} else {
// fix address sanitizer error. p1 may point to memory that will change during realloc of colDataSetVal, first copy it to p2
char* p1 = colDataGetVarData(pDst, j);
int32_t len = 0;
if (pDst->info.type == TSDB_DATA_TYPE_JSON) {
len = getJsonValueLen(p1);
} else {
len = varDataTLen(p1);
}
char* p2 = taosMemoryMalloc(len);
memcpy(p2, p1, len);
colDataSetVal(pDst, numOfRows, p2, false);
taosMemoryFree(p2);
}
numOfRows += 1;
j += 1;
}
if (maxRows < numOfRows) {
maxRows = numOfRows;
}
} else {
if (pBitmap == NULL) {
pBitmap = taosMemoryCalloc(1, bmLen);
}
memcpy(pBitmap, pDst->nullbitmap, bmLen);
memset(pDst->nullbitmap, 0, bmLen);
int32_t j = 0;
switch (pDst->info.type) {
case TSDB_DATA_TYPE_BIGINT:
case TSDB_DATA_TYPE_UBIGINT:
case TSDB_DATA_TYPE_DOUBLE:
case TSDB_DATA_TYPE_TIMESTAMP:
while (j < totalRows) {
if (pBoolList[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int64_t*)pDst->pData)[numOfRows] = ((int64_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
case TSDB_DATA_TYPE_FLOAT:
case TSDB_DATA_TYPE_INT:
case TSDB_DATA_TYPE_UINT:
while (j < totalRows) {
if (pBoolList[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int32_t*)pDst->pData)[numOfRows] = ((int32_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
case TSDB_DATA_TYPE_SMALLINT:
case TSDB_DATA_TYPE_USMALLINT:
while (j < totalRows) {
if (pBoolList[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int16_t*)pDst->pData)[numOfRows] = ((int16_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
case TSDB_DATA_TYPE_BOOL:
case TSDB_DATA_TYPE_TINYINT:
case TSDB_DATA_TYPE_UTINYINT:
while (j < totalRows) {
if (pBoolList[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int8_t*)pDst->pData)[numOfRows] = ((int8_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
}
}
if (maxRows < numOfRows) {
maxRows = numOfRows;
}
}
pBlock->info.rows = maxRows;
if (pBitmap != NULL) {
taosMemoryFree(pBitmap);
}
}
int32_t blockGetEncodeSize(const SSDataBlock* pBlock) {
return blockDataGetSerialMetaSize(taosArrayGetSize(pBlock->pDataBlock)) + blockDataGetSize(pBlock);
}

View File

@ -76,6 +76,9 @@ SArray *smGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
code = 0; code = 0;
_OVER: _OVER:

View File

@ -746,9 +746,10 @@ SArray *vmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_RECOVER_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TRANSFER_STATE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;

View File

@ -92,7 +92,7 @@ static void vmProcessStreamQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) {
const STraceId *trace = &pMsg->info.traceId; const STraceId *trace = &pMsg->info.traceId;
dGTrace("vgId:%d, msg:%p get from vnode-stream queue", pVnode->vgId, pMsg); dGTrace("vgId:%d, msg:%p get from vnode-stream queue", pVnode->vgId, pMsg);
int32_t code = vnodeProcessFetchMsg(pVnode->pImpl, pMsg, pInfo); int32_t code = vnodeProcessStreamMsg(pVnode->pImpl, pMsg, pInfo);
if (code != 0) { if (code != 0) {
if (terrno != 0) code = terrno; if (terrno != 0) code = terrno;
dGError("vgId:%d, msg:%p failed to process stream msg %s since %s", pVnode->vgId, pMsg, TMSG_INFO(pMsg->msgType), dGError("vgId:%d, msg:%p failed to process stream msg %s since %s", pVnode->vgId, pMsg, TMSG_INFO(pMsg->msgType),

View File

@ -647,6 +647,14 @@ typedef struct {
// SMqSubActionLogEntry* pLogEntry; // SMqSubActionLogEntry* pLogEntry;
} SMqRebOutputObj; } SMqRebOutputObj;
typedef struct SStreamConf {
int8_t igExpired;
int8_t trigger;
int8_t fillHistory;
int64_t triggerParam;
int64_t watermark;
} SStreamConf;
typedef struct { typedef struct {
char name[TSDB_STREAM_FNAME_LEN]; char name[TSDB_STREAM_FNAME_LEN];
// ctl // ctl
@ -660,12 +668,7 @@ typedef struct {
// info // info
int64_t uid; int64_t uid;
int8_t status; int8_t status;
// config SStreamConf conf;
int8_t igExpired;
int8_t trigger;
int8_t fillHistory;
int64_t triggerParam;
int64_t watermark;
// source and target // source and target
int64_t sourceDbUid; int64_t sourceDbUid;
int64_t targetDbUid; int64_t targetDbUid;
@ -675,14 +678,18 @@ typedef struct {
int64_t targetStbUid; int64_t targetStbUid;
// fixedSinkVg is not applicable for encode and decode // fixedSinkVg is not applicable for encode and decode
SVgObj fixedSinkVg; SVgObj fixedSinkVg;
int32_t fixedSinkVgId; // 0 for shuffle int32_t fixedSinkVgId; // 0 for shuffle
// transformation // transformation
char* sql; char* sql;
char* ast; char* ast;
char* physicalPlan; char* physicalPlan;
SArray* tasks; // SArray<SArray<SStreamTask>> SArray* tasks; // SArray<SArray<SStreamTask>>
SArray* pHTasksList; // generate the results for already stored ts data
int64_t hTaskUid; // stream task for history ts data
SSchemaWrapper outputSchema; SSchemaWrapper outputSchema;
SSchemaWrapper tagSchema; SSchemaWrapper tagSchema;

View File

@ -30,7 +30,7 @@ int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscrib
int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType, int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType,
int64_t watermark, int64_t deleteMark); int64_t watermark, int64_t deleteMark);
int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream); int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream, int64_t nextWindowSkey);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -30,11 +30,11 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) {
if (tEncodeI64(pEncoder, pObj->uid) < 0) return -1; if (tEncodeI64(pEncoder, pObj->uid) < 0) return -1;
if (tEncodeI8(pEncoder, pObj->status) < 0) return -1; if (tEncodeI8(pEncoder, pObj->status) < 0) return -1;
if (tEncodeI8(pEncoder, pObj->igExpired) < 0) return -1; if (tEncodeI8(pEncoder, pObj->conf.igExpired) < 0) return -1;
if (tEncodeI8(pEncoder, pObj->trigger) < 0) return -1; if (tEncodeI8(pEncoder, pObj->conf.trigger) < 0) return -1;
if (tEncodeI8(pEncoder, pObj->fillHistory) < 0) return -1; if (tEncodeI8(pEncoder, pObj->conf.fillHistory) < 0) return -1;
if (tEncodeI64(pEncoder, pObj->triggerParam) < 0) return -1; if (tEncodeI64(pEncoder, pObj->conf.triggerParam) < 0) return -1;
if (tEncodeI64(pEncoder, pObj->watermark) < 0) return -1; if (tEncodeI64(pEncoder, pObj->conf.watermark) < 0) return -1;
if (tEncodeI64(pEncoder, pObj->sourceDbUid) < 0) return -1; if (tEncodeI64(pEncoder, pObj->sourceDbUid) < 0) return -1;
if (tEncodeI64(pEncoder, pObj->targetDbUid) < 0) return -1; if (tEncodeI64(pEncoder, pObj->targetDbUid) < 0) return -1;
@ -97,11 +97,11 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) {
if (tDecodeI64(pDecoder, &pObj->uid) < 0) return -1; if (tDecodeI64(pDecoder, &pObj->uid) < 0) return -1;
if (tDecodeI8(pDecoder, &pObj->status) < 0) return -1; if (tDecodeI8(pDecoder, &pObj->status) < 0) return -1;
if (tDecodeI8(pDecoder, &pObj->igExpired) < 0) return -1; if (tDecodeI8(pDecoder, &pObj->conf.igExpired) < 0) return -1;
if (tDecodeI8(pDecoder, &pObj->trigger) < 0) return -1; if (tDecodeI8(pDecoder, &pObj->conf.trigger) < 0) return -1;
if (tDecodeI8(pDecoder, &pObj->fillHistory) < 0) return -1; if (tDecodeI8(pDecoder, &pObj->conf.fillHistory) < 0) return -1;
if (tDecodeI64(pDecoder, &pObj->triggerParam) < 0) return -1; if (tDecodeI64(pDecoder, &pObj->conf.triggerParam) < 0) return -1;
if (tDecodeI64(pDecoder, &pObj->watermark) < 0) return -1; if (tDecodeI64(pDecoder, &pObj->conf.watermark) < 0) return -1;
if (tDecodeI64(pDecoder, &pObj->sourceDbUid) < 0) return -1; if (tDecodeI64(pDecoder, &pObj->sourceDbUid) < 0) return -1;
if (tDecodeI64(pDecoder, &pObj->targetDbUid) < 0) return -1; if (tDecodeI64(pDecoder, &pObj->targetDbUid) < 0) return -1;
@ -154,18 +154,10 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) {
return 0; return 0;
} }
void tFreeStreamObj(SStreamObj *pStream) { static void* freeStreamTasks(SArray* pTaskLevel) {
taosMemoryFree(pStream->sql); int32_t numOfLevel = taosArrayGetSize(pTaskLevel);
taosMemoryFree(pStream->ast); for (int32_t i = 0; i < numOfLevel; i++) {
taosMemoryFree(pStream->physicalPlan); SArray *pLevel = taosArrayGetP(pTaskLevel, i);
if (pStream->outputSchema.nCols) {
taosMemoryFree(pStream->outputSchema.pSchema);
}
int32_t sz = taosArrayGetSize(pStream->tasks);
for (int32_t i = 0; i < sz; i++) {
SArray *pLevel = taosArrayGetP(pStream->tasks, i);
int32_t taskSz = taosArrayGetSize(pLevel); int32_t taskSz = taosArrayGetSize(pLevel);
for (int32_t j = 0; j < taskSz; j++) { for (int32_t j = 0; j < taskSz; j++) {
SStreamTask *pTask = taosArrayGetP(pLevel, j); SStreamTask *pTask = taosArrayGetP(pLevel, j);
@ -175,7 +167,20 @@ void tFreeStreamObj(SStreamObj *pStream) {
taosArrayDestroy(pLevel); taosArrayDestroy(pLevel);
} }
taosArrayDestroy(pStream->tasks); return taosArrayDestroy(pTaskLevel);
}
void tFreeStreamObj(SStreamObj *pStream) {
taosMemoryFree(pStream->sql);
taosMemoryFree(pStream->ast);
taosMemoryFree(pStream->physicalPlan);
if (pStream->outputSchema.nCols || pStream->outputSchema.pSchema) {
taosMemoryFree(pStream->outputSchema.pSchema);
}
pStream->tasks = freeStreamTasks(pStream->tasks);
pStream->pHTasksList = freeStreamTasks(pStream->pHTasksList);
// tagSchema.pSchema // tagSchema.pSchema
if (pStream->tagSchema.nCols > 0) { if (pStream->tagSchema.nCols > 0) {

View File

@ -367,10 +367,10 @@ void dumpStream(SSdb *pSdb, SJson *json) {
tjsonAddStringToObject(item, "smaId", i642str(pObj->smaId)); tjsonAddStringToObject(item, "smaId", i642str(pObj->smaId));
tjsonAddStringToObject(item, "uid", i642str(pObj->uid)); tjsonAddStringToObject(item, "uid", i642str(pObj->uid));
tjsonAddStringToObject(item, "status", i642str(pObj->status)); tjsonAddStringToObject(item, "status", i642str(pObj->status));
tjsonAddStringToObject(item, "igExpired", i642str(pObj->igExpired)); tjsonAddStringToObject(item, "igExpired", i642str(pObj->conf.igExpired));
tjsonAddStringToObject(item, "trigger", i642str(pObj->trigger)); tjsonAddStringToObject(item, "trigger", i642str(pObj->conf.trigger));
tjsonAddStringToObject(item, "triggerParam", i642str(pObj->triggerParam)); tjsonAddStringToObject(item, "triggerParam", i642str(pObj->conf.triggerParam));
tjsonAddStringToObject(item, "watermark", i642str(pObj->watermark)); tjsonAddStringToObject(item, "watermark", i642str(pObj->conf.watermark));
tjsonAddStringToObject(item, "sourceDbUid", i642str(pObj->sourceDbUid)); tjsonAddStringToObject(item, "sourceDbUid", i642str(pObj->sourceDbUid));
tjsonAddStringToObject(item, "targetDbUid", i642str(pObj->targetDbUid)); tjsonAddStringToObject(item, "targetDbUid", i642str(pObj->targetDbUid));
tjsonAddStringToObject(item, "sourceDb", mndGetDbStr(pObj->sourceDb)); tjsonAddStringToObject(item, "sourceDb", mndGetDbStr(pObj->sourceDb));

View File

@ -542,32 +542,32 @@ int32_t mndRetrieveTagIdx(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, i
STR_TO_VARSTR(n3, (char *)tNameGetTableName(&stbName)); STR_TO_VARSTR(n3, (char *)tNameGetTableName(&stbName));
SColumnInfoData *pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); SColumnInfoData *pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)n1, false); colDataSetVal(pColInfo, numOfRows, (const char *)n1, false);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)n2, false); colDataSetVal(pColInfo, numOfRows, (const char *)n2, false);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)n3, false); colDataSetVal(pColInfo, numOfRows, (const char *)n3, false);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)&invalid, false); colDataSetVal(pColInfo, numOfRows, (const char *)&invalid, false);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)&pIdx->createdTime, false); colDataSetVal(pColInfo, numOfRows, (const char *)&pIdx->createdTime, false);
char col[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0}; char col[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0};
STR_TO_VARSTR(col, (char *)pIdx->colName); STR_TO_VARSTR(col, (char *)pIdx->colName);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)col, false); colDataSetVal(pColInfo, numOfRows, (const char *)col, false);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
char tag[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0}; char tag[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0};
STR_TO_VARSTR(tag, (char *)"tag_index"); STR_TO_VARSTR(tag, (char *)"tag_index");
colDataAppend(pColInfo, numOfRows, (const char *)tag, false); colDataSetVal(pColInfo, numOfRows, (const char *)tag, false);
numOfRows++; numOfRows++;
sdbRelease(pSdb, pIdx); sdbRelease(pSdb, pIdx);

View File

@ -22,10 +22,12 @@
#include "tname.h" #include "tname.h"
#include "tuuid.h" #include "tuuid.h"
#define SINK_NODE_LEVEL (0)
extern bool tsDeployOnSnode; extern bool tsDeployOnSnode;
static int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup); static int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId,
static void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask); SVgObj* pVgroup, int32_t fillHistory);
static void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask);
int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType, int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType,
int64_t watermark, int64_t deleteMark) { int64_t watermark, int64_t deleteMark) {
@ -100,18 +102,16 @@ int32_t mndSetSinkTaskInfo(SStreamObj* pStream, SStreamTask* pTask) {
return 0; return 0;
} }
#define SINK_NODE_LEVEL (0) int32_t mndAddDispatcherForInternalTask(SMnode* pMnode, SStreamObj* pStream, SArray* pSinkNodeList,
SStreamTask* pTask) {
int32_t mndAddDispatcherForInnerTask(SMnode* pMnode, SStreamObj* pStream, SStreamTask* pTask) {
bool isShuffle = false; bool isShuffle = false;
if (pStream->fixedSinkVgId == 0) { if (pStream->fixedSinkVgId == 0) {
SDbObj* pDb = mndAcquireDb(pMnode, pStream->targetDb); SDbObj* pDb = mndAcquireDb(pMnode, pStream->targetDb);
if (pDb != NULL && pDb->cfg.numOfVgroups > 1) { if (pDb != NULL && pDb->cfg.numOfVgroups > 1) {
isShuffle = true; isShuffle = true;
pTask->outputType = TASK_OUTPUT__SHUFFLE_DISPATCH; pTask->outputType = TASK_OUTPUT__SHUFFLE_DISPATCH;
pTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH; pTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH;
if (mndExtractDbInfo(pMnode, pDb, &pTask->shuffleDispatcher.dbInfo, NULL) < 0) { if (mndExtractDbInfo(pMnode, pDb, &pTask->shuffleDispatcher.dbInfo, NULL) < 0) {
return -1; return -1;
} }
@ -120,7 +120,6 @@ int32_t mndAddDispatcherForInnerTask(SMnode* pMnode, SStreamObj* pStream, SStrea
sdbRelease(pMnode->pSdb, pDb); sdbRelease(pMnode->pSdb, pDb);
} }
SArray* pSinkNodeList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL);
int32_t numOfSinkNodes = taosArrayGetSize(pSinkNodeList); int32_t numOfSinkNodes = taosArrayGetSize(pSinkNodeList);
if (isShuffle) { if (isShuffle) {
@ -133,7 +132,7 @@ int32_t mndAddDispatcherForInnerTask(SMnode* pMnode, SStreamObj* pStream, SStrea
for (int32_t j = 0; j < numOfSinkNodes; j++) { for (int32_t j = 0; j < numOfSinkNodes; j++) {
SStreamTask* pSinkTask = taosArrayGetP(pSinkNodeList, j); SStreamTask* pSinkTask = taosArrayGetP(pSinkNodeList, j);
if (pSinkTask->nodeId == pVgInfo->vgId) { if (pSinkTask->info.nodeId == pVgInfo->vgId) {
pVgInfo->taskId = pSinkTask->id.taskId; pVgInfo->taskId = pSinkTask->id.taskId;
break; break;
} }
@ -150,11 +149,11 @@ int32_t mndAddDispatcherForInnerTask(SMnode* pMnode, SStreamObj* pStream, SStrea
int32_t mndAssignStreamTaskToVgroup(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan, const SVgObj* pVgroup) { int32_t mndAssignStreamTaskToVgroup(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan, const SVgObj* pVgroup) {
int32_t msgLen; int32_t msgLen;
pTask->nodeId = pVgroup->vgId; pTask->info.nodeId = pVgroup->vgId;
pTask->epSet = mndGetVgroupEpset(pMnode, pVgroup); pTask->info.epSet = mndGetVgroupEpset(pMnode, pVgroup);
plan->execNode.nodeId = pTask->nodeId; plan->execNode.nodeId = pTask->info.nodeId;
plan->execNode.epSet = pTask->epSet; plan->execNode.epSet = pTask->info.epSet;
if (qSubPlanToString(plan, &pTask->exec.qmsg, &msgLen) < 0) { if (qSubPlanToString(plan, &pTask->exec.qmsg, &msgLen) < 0) {
terrno = TSDB_CODE_QRY_INVALID_INPUT; terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1; return -1;
@ -171,14 +170,15 @@ SSnodeObj* mndSchedFetchOneSnode(SMnode* pMnode) {
return pObj; return pObj;
} }
int32_t mndAssignTaskToSnode(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan, const SSnodeObj* pSnode) { int32_t mndAssignStreamTaskToSnode(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan, const SSnodeObj* pSnode) {
int32_t msgLen; int32_t msgLen;
pTask->nodeId = SNODE_HANDLE; pTask->info.nodeId = SNODE_HANDLE;
pTask->epSet = mndAcquireEpFromSnode(pMnode, pSnode); pTask->info.epSet = mndAcquireEpFromSnode(pMnode, pSnode);
plan->execNode.nodeId = SNODE_HANDLE; plan->execNode.nodeId = SNODE_HANDLE;
plan->execNode.epSet = pTask->epSet; plan->execNode.epSet = pTask->info.epSet;
mDebug("s-task:0x%x set the agg task to snode:%d", pTask->id.taskId, SNODE_HANDLE);
if (qSubPlanToString(plan, &pTask->exec.qmsg, &msgLen) < 0) { if (qSubPlanToString(plan, &pTask->exec.qmsg, &msgLen) < 0) {
terrno = TSDB_CODE_QRY_INVALID_INPUT; terrno = TSDB_CODE_QRY_INVALID_INPUT;
@ -187,6 +187,7 @@ int32_t mndAssignTaskToSnode(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan,
return 0; return 0;
} }
// todo random choose a node to do compute
SVgObj* mndSchedFetchOneVg(SMnode* pMnode, int64_t dbUid) { SVgObj* mndSchedFetchOneVg(SMnode* pMnode, int64_t dbUid) {
void* pIter = NULL; void* pIter = NULL;
SVgObj* pVgroup = NULL; SVgObj* pVgroup = NULL;
@ -203,9 +204,9 @@ SVgObj* mndSchedFetchOneVg(SMnode* pMnode, int64_t dbUid) {
} }
// create sink node for each vgroup. // create sink node for each vgroup.
int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SStreamObj* pStream) { int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStreamObj* pStream, int32_t fillHistory) {
SSdb* pSdb = pMnode->pSdb; SSdb* pSdb = pMnode->pSdb;
void* pIter = NULL; void* pIter = NULL;
while (1) { while (1) {
SVgObj* pVgroup = NULL; SVgObj* pVgroup = NULL;
@ -219,43 +220,45 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SStreamObj* pStream) {
continue; continue;
} }
mndAddSinkTaskToStream(pStream, pMnode, pVgroup->vgId, pVgroup); mndAddSinkTaskToStream(pStream, pTaskList, pMnode, pVgroup->vgId, pVgroup, fillHistory);
sdbRelease(pSdb, pVgroup); sdbRelease(pSdb, pVgroup);
} }
return 0; return 0;
} }
int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup) { int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup,
SArray* pTaskList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL); int32_t fillHistory) {
SStreamTask* pTask = tNewStreamTask(pStream->uid, TASK_LEVEL__SINK, fillHistory, 0, pTaskList);
SStreamTask* pTask = tNewStreamTask(pStream->uid, TASK_LEVEL__SINK, pStream->fillHistory, 0, pTaskList);
if (pTask == NULL) { if (pTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1; return -1;
} }
pTask->nodeId = vgId; pTask->info.nodeId = vgId;
pTask->epSet = mndGetVgroupEpset(pMnode, pVgroup); pTask->info.epSet = mndGetVgroupEpset(pMnode, pVgroup);
mndSetSinkTaskInfo(pStream, pTask); mndSetSinkTaskInfo(pStream, pTask);
return 0; return 0;
} }
static int32_t mndScheduleFillHistoryStreamTask(SMnode* pMnode, SStreamObj* pStream) { static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList, SArray* pSinkTaskList,
return 0; SStreamObj* pStream, SSubplan* plan, uint64_t uid, int8_t fillHistory,
} bool hasExtraSink, int64_t firstWindowSkey) {
SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, pStream->conf.triggerParam, pTaskList);
static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList, SStreamObj* pStream,
SSubplan* plan, uint64_t uid, int8_t taskLevel, int8_t fillHistory,
bool hasExtraSink) {
SStreamTask* pTask = tNewStreamTask(uid, taskLevel, fillHistory, pStream->triggerParam, pTaskList);
if (pTask == NULL) { if (pTask == NULL) {
return terrno; return terrno;
} }
// todo set the correct ts, which should be last key of queried table.
STimeWindow* pWindow = &pTask->dataRange.window;
pWindow->skey = INT64_MIN;
pWindow->ekey = firstWindowSkey - 1;
mDebug("add source task 0x%x window:%" PRId64 " - %" PRId64, pTask->id.taskId, pWindow->skey, pWindow->ekey);
// sink or dispatch // sink or dispatch
if (hasExtraSink) { if (hasExtraSink) {
mndAddDispatcherForInnerTask(pMnode, pStream, pTask); mndAddDispatcherForInternalTask(pMnode, pStream, pSinkTaskList, pTask);
} else { } else {
mndSetSinkTaskInfo(pStream, pTask); mndSetSinkTaskInfo(pStream, pTask);
} }
@ -274,9 +277,9 @@ static SStreamChildEpInfo* createStreamTaskEpInfo(SStreamTask* pTask) {
return NULL; return NULL;
} }
pEpInfo->childId = pTask->selfChildId; pEpInfo->childId = pTask->info.selfChildId;
pEpInfo->epSet = pTask->epSet; pEpInfo->epSet = pTask->info.epSet;
pEpInfo->nodeId = pTask->nodeId; pEpInfo->nodeId = pTask->info.nodeId;
pEpInfo->taskId = pTask->id.taskId; pEpInfo->taskId = pTask->id.taskId;
return pEpInfo; return pEpInfo;
@ -285,38 +288,307 @@ static SStreamChildEpInfo* createStreamTaskEpInfo(SStreamTask* pTask) {
void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask) { void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask) {
STaskDispatcherFixedEp* pDispatcher = &pDstTask->fixedEpDispatcher; STaskDispatcherFixedEp* pDispatcher = &pDstTask->fixedEpDispatcher;
pDispatcher->taskId = pTask->id.taskId; pDispatcher->taskId = pTask->id.taskId;
pDispatcher->nodeId = pTask->nodeId; pDispatcher->nodeId = pTask->info.nodeId;
pDispatcher->epSet = pTask->epSet; pDispatcher->epSet = pTask->info.epSet;
pDstTask->outputType = TASK_OUTPUT__FIXED_DISPATCH; pDstTask->outputType = TASK_OUTPUT__FIXED_DISPATCH;
pDstTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH; pDstTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH;
} }
int32_t appendToUpstream(SStreamTask* pTask, SStreamTask* pUpstream) { int32_t setEpToDownstreamTask(SStreamTask* pTask, SStreamTask* pDownstream) {
SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pTask); SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pTask);
if (pEpInfo == NULL) { if (pEpInfo == NULL) {
return TSDB_CODE_OUT_OF_MEMORY; return TSDB_CODE_OUT_OF_MEMORY;
} }
if(pUpstream->childEpInfo == NULL) { if (pDownstream->pUpstreamEpInfoList == NULL) {
pUpstream->childEpInfo = taosArrayInit(4, POINTER_BYTES); pDownstream->pUpstreamEpInfoList = taosArrayInit(4, POINTER_BYTES);
} }
taosArrayPush(pUpstream->childEpInfo, &pEpInfo); taosArrayPush(pDownstream->pUpstreamEpInfoList, &pEpInfo);
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) { static SArray* addNewTaskList(SArray* pTasksList) {
SSdb* pSdb = pMnode->pSdb; SArray* pTaskList = taosArrayInit(0, POINTER_BYTES);
taosArrayPush(pTasksList, &pTaskList);
return pTaskList;
}
SQueryPlan* pPlan = qStringToQueryPlan(pStream->physicalPlan); // set the history task id
if (pPlan == NULL) { static void setHTasksId(SArray* pTaskList, const SArray* pHTaskList) {
for (int32_t i = 0; i < taosArrayGetSize(pTaskList); ++i) {
SStreamTask** pStreamTask = taosArrayGet(pTaskList, i);
SStreamTask** pHTask = taosArrayGet(pHTaskList, i);
(*pStreamTask)->historyTaskId.taskId = (*pHTask)->id.taskId;
(*pStreamTask)->historyTaskId.streamId = (*pHTask)->id.streamId;
(*pHTask)->streamTaskId.taskId = (*pStreamTask)->id.taskId;
(*pHTask)->streamTaskId.streamId = (*pStreamTask)->id.streamId;
mDebug("s-task:0x%x related history task:0x%x, level:%d", (*pStreamTask)->id.taskId, (*pHTask)->id.taskId,
(*pHTask)->info.taskLevel);
}
}
static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* pPlan, SStreamObj* pStream,
bool hasExtraSink, int64_t nextWindowSkey) {
// create exec stream task, since only one level, the exec task is also the source task
SArray* pTaskList = addNewTaskList(pStream->tasks);
SSdb* pSdb = pMnode->pSdb;
SArray* pHTaskList = NULL;
if (pStream->conf.fillHistory) {
pHTaskList = addNewTaskList(pStream->pHTasksList);
}
SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0);
if (LIST_LENGTH(inner->pNodeList) != 1) {
terrno = TSDB_CODE_QRY_INVALID_INPUT; terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1; return -1;
} }
int32_t planTotLevel = LIST_LENGTH(pPlan->pSubplans); SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0);
pStream->tasks = taosArrayInit(planTotLevel, POINTER_BYTES); if (plan->subplanType != SUBPLAN_TYPE_SCAN) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
void* pIter = NULL;
while (1) {
SVgObj* pVgroup;
pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
if (pIter == NULL) {
break;
}
if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) {
sdbRelease(pSdb, pVgroup);
continue;
}
// new stream task
SArray** pSinkTaskList = taosArrayGet(pStream->tasks, SINK_NODE_LEVEL);
int32_t code = addSourceStreamTask(pMnode, pVgroup, pTaskList, *pSinkTaskList, pStream, plan, pStream->uid, 0,
hasExtraSink, nextWindowSkey);
if (code != TSDB_CODE_SUCCESS) {
sdbRelease(pSdb, pVgroup);
return -1;
}
if (pStream->conf.fillHistory) {
SArray** pHSinkTaskList = taosArrayGet(pStream->pHTasksList, SINK_NODE_LEVEL);
code = addSourceStreamTask(pMnode, pVgroup, pHTaskList, *pHSinkTaskList, pStream, plan, pStream->hTaskUid,
1, hasExtraSink, nextWindowSkey);
}
sdbRelease(pSdb, pVgroup);
if (code != TSDB_CODE_SUCCESS) {
return -1;
}
}
if (pStream->conf.fillHistory) {
setHTasksId(pTaskList, pHTaskList);
}
return TSDB_CODE_SUCCESS;
}
static int32_t doAddSourceTask(SArray* pTaskList, int8_t fillHistory, int64_t uid, SStreamTask* pDownstreamTask,
SMnode* pMnode, SSubplan* pPlan, SVgObj* pVgroup, int64_t nextWindowSkey) {
SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, 0, pTaskList);
if (pTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
// todo set the correct ts, which should be last key of queried table.
STimeWindow* pWindow = &pTask->dataRange.window;
pWindow->skey = INT64_MIN;
pWindow->ekey = nextWindowSkey - 1;
mDebug("s-task:0x%x level:%d set time window:%" PRId64 " - %" PRId64, pTask->id.taskId, pTask->info.taskLevel,
pWindow->skey, pWindow->ekey);
// all the source tasks dispatch result to a single agg node.
setFixedDownstreamEpInfo(pTask, pDownstreamTask);
if (mndAssignStreamTaskToVgroup(pMnode, pTask, pPlan, pVgroup) < 0) {
return -1;
}
return setEpToDownstreamTask(pTask, pDownstreamTask);
}
static int32_t doAddAggTask(uint64_t uid, SArray* pTaskList, SArray* pSinkNodeList, SMnode* pMnode, SStreamObj* pStream,
int32_t fillHistory, SStreamTask** pAggTask) {
*pAggTask = tNewStreamTask(uid, TASK_LEVEL__AGG, fillHistory, pStream->conf.triggerParam, pTaskList);
if (*pAggTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
// dispatch
if (mndAddDispatcherForInternalTask(pMnode, pStream, pSinkNodeList, *pAggTask) < 0) {
return -1;
}
return 0;
}
static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, SStreamTask** pAggTask,
SStreamTask** pHAggTask) {
SArray* pAggTaskList = addNewTaskList(pStream->tasks);
SSdb* pSdb = pMnode->pSdb;
SNodeListNode* pInnerNode = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0);
SSubplan* plan = (SSubplan*)nodesListGetNode(pInnerNode->pNodeList, 0);
if (plan->subplanType != SUBPLAN_TYPE_MERGE) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
*pAggTask = NULL;
SArray* pSinkNodeList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL);
int32_t code = doAddAggTask(pStream->uid, pAggTaskList, pSinkNodeList, pMnode, pStream, 0, pAggTask);
if (code != TSDB_CODE_SUCCESS) {
return -1;
}
SVgObj* pVgroup = NULL;
SSnodeObj* pSnode = NULL;
if (tsDeployOnSnode) {
pSnode = mndSchedFetchOneSnode(pMnode);
if (pSnode == NULL) {
pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid);
}
} else {
pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid);
}
if (pSnode != NULL) {
code = mndAssignStreamTaskToSnode(pMnode, *pAggTask, plan, pSnode);
} else {
code = mndAssignStreamTaskToVgroup(pMnode, *pAggTask, plan, pVgroup);
}
if (pStream->conf.fillHistory) {
SArray* pHAggTaskList = addNewTaskList(pStream->pHTasksList);
SArray* pHSinkNodeList = taosArrayGetP(pStream->pHTasksList, SINK_NODE_LEVEL);
*pHAggTask = NULL;
code = doAddAggTask(pStream->hTaskUid, pHAggTaskList, pHSinkNodeList, pMnode, pStream, pStream->conf.fillHistory,
pHAggTask);
if (code != TSDB_CODE_SUCCESS) {
if (pSnode != NULL) {
sdbRelease(pSdb, pSnode);
} else {
sdbRelease(pSdb, pVgroup);
}
return code;
}
if (pSnode != NULL) {
code = mndAssignStreamTaskToSnode(pMnode, *pHAggTask, plan, pSnode);
} else {
code = mndAssignStreamTaskToVgroup(pMnode, *pHAggTask, plan, pVgroup);
}
setHTasksId(pAggTaskList, pHAggTaskList);
}
if (pSnode != NULL) {
sdbRelease(pSdb, pSnode);
} else {
sdbRelease(pSdb, pVgroup);
}
return code;
}
static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPlan, SStreamObj* pStream,
SStreamTask* pDownstreamTask, SStreamTask* pHDownstreamTask, int64_t nextWindowSkey) {
SArray* pSourceTaskList = addNewTaskList(pStream->tasks);
SArray* pHSourceTaskList = NULL;
if (pStream->conf.fillHistory) {
pHSourceTaskList = addNewTaskList(pStream->pHTasksList);
}
SSdb* pSdb = pMnode->pSdb;
SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 1);
SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0);
if (plan->subplanType != SUBPLAN_TYPE_SCAN) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
void* pIter = NULL;
while (1) {
SVgObj* pVgroup;
pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
if (pIter == NULL) {
break;
}
if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) {
sdbRelease(pSdb, pVgroup);
continue;
}
int32_t code =
doAddSourceTask(pSourceTaskList, 0, pStream->uid, pDownstreamTask, pMnode, plan, pVgroup, nextWindowSkey);
if (code != TSDB_CODE_SUCCESS) {
sdbRelease(pSdb, pVgroup);
terrno = code;
return -1;
}
if (pStream->conf.fillHistory) {
code = doAddSourceTask(pHSourceTaskList, 1, pStream->hTaskUid, pHDownstreamTask, pMnode, plan, pVgroup,
nextWindowSkey);
if (code != TSDB_CODE_SUCCESS) {
sdbRelease(pSdb, pVgroup);
return code;
}
}
sdbRelease(pSdb, pVgroup);
}
if (pStream->conf.fillHistory) {
setHTasksId(pSourceTaskList, pHSourceTaskList);
}
return TSDB_CODE_SUCCESS;
}
static int32_t addSinkTasks(SArray* pTasksList, SMnode* pMnode, SStreamObj* pStream, SArray** pCreatedTaskList,
int32_t fillHistory) {
SArray* pSinkTaskList = addNewTaskList(pTasksList);
if (pStream->fixedSinkVgId == 0) {
if (mndAddShuffleSinkTasksToStream(pMnode, pSinkTaskList, pStream, fillHistory) < 0) {
// TODO free
return -1;
}
} else {
if (mndAddSinkTaskToStream(pStream, pSinkTaskList, pMnode, pStream->fixedSinkVgId, &pStream->fixedSinkVg,
fillHistory) < 0) {
// TODO free
return -1;
}
}
*pCreatedTaskList = pSinkTaskList;
return TSDB_CODE_SUCCESS;
}
static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, int64_t nextWindowSkey) {
SSdb* pSdb = pMnode->pSdb;
int32_t numOfPlanLevel = LIST_LENGTH(pPlan->pSubplans);
bool hasExtraSink = false; bool hasExtraSink = false;
bool externalTargetDB = strcmp(pStream->sourceDb, pStream->targetDb) != 0; bool externalTargetDB = strcmp(pStream->sourceDb, pStream->targetDb) != 0;
@ -329,178 +601,64 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) {
bool multiTarget = (pDbObj->cfg.numOfVgroups > 1); bool multiTarget = (pDbObj->cfg.numOfVgroups > 1);
sdbRelease(pSdb, pDbObj); sdbRelease(pSdb, pDbObj);
if (planTotLevel == 2 || externalTargetDB || multiTarget || pStream->fixedSinkVgId) { pStream->tasks = taosArrayInit(numOfPlanLevel + 1, POINTER_BYTES);
SArray* taskOneLevel = taosArrayInit(0, POINTER_BYTES); pStream->pHTasksList = taosArrayInit(numOfPlanLevel + 1, POINTER_BYTES);
taosArrayPush(pStream->tasks, &taskOneLevel);
if (numOfPlanLevel == 2 || externalTargetDB || multiTarget || pStream->fixedSinkVgId) {
// add extra sink // add extra sink
hasExtraSink = true; hasExtraSink = true;
if (pStream->fixedSinkVgId == 0) {
if (mndAddShuffleSinkTasksToStream(pMnode, pStream) < 0) { SArray* pSinkTaskList = NULL;
// TODO free int32_t code = addSinkTasks(pStream->tasks, pMnode, pStream, &pSinkTaskList, 0);
return -1; if (code != TSDB_CODE_SUCCESS) {
} return code;
} else { }
if (mndAddSinkTaskToStream(pStream, pMnode, pStream->fixedSinkVgId, &pStream->fixedSinkVg) < 0) {
// TODO free // check for fill history
return -1; if (pStream->conf.fillHistory) {
SArray* pHSinkTaskList = NULL;
code = addSinkTasks(pStream->pHTasksList, pMnode, pStream, &pHSinkTaskList, 1);
if (code != TSDB_CODE_SUCCESS) {
return code;
} }
setHTasksId(pSinkTaskList, pHSinkTaskList);
} }
} }
pStream->totalLevel = planTotLevel + hasExtraSink; pStream->totalLevel = numOfPlanLevel + hasExtraSink;
if (planTotLevel > 1) { if (numOfPlanLevel > 1) {
SStreamTask* pInnerTask; SStreamTask* pAggTask = NULL;
// inner level SStreamTask* pHAggTask = NULL;
{
SArray* taskInnerLevel = taosArrayInit(0, POINTER_BYTES);
taosArrayPush(pStream->tasks, &taskInnerLevel);
SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0); int32_t code = addAggTask(pStream, pMnode, pPlan, &pAggTask, &pHAggTask);
SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0); if (code != TSDB_CODE_SUCCESS) {
if (plan->subplanType != SUBPLAN_TYPE_MERGE) { return code;
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
pInnerTask = tNewStreamTask(pStream->uid, TASK_LEVEL__AGG, pStream->fillHistory, pStream->triggerParam, taskInnerLevel);
if (pInnerTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
qDestroyQueryPlan(pPlan);
return -1;
}
// dispatch
if (mndAddDispatcherForInnerTask(pMnode, pStream, pInnerTask) < 0) {
qDestroyQueryPlan(pPlan);
return -1;
}
if (tsDeployOnSnode) {
SSnodeObj* pSnode = mndSchedFetchOneSnode(pMnode);
if (pSnode == NULL) {
SVgObj* pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid);
if (mndAssignStreamTaskToVgroup(pMnode, pInnerTask, plan, pVgroup) < 0) {
sdbRelease(pSdb, pVgroup);
qDestroyQueryPlan(pPlan);
return -1;
}
sdbRelease(pSdb, pVgroup);
} else {
if (mndAssignTaskToSnode(pMnode, pInnerTask, plan, pSnode) < 0) {
sdbRelease(pSdb, pSnode);
qDestroyQueryPlan(pPlan);
return -1;
}
}
} else {
SVgObj* pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid);
if (mndAssignStreamTaskToVgroup(pMnode, pInnerTask, plan, pVgroup) < 0) {
sdbRelease(pSdb, pVgroup);
qDestroyQueryPlan(pPlan);
return -1;
}
sdbRelease(pSdb, pVgroup);
}
} }
// source level // source level
SArray* taskSourceLevel = taosArrayInit(0, POINTER_BYTES); return addSourceTasksForMultiLevelStream(pMnode, pPlan, pStream, pAggTask, pHAggTask, nextWindowSkey);
taosArrayPush(pStream->tasks, &taskSourceLevel); } else if (numOfPlanLevel == 1) {
return addSourceTasksForOneLevelStream(pMnode, pPlan, pStream, hasExtraSink, nextWindowSkey);
SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 1);
SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0);
if (plan->subplanType != SUBPLAN_TYPE_SCAN) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
void* pIter = NULL;
while (1) {
SVgObj* pVgroup;
pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
if (pIter == NULL) {
break;
}
if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) {
sdbRelease(pSdb, pVgroup);
continue;
}
SStreamTask* pTask = tNewStreamTask(pStream->uid, TASK_LEVEL__SOURCE, pStream->fillHistory, 0, taskSourceLevel);
if (pTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
sdbRelease(pSdb, pVgroup);
qDestroyQueryPlan(pPlan);
return -1;
}
// all the source tasks dispatch result to a single agg node.
setFixedDownstreamEpInfo(pTask, pInnerTask);
if (mndAssignStreamTaskToVgroup(pMnode, pTask, plan, pVgroup) < 0) {
sdbRelease(pSdb, pVgroup);
qDestroyQueryPlan(pPlan);
return -1;
}
int32_t code = appendToUpstream(pTask, pInnerTask);
sdbRelease(pSdb, pVgroup);
if (code != TSDB_CODE_SUCCESS) {
terrno = code;
qDestroyQueryPlan(pPlan);
return -1;
}
}
} else if (planTotLevel == 1) {
// create exec stream task, since only one level, the exec task is also the source task
SArray* pTaskList = taosArrayInit(0, POINTER_BYTES);
taosArrayPush(pStream->tasks, &pTaskList);
SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0);
if (LIST_LENGTH(inner->pNodeList) != 1) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0);
if (plan->subplanType != SUBPLAN_TYPE_SCAN) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
void* pIter = NULL;
while (1) {
SVgObj* pVgroup;
pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
if (pIter == NULL) {
break;
}
if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) {
sdbRelease(pSdb, pVgroup);
continue;
}
// new stream task
int32_t code = addSourceStreamTask(pMnode, pVgroup, pTaskList, pStream, plan, pStream->uid, TASK_LEVEL__SOURCE, pStream->fillHistory, hasExtraSink);
sdbRelease(pSdb, pVgroup);
if (code != TSDB_CODE_SUCCESS) {
qDestroyQueryPlan(pPlan);
return -1;
}
}
} }
qDestroyQueryPlan(pPlan);
return 0; return 0;
} }
int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream, int64_t nextWindowSkey) {
SQueryPlan* pPlan = qStringToQueryPlan(pStream->physicalPlan);
if (pPlan == NULL) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
int32_t code = doScheduleStream(pStream, pMnode, pPlan, nextWindowSkey);
qDestroyQueryPlan(pPlan);
return code;
}
int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscribeObj* pSub) { int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscribeObj* pSub) {
SSdb* pSdb = pMnode->pSdb; SSdb* pSdb = pMnode->pSdb;
SVgObj* pVgroup = NULL; SVgObj* pVgroup = NULL;
@ -513,8 +671,8 @@ int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscrib
terrno = TSDB_CODE_QRY_INVALID_INPUT; terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1; return -1;
} }
}else if(pTopic->subType == TOPIC_SUB_TYPE__TABLE && pTopic->ast != NULL){ } else if (pTopic->subType == TOPIC_SUB_TYPE__TABLE && pTopic->ast != NULL) {
SNode *pAst = NULL; SNode* pAst = NULL;
if (nodesStringToNode(pTopic->ast, &pAst) != 0) { if (nodesStringToNode(pTopic->ast, &pAst) != 0) {
mError("topic:%s, failed to create since %s", pTopic->name, terrstr()); mError("topic:%s, failed to create since %s", pTopic->name, terrstr());
return -1; return -1;
@ -529,7 +687,7 @@ int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscrib
nodesDestroyNode(pAst); nodesDestroyNode(pAst);
} }
if(pPlan){ if (pPlan) {
int32_t levelNum = LIST_LENGTH(pPlan->pSubplans); int32_t levelNum = LIST_LENGTH(pPlan->pSubplans);
if (levelNum != 1) { if (levelNum != 1) {
qDestroyQueryPlan(pPlan); qDestroyQueryPlan(pPlan);

View File

@ -555,20 +555,20 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea
streamObj.version = 1; streamObj.version = 1;
streamObj.sql = taosStrdup(pCreate->sql); streamObj.sql = taosStrdup(pCreate->sql);
streamObj.smaId = smaObj.uid; streamObj.smaId = smaObj.uid;
streamObj.watermark = pCreate->watermark; streamObj.conf.watermark = pCreate->watermark;
streamObj.deleteMark = pCreate->deleteMark; streamObj.deleteMark = pCreate->deleteMark;
streamObj.fillHistory = STREAM_FILL_HISTORY_ON; streamObj.conf.fillHistory = STREAM_FILL_HISTORY_ON;
streamObj.trigger = STREAM_TRIGGER_WINDOW_CLOSE; streamObj.conf.trigger = STREAM_TRIGGER_WINDOW_CLOSE;
streamObj.triggerParam = pCreate->maxDelay; streamObj.conf.triggerParam = pCreate->maxDelay;
streamObj.ast = taosStrdup(smaObj.ast); streamObj.ast = taosStrdup(smaObj.ast);
// check the maxDelay // check the maxDelay
if (streamObj.triggerParam < TSDB_MIN_ROLLUP_MAX_DELAY) { if (streamObj.conf.triggerParam < TSDB_MIN_ROLLUP_MAX_DELAY) {
int64_t msInterval = convertTimeFromPrecisionToUnit(pCreate->interval, pDb->cfg.precision, TIME_UNIT_MILLISECOND); int64_t msInterval = convertTimeFromPrecisionToUnit(pCreate->interval, pDb->cfg.precision, TIME_UNIT_MILLISECOND);
streamObj.triggerParam = msInterval > TSDB_MIN_ROLLUP_MAX_DELAY ? msInterval : TSDB_MIN_ROLLUP_MAX_DELAY; streamObj.conf.triggerParam = msInterval > TSDB_MIN_ROLLUP_MAX_DELAY ? msInterval : TSDB_MIN_ROLLUP_MAX_DELAY;
} }
if (streamObj.triggerParam > TSDB_MAX_ROLLUP_MAX_DELAY) { if (streamObj.conf.triggerParam > TSDB_MAX_ROLLUP_MAX_DELAY) {
streamObj.triggerParam = TSDB_MAX_ROLLUP_MAX_DELAY; streamObj.conf.triggerParam = TSDB_MAX_ROLLUP_MAX_DELAY;
} }
if (mndAllocSmaVgroup(pMnode, pDb, &streamObj.fixedSinkVg) != 0) { if (mndAllocSmaVgroup(pMnode, pDb, &streamObj.fixedSinkVg) != 0) {
@ -597,8 +597,8 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea
.pAstRoot = pAst, .pAstRoot = pAst,
.topicQuery = false, .topicQuery = false,
.streamQuery = true, .streamQuery = true,
.triggerType = streamObj.trigger, .triggerType = streamObj.conf.trigger,
.watermark = streamObj.watermark, .watermark = streamObj.conf.watermark,
.deleteMark = streamObj.deleteMark, .deleteMark = streamObj.deleteMark,
}; };
@ -633,7 +633,7 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea
if (mndSetCreateSmaVgroupCommitLogs(pMnode, pTrans, &streamObj.fixedSinkVg) != 0) goto _OVER; if (mndSetCreateSmaVgroupCommitLogs(pMnode, pTrans, &streamObj.fixedSinkVg) != 0) goto _OVER;
if (mndSetUpdateSmaStbCommitLogs(pMnode, pTrans, pStb) != 0) goto _OVER; if (mndSetUpdateSmaStbCommitLogs(pMnode, pTrans, pStb) != 0) goto _OVER;
if (mndSetCreateSmaVgroupRedoActions(pMnode, pTrans, pDb, &streamObj.fixedSinkVg, &smaObj) != 0) goto _OVER; if (mndSetCreateSmaVgroupRedoActions(pMnode, pTrans, pDb, &streamObj.fixedSinkVg, &smaObj) != 0) goto _OVER;
if (mndScheduleStream(pMnode, &streamObj) != 0) goto _OVER; if (mndScheduleStream(pMnode, &streamObj, 1685959190000) != 0) goto _OVER;
if (mndPersistStream(pMnode, pTrans, &streamObj) != 0) goto _OVER; if (mndPersistStream(pMnode, pTrans, &streamObj) != 0) goto _OVER;
if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER;
@ -1278,13 +1278,13 @@ static int32_t mndRetrieveSma(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBloc
STR_TO_VARSTR(col, (char *)""); STR_TO_VARSTR(col, (char *)"");
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)col, false); colDataSetVal(pColInfo, numOfRows, (const char *)col, false);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
char tag[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0}; char tag[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0};
STR_TO_VARSTR(tag, (char *)"sma_index"); STR_TO_VARSTR(tag, (char *)"sma_index");
colDataAppend(pColInfo, numOfRows, (const char *)tag, false); colDataSetVal(pColInfo, numOfRows, (const char *)tag, false);
numOfRows++; numOfRows++;
sdbRelease(pSdb, pSma); sdbRelease(pSdb, pSma);

View File

@ -239,7 +239,7 @@ static void mndShowStreamStatus(char *dst, SStreamObj *pStream) {
} }
static void mndShowStreamTrigger(char *dst, SStreamObj *pStream) { static void mndShowStreamTrigger(char *dst, SStreamObj *pStream) {
int8_t trigger = pStream->trigger; int8_t trigger = pStream->conf.trigger;
if (trigger == STREAM_TRIGGER_AT_ONCE) { if (trigger == STREAM_TRIGGER_AT_ONCE) {
strcpy(dst, "at once"); strcpy(dst, "at once");
} else if (trigger == STREAM_TRIGGER_WINDOW_CLOSE) { } else if (trigger == STREAM_TRIGGER_WINDOW_CLOSE) {
@ -299,13 +299,18 @@ static int32_t mndBuildStreamObjFromCreateReq(SMnode *pMnode, SStreamObj *pObj,
pObj->smaId = 0; pObj->smaId = 0;
pObj->uid = mndGenerateUid(pObj->name, strlen(pObj->name)); pObj->uid = mndGenerateUid(pObj->name, strlen(pObj->name));
char p[TSDB_STREAM_FNAME_LEN + 32] = {0};
snprintf(p, tListLen(p), "%s_%s", pObj->name, "fillhistory");
pObj->hTaskUid = mndGenerateUid(pObj->name, strlen(pObj->name));
pObj->status = 0; pObj->status = 0;
pObj->igExpired = pCreate->igExpired; pObj->conf.igExpired = pCreate->igExpired;
pObj->trigger = pCreate->triggerType; pObj->conf.trigger = pCreate->triggerType;
pObj->triggerParam = pCreate->maxDelay; pObj->conf.triggerParam = pCreate->maxDelay;
pObj->watermark = pCreate->watermark; pObj->conf.watermark = pCreate->watermark;
pObj->fillHistory = pCreate->fillHistory; pObj->conf.fillHistory = pCreate->fillHistory;
pObj->deleteMark = pCreate->deleteMark; pObj->deleteMark = pCreate->deleteMark;
pObj->igCheckUpdate = pCreate->igUpdate; pObj->igCheckUpdate = pCreate->igUpdate;
@ -387,9 +392,9 @@ static int32_t mndBuildStreamObjFromCreateReq(SMnode *pMnode, SStreamObj *pObj,
.pAstRoot = pAst, .pAstRoot = pAst,
.topicQuery = false, .topicQuery = false,
.streamQuery = true, .streamQuery = true,
.triggerType = pObj->trigger == STREAM_TRIGGER_MAX_DELAY ? STREAM_TRIGGER_WINDOW_CLOSE : pObj->trigger, .triggerType = pObj->conf.trigger == STREAM_TRIGGER_MAX_DELAY ? STREAM_TRIGGER_WINDOW_CLOSE : pObj->conf.trigger,
.watermark = pObj->watermark, .watermark = pObj->conf.watermark,
.igExpired = pObj->igExpired, .igExpired = pObj->conf.igExpired,
.deleteMark = pObj->deleteMark, .deleteMark = pObj->deleteMark,
.igCheckUpdate = pObj->igCheckUpdate, .igCheckUpdate = pObj->igCheckUpdate,
}; };
@ -428,30 +433,37 @@ int32_t mndPersistTaskDeployReq(STrans *pTrans, const SStreamTask *pTask) {
SEncoder encoder; SEncoder encoder;
tEncoderInit(&encoder, NULL, 0); tEncoderInit(&encoder, NULL, 0);
tEncodeStreamTask(&encoder, pTask); tEncodeStreamTask(&encoder, pTask);
int32_t size = encoder.pos; int32_t size = encoder.pos;
int32_t tlen = sizeof(SMsgHead) + size; int32_t tlen = sizeof(SMsgHead) + size;
tEncoderClear(&encoder); tEncoderClear(&encoder);
void *buf = taosMemoryCalloc(1, tlen); void *buf = taosMemoryCalloc(1, tlen);
if (buf == NULL) { if (buf == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1; return -1;
} }
((SMsgHead *)buf)->vgId = htonl(pTask->nodeId);
((SMsgHead *)buf)->vgId = htonl(pTask->info.nodeId);
void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
tEncoderInit(&encoder, abuf, size); tEncoderInit(&encoder, abuf, size);
tEncodeStreamTask(&encoder, pTask); tEncodeStreamTask(&encoder, pTask);
tEncoderClear(&encoder); tEncoderClear(&encoder);
STransAction action = {0}; STransAction action = {0};
action.mTraceId = pTrans->mTraceId; action.mTraceId = pTrans->mTraceId;
memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet)); memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet));
action.pCont = buf; action.pCont = buf;
action.contLen = tlen; action.contLen = tlen;
action.msgType = TDMT_STREAM_TASK_DEPLOY; action.msgType = TDMT_STREAM_TASK_DEPLOY;
if (mndTransAppendRedoAction(pTrans, &action) != 0) { if (mndTransAppendRedoAction(pTrans, &action) != 0) {
taosMemoryFree(buf); taosMemoryFree(buf);
return -1; return -1;
} }
return 0; return 0;
} }
@ -459,14 +471,33 @@ int32_t mndPersistStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStrea
int32_t level = taosArrayGetSize(pStream->tasks); int32_t level = taosArrayGetSize(pStream->tasks);
for (int32_t i = 0; i < level; i++) { for (int32_t i = 0; i < level; i++) {
SArray *pLevel = taosArrayGetP(pStream->tasks, i); SArray *pLevel = taosArrayGetP(pStream->tasks, i);
int32_t sz = taosArrayGetSize(pLevel);
for (int32_t j = 0; j < sz; j++) { int32_t numOfTasks = taosArrayGetSize(pLevel);
for (int32_t j = 0; j < numOfTasks; j++) {
SStreamTask *pTask = taosArrayGetP(pLevel, j); SStreamTask *pTask = taosArrayGetP(pLevel, j);
if (mndPersistTaskDeployReq(pTrans, pTask) < 0) { if (mndPersistTaskDeployReq(pTrans, pTask) < 0) {
return -1; return -1;
} }
} }
} }
// persistent stream task for already stored ts data
if (pStream->conf.fillHistory) {
level = taosArrayGetSize(pStream->pHTasksList);
for (int32_t i = 0; i < level; i++) {
SArray *pLevel = taosArrayGetP(pStream->pHTasksList, i);
int32_t numOfTasks = taosArrayGetSize(pLevel);
for (int32_t j = 0; j < numOfTasks; j++) {
SStreamTask *pTask = taosArrayGetP(pLevel, j);
if (mndPersistTaskDeployReq(pTrans, pTask) < 0) {
return -1;
}
}
}
}
return 0; return 0;
} }
@ -474,11 +505,13 @@ int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) {
if (mndPersistStreamTasks(pMnode, pTrans, pStream) < 0) { if (mndPersistStreamTasks(pMnode, pTrans, pStream) < 0) {
return -1; return -1;
} }
SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream);
if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) {
mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr());
return -1; return -1;
} }
(void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY);
return 0; return 0;
} }
@ -490,6 +523,7 @@ int32_t mndPersistDropStreamLog(SMnode *pMnode, STrans *pTrans, SStreamObj *pStr
mndTransDrop(pTrans); mndTransDrop(pTrans);
return -1; return -1;
} }
(void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_DROPPED); (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_DROPPED);
return 0; return 0;
} }
@ -603,16 +637,17 @@ _OVER:
static int32_t mndPersistTaskDropReq(STrans *pTrans, SStreamTask *pTask) { static int32_t mndPersistTaskDropReq(STrans *pTrans, SStreamTask *pTask) {
// vnode // vnode
/*if (pTask->nodeId > 0) {*/ /*if (pTask->info.nodeId > 0) {*/
SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq));
if (pReq == NULL) { if (pReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1; return -1;
} }
pReq->head.vgId = htonl(pTask->nodeId);
pReq->head.vgId = htonl(pTask->info.nodeId);
pReq->taskId = pTask->id.taskId; pReq->taskId = pTask->id.taskId;
STransAction action = {0}; STransAction action = {0};
memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet)); memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet));
action.pCont = pReq; action.pCont = pReq;
action.contLen = sizeof(SVDropStreamTaskReq); action.contLen = sizeof(SVDropStreamTaskReq);
action.msgType = TDMT_STREAM_TASK_DROP; action.msgType = TDMT_STREAM_TASK_DROP;
@ -732,6 +767,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr()); mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr());
goto _OVER; goto _OVER;
} }
mInfo("trans:%d, used to create stream:%s", pTrans->id, createStreamReq.name); mInfo("trans:%d, used to create stream:%s", pTrans->id, createStreamReq.name);
mndTransSetDbName(pTrans, createStreamReq.sourceDB, streamObj.targetDb); mndTransSetDbName(pTrans, createStreamReq.sourceDB, streamObj.targetDb);
@ -748,7 +784,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
} }
// schedule stream task for stream obj // schedule stream task for stream obj
if (mndScheduleStream(pMnode, &streamObj) < 0) { if (mndScheduleStream(pMnode, &streamObj, createStreamReq.lastTs) < 0) {
mError("stream:%s, failed to schedule since %s", createStreamReq.name, terrstr()); mError("stream:%s, failed to schedule since %s", createStreamReq.name, terrstr());
mndTransDrop(pTrans); mndTransDrop(pTrans);
goto _OVER; goto _OVER;
@ -834,7 +870,7 @@ static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, con
SMStreamDoCheckpointMsg *pMsg) { SMStreamDoCheckpointMsg *pMsg) {
SStreamCheckpointSourceReq req = {0}; SStreamCheckpointSourceReq req = {0};
req.checkpointId = pMsg->checkpointId; req.checkpointId = pMsg->checkpointId;
req.nodeId = pTask->nodeId; req.nodeId = pTask->info.nodeId;
req.expireTime = -1; req.expireTime = -1;
req.streamId = pTask->streamId; req.streamId = pTask->streamId;
req.taskId = pTask->taskId; req.taskId = pTask->taskId;
@ -863,7 +899,7 @@ static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, con
SMsgHead *pMsgHead = (SMsgHead *)buf; SMsgHead *pMsgHead = (SMsgHead *)buf;
pMsgHead->contLen = htonl(tlen); pMsgHead->contLen = htonl(tlen);
pMsgHead->vgId = htonl(pTask->nodeId); pMsgHead->vgId = htonl(pTask->info.nodeId);
tEncoderClear(&encoder); tEncoderClear(&encoder);
@ -902,12 +938,12 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
for (int32_t i = 0; i < totLevel; i++) { for (int32_t i = 0; i < totLevel; i++) {
SArray *pLevel = taosArrayGetP(pStream->tasks, i); SArray *pLevel = taosArrayGetP(pStream->tasks, i);
SStreamTask *pTask = taosArrayGetP(pLevel, 0); SStreamTask *pTask = taosArrayGetP(pLevel, 0);
if (pTask->taskLevel == TASK_LEVEL__SOURCE) { if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
int32_t sz = taosArrayGetSize(pLevel); int32_t sz = taosArrayGetSize(pLevel);
for (int32_t j = 0; j < sz; j++) { for (int32_t j = 0; j < sz; j++) {
SStreamTask *pTask = taosArrayGetP(pLevel, j); SStreamTask *pTask = taosArrayGetP(pLevel, j);
/*A(pTask->nodeId > 0);*/ /*A(pTask->info.nodeId > 0);*/
SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->nodeId); SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId);
if (pVgObj == NULL) { if (pVgObj == NULL) {
taosRUnLockLatch(&pStream->lock); taosRUnLockLatch(&pStream->lock);
mndReleaseStream(pMnode, pStream); mndReleaseStream(pMnode, pStream);
@ -965,8 +1001,6 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node; SMnode *pMnode = pReq->info.node;
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
/*SDbObj *pDb = NULL;*/
/*SUserObj *pUser = NULL;*/
SMDropStreamReq dropReq = {0}; SMDropStreamReq dropReq = {0};
if (tDeserializeSMDropStreamReq(pReq->pCont, pReq->contLen, &dropReq) < 0) { if (tDeserializeSMDropStreamReq(pReq->pCont, pReq->contLen, &dropReq) < 0) {
@ -1157,7 +1191,7 @@ static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB
} }
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char *)&pStream->watermark, false); colDataSetVal(pColInfo, numOfRows, (const char *)&pStream->conf.watermark, false);
char trigger[20 + VARSTR_HEADER_SIZE] = {0}; char trigger[20 + VARSTR_HEADER_SIZE] = {0};
char trigger2[20] = {0}; char trigger2[20] = {0};
@ -1187,12 +1221,16 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock
while (numOfRows < rowsCapacity) { while (numOfRows < rowsCapacity) {
pShow->pIter = sdbFetch(pSdb, SDB_STREAM, pShow->pIter, (void **)&pStream); pShow->pIter = sdbFetch(pSdb, SDB_STREAM, pShow->pIter, (void **)&pStream);
if (pShow->pIter == NULL) break; if (pShow->pIter == NULL) {
break;
}
// lock // lock
taosRLockLatch(&pStream->lock); taosRLockLatch(&pStream->lock);
// count task num // count task num
int32_t sz = taosArrayGetSize(pStream->tasks); int32_t sz = taosArrayGetSize(pStream->tasks);
int32_t count = 0; int32_t count = 0;
for (int32_t i = 0; i < sz; i++) { for (int32_t i = 0; i < sz; i++) {
SArray *pLevel = taosArrayGetP(pStream->tasks, i); SArray *pLevel = taosArrayGetP(pStream->tasks, i);
@ -1202,10 +1240,12 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock
if (numOfRows + count > rowsCapacity) { if (numOfRows + count > rowsCapacity) {
blockDataEnsureCapacity(pBlock, numOfRows + count); blockDataEnsureCapacity(pBlock, numOfRows + count);
} }
// add row for each task // add row for each task
for (int32_t i = 0; i < sz; i++) { for (int32_t i = 0; i < sz; i++) {
SArray *pLevel = taosArrayGetP(pStream->tasks, i); SArray *pLevel = taosArrayGetP(pStream->tasks, i);
int32_t levelCnt = taosArrayGetSize(pLevel); int32_t levelCnt = taosArrayGetSize(pLevel);
for (int32_t j = 0; j < levelCnt; j++) { for (int32_t j = 0; j < levelCnt; j++) {
SStreamTask *pTask = taosArrayGetP(pLevel, j); SStreamTask *pTask = taosArrayGetP(pLevel, j);
@ -1215,18 +1255,25 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock
// stream name // stream name
char streamName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; char streamName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0};
STR_WITH_MAXSIZE_TO_VARSTR(streamName, mndGetDbStr(pStream->name), sizeof(streamName)); STR_WITH_MAXSIZE_TO_VARSTR(streamName, mndGetDbStr(pStream->name), sizeof(streamName));
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char *)streamName, false); colDataSetVal(pColInfo, numOfRows, (const char *)streamName, false);
// task id // task id
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char *)&pTask->id.taskId, false);
char idstr[128] = {0};
int32_t len = tintToHex(pTask->id.taskId, &idstr[4]);
idstr[2] = '0';
idstr[3] = 'x';
varDataSetLen(idstr, len + 2);
colDataSetVal(pColInfo, numOfRows, idstr, false);
// node type // node type
char nodeType[20 + VARSTR_HEADER_SIZE] = {0}; char nodeType[20 + VARSTR_HEADER_SIZE] = {0};
varDataSetLen(nodeType, 5); varDataSetLen(nodeType, 5);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
if (pTask->nodeId > 0) { if (pTask->info.nodeId > 0) {
memcpy(varDataVal(nodeType), "vnode", 5); memcpy(varDataVal(nodeType), "vnode", 5);
} else { } else {
memcpy(varDataVal(nodeType), "snode", 5); memcpy(varDataVal(nodeType), "snode", 5);
@ -1235,30 +1282,50 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock
// node id // node id
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
int64_t nodeId = TMAX(pTask->nodeId, 0); int64_t nodeId = TMAX(pTask->info.nodeId, 0);
colDataSetVal(pColInfo, numOfRows, (const char *)&nodeId, false); colDataSetVal(pColInfo, numOfRows, (const char *)&nodeId, false);
// level // level
char level[20 + VARSTR_HEADER_SIZE] = {0}; char level[20 + VARSTR_HEADER_SIZE] = {0};
if (pTask->taskLevel == TASK_LEVEL__SOURCE) { if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
memcpy(varDataVal(level), "source", 6); memcpy(varDataVal(level), "source", 6);
varDataSetLen(level, 6); varDataSetLen(level, 6);
} else if (pTask->taskLevel == TASK_LEVEL__AGG) { } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) {
memcpy(varDataVal(level), "agg", 3); memcpy(varDataVal(level), "agg", 3);
varDataSetLen(level, 3); varDataSetLen(level, 3);
} else if (pTask->taskLevel == TASK_LEVEL__SINK) { } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
memcpy(varDataVal(level), "sink", 4); memcpy(varDataVal(level), "sink", 4);
varDataSetLen(level, 4); varDataSetLen(level, 4);
} else if (pTask->taskLevel == TASK_LEVEL__SINK) {
} }
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char *)&level, false); colDataSetVal(pColInfo, numOfRows, (const char *)&level, false);
// status // status
char status[20 + VARSTR_HEADER_SIZE] = {0}; char status[20 + VARSTR_HEADER_SIZE] = {0};
char status2[20] = {0}; int8_t taskStatus = atomic_load_8(&pTask->status.taskStatus);
strcpy(status, "normal"); if (taskStatus == TASK_STATUS__NORMAL) {
STR_WITH_MAXSIZE_TO_VARSTR(status, status2, sizeof(status)); memcpy(varDataVal(status), "normal", 6);
varDataSetLen(status, 6);
} else if (taskStatus == TASK_STATUS__DROPPING) {
memcpy(varDataVal(status), "dropping", 8);
varDataSetLen(status, 8);
} else if (taskStatus == TASK_STATUS__FAIL) {
memcpy(varDataVal(status), "fail", 4);
varDataSetLen(status, 4);
} else if (taskStatus == TASK_STATUS__STOP) {
memcpy(varDataVal(status), "stop", 4);
varDataSetLen(status, 4);
} else if (taskStatus == TASK_STATUS__SCAN_HISTORY) {
memcpy(varDataVal(status), "history", 7);
varDataSetLen(status, 7);
} else if (taskStatus == TASK_STATUS__HALT) {
memcpy(varDataVal(status), "halt", 4);
varDataSetLen(status, 4);
} else if (taskStatus == TASK_STATUS__PAUSE) {
memcpy(varDataVal(status), "pause", 5);
varDataSetLen(status, 5);
}
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char *)&status, false); colDataSetVal(pColInfo, numOfRows, (const char *)&status, false);
@ -1287,10 +1354,10 @@ static int32_t mndPauseStreamTask(STrans *pTrans, SStreamTask *pTask) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1; return -1;
} }
pReq->head.vgId = htonl(pTask->nodeId); pReq->head.vgId = htonl(pTask->info.nodeId);
pReq->taskId = pTask->id.taskId; pReq->taskId = pTask->id.taskId;
STransAction action = {0}; STransAction action = {0};
memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet)); memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet));
action.pCont = pReq; action.pCont = pReq;
action.contLen = sizeof(SVPauseStreamTaskReq); action.contLen = sizeof(SVPauseStreamTaskReq);
action.msgType = TDMT_STREAM_TASK_PAUSE; action.msgType = TDMT_STREAM_TASK_PAUSE;
@ -1301,21 +1368,36 @@ static int32_t mndPauseStreamTask(STrans *pTrans, SStreamTask *pTask) {
return 0; return 0;
} }
int32_t mndPauseAllStreamTasks(STrans *pTrans, SStreamObj *pStream) { int32_t mndPauseAllStreamTaskImpl(STrans *pTrans, SArray* tasks) {
int32_t size = taosArrayGetSize(pStream->tasks); int32_t size = taosArrayGetSize(tasks);
for (int32_t i = 0; i < size; i++) { for (int32_t i = 0; i < size; i++) {
SArray *pTasks = taosArrayGetP(pStream->tasks, i); SArray *pTasks = taosArrayGetP(tasks, i);
int32_t sz = taosArrayGetSize(pTasks); int32_t sz = taosArrayGetSize(pTasks);
for (int32_t j = 0; j < sz; j++) { for (int32_t j = 0; j < sz; j++) {
SStreamTask *pTask = taosArrayGetP(pTasks, j); SStreamTask *pTask = taosArrayGetP(pTasks, j);
if (pTask->taskLevel != TASK_LEVEL__SINK && mndPauseStreamTask(pTrans, pTask) < 0) { if (pTask->info.taskLevel != TASK_LEVEL__SINK && mndPauseStreamTask(pTrans, pTask) < 0) {
return -1; return -1;
} }
if (atomic_load_8(&pTask->status.taskStatus) != TASK_STATUS__PAUSE) {
atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus);
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE);
}
} }
} }
return 0; return 0;
} }
int32_t mndPauseAllStreamTasks(STrans *pTrans, SStreamObj *pStream) {
int32_t code = mndPauseAllStreamTaskImpl(pTrans, pStream->tasks);
if (code != 0) {
return code;
}
// pStream->pHTasksList is null
// code = mndPauseAllStreamTaskImpl(pTrans, pStream->pHTasksList);
return code;
}
static int32_t mndPersistStreamLog(STrans *pTrans, const SStreamObj *pStream, int8_t status) { static int32_t mndPersistStreamLog(STrans *pTrans, const SStreamObj *pStream, int8_t status) {
SStreamObj streamObj = {0}; SStreamObj streamObj = {0};
memcpy(streamObj.name, pStream->name, TSDB_STREAM_FNAME_LEN); memcpy(streamObj.name, pStream->name, TSDB_STREAM_FNAME_LEN);
@ -1355,6 +1437,10 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) {
} }
} }
if (pStream->status == STREAM_STATUS__PAUSE) {
return 0;
}
if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) { if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) {
sdbRelease(pMnode->pSdb, pStream); sdbRelease(pMnode->pSdb, pStream);
return -1; return -1;
@ -1410,11 +1496,11 @@ static int32_t mndResumeStreamTask(STrans *pTrans, SStreamTask *pTask, int8_t ig
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1; return -1;
} }
pReq->head.vgId = htonl(pTask->nodeId); pReq->head.vgId = htonl(pTask->info.nodeId);
pReq->taskId = pTask->id.taskId; pReq->taskId = pTask->id.taskId;
pReq->igUntreated = igUntreated; pReq->igUntreated = igUntreated;
STransAction action = {0}; STransAction action = {0};
memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet)); memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet));
action.pCont = pReq; action.pCont = pReq;
action.contLen = sizeof(SVResumeStreamTaskReq); action.contLen = sizeof(SVResumeStreamTaskReq);
action.msgType = TDMT_STREAM_TASK_RESUME; action.msgType = TDMT_STREAM_TASK_RESUME;
@ -1432,11 +1518,16 @@ int32_t mndResumeAllStreamTasks(STrans *pTrans, SStreamObj *pStream, int8_t igUn
int32_t sz = taosArrayGetSize(pTasks); int32_t sz = taosArrayGetSize(pTasks);
for (int32_t j = 0; j < sz; j++) { for (int32_t j = 0; j < sz; j++) {
SStreamTask *pTask = taosArrayGetP(pTasks, j); SStreamTask *pTask = taosArrayGetP(pTasks, j);
if (pTask->taskLevel != TASK_LEVEL__SINK && mndResumeStreamTask(pTrans, pTask, igUntreated) < 0) { if (pTask->info.taskLevel != TASK_LEVEL__SINK && mndResumeStreamTask(pTrans, pTask, igUntreated) < 0) {
return -1; return -1;
} }
if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__PAUSE) {
atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus);
}
} }
} }
// pStream->pHTasksList is null
return 0; return 0;
} }
@ -1463,6 +1554,10 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) {
} }
} }
if (pStream->status != STREAM_STATUS__PAUSE) {
return 0;
}
if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) { if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) {
sdbRelease(pMnode->pSdb, pStream); sdbRelease(pMnode->pSdb, pStream);
return -1; return -1;

View File

@ -875,7 +875,7 @@ static int32_t mndRetrieveVgroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *p
// if (pDb == NULL || pDb->compactStartTime <= 0) { // if (pDb == NULL || pDb->compactStartTime <= 0) {
// colDataSetNULL(pColInfo, numOfRows); // colDataSetNULL(pColInfo, numOfRows);
// } else { // } else {
// colDataAppend(pColInfo, numOfRows, (const char *)&pDb->compactStartTime, false); // colDataSetVal(pColInfo, numOfRows, (const char *)&pDb->compactStartTime, false);
// } // }
numOfRows++; numOfRows++;

View File

@ -52,23 +52,21 @@ void sndEnqueueStreamDispatch(SSnode *pSnode, SRpcMsg *pMsg) {
FAIL: FAIL:
if (pMsg->info.handle == NULL) return; if (pMsg->info.handle == NULL) return;
SRpcMsg rsp = { SRpcMsg rsp = { .code = code, .info = pMsg->info};
.code = code,
.info = pMsg->info,
};
tmsgSendRsp(&rsp); tmsgSendRsp(&rsp);
rpcFreeCont(pMsg->pCont); rpcFreeCont(pMsg->pCont);
taosFreeQitem(pMsg); taosFreeQitem(pMsg);
} }
int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) {
ASSERT(pTask->taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->childEpInfo) != 0); ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->pUpstreamEpInfoList) != 0);
pTask->refCnt = 1; pTask->refCnt = 1;
pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId);
pTask->inputQueue = streamQueueOpen(0); pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
pTask->outputQueue = streamQueueOpen(0); pTask->inputQueue = streamQueueOpen(512 << 10);
pTask->outputQueue = streamQueueOpen(512 << 10);
if (pTask->inputQueue == NULL || pTask->outputQueue == NULL) { if (pTask->inputQueue == NULL || pTask->outputQueue == NULL) {
return -1; return -1;
@ -85,14 +83,18 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) {
return -1; return -1;
} }
int32_t numOfChildEp = taosArrayGetSize(pTask->childEpInfo); int32_t numOfChildEp = taosArrayGetSize(pTask->pUpstreamEpInfoList);
SReadHandle handle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState }; SReadHandle handle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState, .fillHistory = pTask->info.fillHistory };
initStreamStateAPI(&handle.api); initStreamStateAPI(&handle.api);
pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, 0); pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, 0);
ASSERT(pTask->exec.pExecutor); ASSERT(pTask->exec.pExecutor);
streamSetupTrigger(pTask); streamSetupScheduleTrigger(pTask);
qDebug("snode:%d expand stream task on snode, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", SNODE_HANDLE,
pTask->id.idStr, pTask->chkInfo.version, pTask->info.selfChildId, pTask->info.taskLevel);
return 0; return 0;
} }
@ -149,9 +151,10 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) {
taosMemoryFree(pTask); taosMemoryFree(pTask);
return -1; return -1;
} }
tDecoderClear(&decoder); tDecoderClear(&decoder);
ASSERT(pTask->taskLevel == TASK_LEVEL__AGG); ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG);
// 2.save task // 2.save task
taosWLockLatch(&pSnode->pMeta->lock); taosWLockLatch(&pSnode->pMeta->lock);
@ -161,19 +164,20 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) {
return -1; return -1;
} }
int32_t numOfTasks = streamMetaGetNumOfTasks(pSnode->pMeta);
taosWUnLockLatch(&pSnode->pMeta->lock); taosWUnLockLatch(&pSnode->pMeta->lock);
// 3.go through recover steps to fill history streamPrepareNdoCheckDownstream(pTask);
if (pTask->fillHistory) { qDebug("snode:%d s-task:%s is deployed on snode and add into meta, status:%s, numOfTasks:%d", SNODE_HANDLE, pTask->id.idStr,
streamSetParamForRecover(pTask); streamGetTaskStatusStr(pTask->status.taskStatus), numOfTasks);
streamAggRecoverPrepare(pTask);
}
return 0; return 0;
} }
int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) { int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) {
SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg; SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg;
qDebug("snode:%d receive msg to drop stream task:0x%x", pSnode->pMeta->vgId, pReq->taskId);
streamMetaRemoveTask(pSnode->pMeta, pReq->taskId); streamMetaRemoveTask(pSnode->pMeta, pReq->taskId);
return 0; return 0;
} }
@ -255,13 +259,15 @@ int32_t sndProcessTaskRetrieveRsp(SSnode *pSnode, SRpcMsg *pMsg) {
} }
int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) {
void *pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
switch (pMsg->msgType) { switch (pMsg->msgType) {
case TDMT_STREAM_TASK_DEPLOY: case TDMT_STREAM_TASK_DEPLOY: {
void *pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
return sndProcessTaskDeployReq(pSnode, pReq, len); return sndProcessTaskDeployReq(pSnode, pReq, len);
}
case TDMT_STREAM_TASK_DROP: case TDMT_STREAM_TASK_DROP:
return sndProcessTaskDropReq(pSnode, pReq, len); return sndProcessTaskDropReq(pSnode, pMsg->pCont, pMsg->contLen);
default: default:
ASSERT(0); ASSERT(0);
} }
@ -277,7 +283,7 @@ int32_t sndProcessTaskRecoverFinishReq(SSnode *pSnode, SRpcMsg *pMsg) {
SDecoder decoder; SDecoder decoder;
tDecoderInit(&decoder, msg, msgLen); tDecoderInit(&decoder, msg, msgLen);
tDecodeSStreamRecoverFinishReq(&decoder, &req); tDecodeStreamRecoverFinishReq(&decoder, &req);
tDecoderClear(&decoder); tDecoderClear(&decoder);
// find task // find task
@ -286,7 +292,7 @@ int32_t sndProcessTaskRecoverFinishReq(SSnode *pSnode, SRpcMsg *pMsg) {
return -1; return -1;
} }
// do process request // do process request
if (streamProcessRecoverFinishReq(pTask, req.childId) < 0) { if (streamProcessRecoverFinishReq(pTask, req.taskId, req.childId) < 0) {
streamMetaReleaseTask(pSnode->pMeta, pTask); streamMetaReleaseTask(pSnode->pMeta, pTask);
return -1; return -1;
} }
@ -300,6 +306,102 @@ int32_t sndProcessTaskRecoverFinishRsp(SSnode *pSnode, SRpcMsg *pMsg) {
return 0; return 0;
} }
int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) {
char *msgStr = pMsg->pCont;
char *msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);
SStreamTaskCheckReq req;
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t *)msgBody, msgLen);
tDecodeStreamTaskCheckReq(&decoder, &req);
tDecoderClear(&decoder);
int32_t taskId = req.downstreamTaskId;
SStreamTaskCheckRsp rsp = {
.reqId = req.reqId,
.streamId = req.streamId,
.childId = req.childId,
.downstreamNodeId = req.downstreamNodeId,
.downstreamTaskId = req.downstreamTaskId,
.upstreamNodeId = req.upstreamNodeId,
.upstreamTaskId = req.upstreamTaskId,
};
SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, taskId);
if (pTask != NULL) {
rsp.status = streamTaskCheckStatus(pTask);
streamMetaReleaseTask(pSnode->pMeta, pTask);
qDebug("s-task:%s recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), status:%s, rsp status %d",
pTask->id.idStr, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId,
streamGetTaskStatusStr(pTask->status.taskStatus), rsp.status);
} else {
rsp.status = 0;
qDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64
") from task:0x%x (vgId:%d), rsp status %d",
taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
}
SEncoder encoder;
int32_t code;
int32_t len;
tEncodeSize(tEncodeStreamTaskCheckRsp, &rsp, len, code);
if (code < 0) {
qError("vgId:%d failed to encode task check rsp, task:0x%x", pSnode->pMeta->vgId, taskId);
return -1;
}
void *buf = rpcMallocCont(sizeof(SMsgHead) + len);
((SMsgHead *)buf)->vgId = htonl(req.upstreamNodeId);
void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
tEncoderInit(&encoder, (uint8_t *)abuf, len);
tEncodeStreamTaskCheckRsp(&encoder, &rsp);
tEncoderClear(&encoder);
SRpcMsg rspMsg = {.code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info};
tmsgSendRsp(&rspMsg);
return 0;
}
int32_t sndProcessStreamTaskCheckRsp(SSnode* pSnode, SRpcMsg* pMsg) {
char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
int32_t code;
SStreamTaskCheckRsp rsp;
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)pReq, len);
code = tDecodeStreamTaskCheckRsp(&decoder, &rsp);
if (code < 0) {
tDecoderClear(&decoder);
return -1;
}
tDecoderClear(&decoder);
qDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d",
rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.status);
SStreamTask* pTask = streamMetaAcquireTask(pSnode->pMeta, rsp.upstreamTaskId);
if (pTask == NULL) {
qError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId,
pSnode->pMeta->vgId);
return -1;
}
code = streamProcessCheckRsp(pTask, &rsp);
streamMetaReleaseTask(pSnode->pMeta, pTask);
return code;
}
int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) {
switch (pMsg->msgType) { switch (pMsg->msgType) {
case TDMT_STREAM_TASK_RUN: case TDMT_STREAM_TASK_RUN:
@ -312,10 +414,14 @@ int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) {
return sndProcessTaskRetrieveReq(pSnode, pMsg); return sndProcessTaskRetrieveReq(pSnode, pMsg);
case TDMT_STREAM_RETRIEVE_RSP: case TDMT_STREAM_RETRIEVE_RSP:
return sndProcessTaskRetrieveRsp(pSnode, pMsg); return sndProcessTaskRetrieveRsp(pSnode, pMsg);
case TDMT_STREAM_RECOVER_FINISH: case TDMT_STREAM_SCAN_HISTORY_FINISH:
return sndProcessTaskRecoverFinishReq(pSnode, pMsg); return sndProcessTaskRecoverFinishReq(pSnode, pMsg);
case TDMT_STREAM_RECOVER_FINISH_RSP: case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP:
return sndProcessTaskRecoverFinishRsp(pSnode, pMsg); return sndProcessTaskRecoverFinishRsp(pSnode, pMsg);
case TDMT_STREAM_TASK_CHECK:
return sndProcessStreamTaskCheckReq(pSnode, pMsg);
case TDMT_STREAM_TASK_CHECK_RSP:
return sndProcessStreamTaskCheckRsp(pSnode, pMsg);
default: default:
ASSERT(0); ASSERT(0);
} }

View File

@ -101,6 +101,7 @@ void initStateStoreAPI(SStateStore* pStore) {
pStore->streamStateCommit = streamStateCommit; pStore->streamStateCommit = streamStateCommit;
pStore->streamStateDestroy= streamStateDestroy; pStore->streamStateDestroy= streamStateDestroy;
pStore->streamStateDeleteCheckPoint = streamStateDeleteCheckPoint; pStore->streamStateDeleteCheckPoint = streamStateDeleteCheckPoint;
pStore->streamStateReloadInfo = streamStateReloadInfo;
} }
void initFunctionStateStore(SFunctionStateStore* pStore) { void initFunctionStateStore(SFunctionStateStore* pStore) {

View File

@ -96,6 +96,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp
int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp); int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp);
int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg); int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg);
int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo); int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo);
int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo);
void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs);
void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs);
void vnodeProposeCommitOnNeed(SVnode *pVnode, bool atExit); void vnodeProposeCommitOnNeed(SVnode *pVnode, bool atExit);
@ -126,8 +127,6 @@ int32_t metaGetCachedTbGroup(void *pVnode, tb_uid_t suid, const uint8_t *pKey,
int32_t metaPutTbGroupToCache(void* pVnode, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload, int32_t metaPutTbGroupToCache(void* pVnode, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload,
int32_t payloadLen); int32_t payloadLen);
int64_t metaGetTbNum(SMeta *pMeta);
int32_t metaGetStbStats(void *pVnode, int64_t uid, int64_t *numOfTables); int32_t metaGetStbStats(void *pVnode, int64_t uid, int64_t *numOfTables);
// tsdb // tsdb

View File

@ -45,27 +45,10 @@ extern "C" {
typedef struct STqOffsetStore STqOffsetStore; typedef struct STqOffsetStore STqOffsetStore;
// tqPush // tqPush
#define EXTRACT_DATA_FROM_WAL_ID (-1)
// typedef struct { #define STREAM_TASK_STATUS_CHECK_ID (-2)
// // msg info
// int64_t consumerId;
// int64_t reqOffset;
// int64_t processedVer;
// int32_t epoch;
// // rpc info
// int64_t reqId;
// SRpcHandleInfo rpcInfo;
// tmr_h timerId;
// int8_t tmrStopped;
// // exec
// int8_t inputStatus;
// int8_t execStatus;
// SStreamQueue inputQ;
// SRWLatch lock;
// } STqPushHandle;
// tqExec // tqExec
typedef struct { typedef struct {
char* qmsg; // SubPlanToString char* qmsg; // SubPlanToString
} STqExecCol; } STqExecCol;
@ -184,10 +167,10 @@ int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname);
// tqStream // tqStream
int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver); int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver);
int32_t tqStreamTasksScanWal(STQ* pTq); int32_t tqStreamTasksScanWal(STQ* pTq);
int32_t tqStreamTasksStatusCheck(STQ* pTq);
// tq util // tq util
int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock); int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock);
char* createStreamTaskIdStr(int64_t streamId, int32_t taskId);
int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem); int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem);
int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg); int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg);
int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId,

View File

@ -64,7 +64,6 @@ typedef struct STsdbReadSnap STsdbReadSnap;
typedef struct SBlockInfo SBlockInfo; typedef struct SBlockInfo SBlockInfo;
typedef struct SSmaInfo SSmaInfo; typedef struct SSmaInfo SSmaInfo;
typedef struct SBlockCol SBlockCol; typedef struct SBlockCol SBlockCol;
typedef struct SVersionRange SVersionRange;
typedef struct SLDataIter SLDataIter; typedef struct SLDataIter SLDataIter;
typedef struct SDiskCol SDiskCol; typedef struct SDiskCol SDiskCol;
typedef struct SDiskData SDiskData; typedef struct SDiskData SDiskData;
@ -383,11 +382,6 @@ struct TSDBKEY {
TSKEY ts; TSKEY ts;
}; };
struct SVersionRange {
uint64_t minVer;
uint64_t maxVer;
};
typedef struct SMemSkipListNode SMemSkipListNode; typedef struct SMemSkipListNode SMemSkipListNode;
struct SMemSkipListNode { struct SMemSkipListNode {
int8_t level; int8_t level;

View File

@ -178,7 +178,8 @@ SArray* metaGetSmaTbUids(SMeta* pMeta);
void* metaGetIdx(SMeta* pMeta); void* metaGetIdx(SMeta* pMeta);
void* metaGetIvtIdx(SMeta* pMeta); void* metaGetIvtIdx(SMeta* pMeta);
void metaReaderInit(SMetaReader* pReader, SMeta* pMeta, int32_t flags); int64_t metaGetTbNum(SMeta *pMeta);
void metaReaderDoInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags);
int32_t metaCreateTSma(SMeta* pMeta, int64_t version, SSmaCfg* pCfg); int32_t metaCreateTSma(SMeta* pMeta, int64_t version, SSmaCfg* pCfg);
int32_t metaDropTSma(SMeta* pMeta, int64_t indexUid); int32_t metaDropTSma(SMeta* pMeta, int64_t indexUid);
@ -217,6 +218,7 @@ int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver);
int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg); int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg);
int tqUnregisterPushHandle(STQ* pTq, void* pHandle); int tqUnregisterPushHandle(STQ* pTq, void* pHandle);
int tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed. int tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed.
int tqCheckStreamStatus(STQ* pTq);
int tqCommit(STQ*); int tqCommit(STQ*);
int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd); int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd);
@ -238,14 +240,14 @@ int32_t tqProcessTaskDropReq(STQ* pTq, int64_t version, char* msg, int32_t msgLe
int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen);
int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen);
int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t version, SRpcMsg* pMsg);
int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec); int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec);
int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t tqProcessTaskTransferStateReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen);
int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskRecoverFinishRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRecoverFinishRsp(STQ* pTq, SRpcMsg* pMsg);
int32_t tqCheckLogInWal(STQ* pTq, int64_t version); int32_t tqCheckLogInWal(STQ* pTq, int64_t version);

View File

@ -17,13 +17,13 @@
#include "osMemory.h" #include "osMemory.h"
#include "tencode.h" #include "tencode.h"
void _metaReaderInit(SMetaReader *pReader, void *pVnode, int32_t flags, SStoreMeta *pAPI) { void _metaReaderInit(SMetaReader* pReader, void* pVnode, int32_t flags, SStoreMeta* pAPI) {
SMeta *pMeta = ((SVnode *)pVnode)->pMeta; SMeta* pMeta = ((SVnode*)pVnode)->pMeta;
metaReaderInit(pReader, pMeta, flags); metaReaderDoInit(pReader, pMeta, flags);
pReader->pAPI = pAPI; pReader->pAPI = pAPI;
} }
void metaReaderInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags) { void metaReaderDoInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags) {
memset(pReader, 0, sizeof(*pReader)); memset(pReader, 0, sizeof(*pReader));
pReader->pMeta = pMeta; pReader->pMeta = pMeta;
pReader->flags = flags; pReader->flags = flags;
@ -143,7 +143,7 @@ tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name) {
int metaGetTableNameByUid(void *pVnode, uint64_t uid, char *tbName) { int metaGetTableNameByUid(void *pVnode, uint64_t uid, char *tbName) {
int code = 0; int code = 0;
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderInit(&mr, ((SVnode *)pVnode)->pMeta, 0); metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0);
code = metaReaderGetTableEntryByUid(&mr, uid); code = metaReaderGetTableEntryByUid(&mr, uid);
if (code < 0) { if (code < 0) {
metaReaderClear(&mr); metaReaderClear(&mr);
@ -159,7 +159,7 @@ int metaGetTableNameByUid(void *pVnode, uint64_t uid, char *tbName) {
int metaGetTableSzNameByUid(void *meta, uint64_t uid, char *tbName) { int metaGetTableSzNameByUid(void *meta, uint64_t uid, char *tbName) {
int code = 0; int code = 0;
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderInit(&mr, (SMeta *)meta, 0); metaReaderDoInit(&mr, (SMeta *)meta, 0);
code = metaReaderGetTableEntryByUid(&mr, uid); code = metaReaderGetTableEntryByUid(&mr, uid);
if (code < 0) { if (code < 0) {
metaReaderClear(&mr); metaReaderClear(&mr);
@ -174,7 +174,7 @@ int metaGetTableSzNameByUid(void *meta, uint64_t uid, char *tbName) {
int metaGetTableUidByName(void *pVnode, char *tbName, uint64_t *uid) { int metaGetTableUidByName(void *pVnode, char *tbName, uint64_t *uid) {
int code = 0; int code = 0;
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderInit(&mr, ((SVnode *)pVnode)->pMeta, 0); metaReaderDoInit(&mr, ((SVnode *)pVnode)->pMeta, 0);
SMetaReader *pReader = &mr; SMetaReader *pReader = &mr;
@ -195,7 +195,7 @@ int metaGetTableUidByName(void *pVnode, char *tbName, uint64_t *uid) {
int metaGetTableTypeByName(void *pVnode, char *tbName, ETableType *tbType) { int metaGetTableTypeByName(void *pVnode, char *tbName, ETableType *tbType) {
int code = 0; int code = 0;
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderInit(&mr, ((SVnode *)pVnode)->pMeta, 0); metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0);
code = metaGetTableEntryByName(&mr, tbName); code = metaGetTableEntryByName(&mr, tbName);
if (code == 0) *tbType = mr.me.type; if (code == 0) *tbType = mr.me.type;
@ -215,7 +215,7 @@ int metaReadNext(SMetaReader *pReader) {
int metaGetTableTtlByUid(void *meta, uint64_t uid, int64_t *ttlDays) { int metaGetTableTtlByUid(void *meta, uint64_t uid, int64_t *ttlDays) {
int code = -1; int code = -1;
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderInit(&mr, (SMeta *)meta, 0); metaReaderDoInit(&mr, (SMeta *)meta, 0);
code = metaReaderGetTableEntryByUid(&mr, uid); code = metaReaderGetTableEntryByUid(&mr, uid);
if (code < 0) { if (code < 0) {
goto _exit; goto _exit;
@ -244,9 +244,7 @@ SMTbCursor *metaOpenTbCursor(void *pVnode) {
return NULL; return NULL;
} }
SVnode *pVnodeObj = pVnode; SVnode* pVnodeObj = pVnode;
// metaReaderInit(&pTbCur->mr, pVnodeObj->pMeta, 0);
// tdbTbcMoveToFirst((TBC *)pTbCur->pDbc); // tdbTbcMoveToFirst((TBC *)pTbCur->pDbc);
pTbCur->pMeta = pVnodeObj->pMeta; pTbCur->pMeta = pVnodeObj->pMeta;
pTbCur->paused = 1; pTbCur->paused = 1;
@ -277,7 +275,7 @@ void metaPauseTbCursor(SMTbCursor *pTbCur) {
} }
void metaResumeTbCursor(SMTbCursor *pTbCur, int8_t first) { void metaResumeTbCursor(SMTbCursor *pTbCur, int8_t first) {
if (pTbCur->paused) { if (pTbCur->paused) {
metaReaderInit(&pTbCur->mr, pTbCur->pMeta, 0); metaReaderDoInit(&pTbCur->mr, pTbCur->pMeta, 0);
tdbTbcOpen(((SMeta *)pTbCur->pMeta)->pUidIdx, (TBC **)&pTbCur->pDbc, NULL); tdbTbcOpen(((SMeta *)pTbCur->pMeta)->pUidIdx, (TBC **)&pTbCur->pDbc, NULL);
@ -784,7 +782,7 @@ STSmaWrapper *metaGetSmaInfoByTable(SMeta *pMeta, tb_uid_t uid, bool deepCopy) {
} }
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderInit(&mr, pMeta, 0); metaReaderDoInit(&mr, pMeta, 0);
int64_t smaId; int64_t smaId;
int smaIdx = 0; int smaIdx = 0;
STSma *pTSma = NULL; STSma *pTSma = NULL;
@ -839,7 +837,7 @@ _err:
STSma *metaGetSmaInfoByIndex(SMeta *pMeta, int64_t indexUid) { STSma *metaGetSmaInfoByIndex(SMeta *pMeta, int64_t indexUid) {
STSma *pTSma = NULL; STSma *pTSma = NULL;
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderInit(&mr, pMeta, 0); metaReaderDoInit(&mr, pMeta, 0);
if (metaReaderGetTableEntryByUid(&mr, indexUid) < 0) { if (metaReaderGetTableEntryByUid(&mr, indexUid) < 0) {
metaWarn("vgId:%d, failed to get table entry for smaId:%" PRIi64, TD_VID(pMeta->pVnode), indexUid); metaWarn("vgId:%d, failed to get table entry for smaId:%" PRIi64, TD_VID(pMeta->pVnode), indexUid);
metaReaderClear(&mr); metaReaderClear(&mr);

View File

@ -37,7 +37,7 @@ int32_t metaCreateTSma(SMeta *pMeta, int64_t version, SSmaCfg *pCfg) {
// validate req // validate req
// save smaIndex // save smaIndex
metaReaderInit(&mr, pMeta, 0); metaReaderDoInit(&mr, pMeta, 0);
if (metaReaderGetTableEntryByUidCache(&mr, pCfg->indexUid) == 0) { if (metaReaderGetTableEntryByUidCache(&mr, pCfg->indexUid) == 0) {
#if 1 #if 1
terrno = TSDB_CODE_TSMA_ALREADY_EXIST; terrno = TSDB_CODE_TSMA_ALREADY_EXIST;

View File

@ -709,7 +709,7 @@ int metaCreateTable(SMeta *pMeta, int64_t ver, SVCreateTbReq *pReq, STableMetaRs
} }
// validate req // validate req
metaReaderInit(&mr, pMeta, 0); metaReaderDoInit(&mr, pMeta, 0);
if (metaGetTableEntryByName(&mr, pReq->name) == 0) { if (metaGetTableEntryByName(&mr, pReq->name) == 0) {
if (pReq->type == TSDB_CHILD_TABLE && pReq->ctb.suid != mr.me.ctbEntry.suid) { if (pReq->type == TSDB_CHILD_TABLE && pReq->ctb.suid != mr.me.ctbEntry.suid) {
terrno = TSDB_CODE_TDB_TABLE_IN_OTHER_STABLE; terrno = TSDB_CODE_TDB_TABLE_IN_OTHER_STABLE;

View File

@ -896,7 +896,7 @@ static int32_t tdRSmaInfoClone(SSma *pSma, SRSmaInfo *pInfo) {
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
metaReaderInit(&mr, SMA_META(pSma), 0); metaReaderDoInit(&mr, SMA_META(pSma), 0);
smaDebug("vgId:%d, rsma clone qTaskInfo for suid:%" PRIi64, SMA_VID(pSma), pInfo->suid); smaDebug("vgId:%d, rsma clone qTaskInfo for suid:%" PRIi64, SMA_VID(pSma), pInfo->suid);
if (metaReaderGetTableEntryByUidCache(&mr, pInfo->suid) < 0) { if (metaReaderGetTableEntryByUidCache(&mr, pInfo->suid) < 0) {
code = terrno; code = terrno;
@ -1116,7 +1116,7 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) {
} }
int64_t nRsmaTables = 0; int64_t nRsmaTables = 0;
metaReaderInit(&mr, SMA_META(pSma), 0); metaReaderDoInit(&mr, SMA_META(pSma), 0);
if (!(uidStore.tbUids = taosArrayInit(1024, sizeof(tb_uid_t)))) { if (!(uidStore.tbUids = taosArrayInit(1024, sizeof(tb_uid_t)))) {
code = TSDB_CODE_OUT_OF_MEMORY; code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit); TSDB_CHECK_CODE(code, lino, _exit);

View File

@ -18,8 +18,6 @@
// 0: not init // 0: not init
// 1: already inited // 1: already inited
// 2: wait to be inited or cleaup // 2: wait to be inited or cleaup
#define WAL_READ_TASKS_ID (-1)
static int32_t tqInitialize(STQ* pTq); static int32_t tqInitialize(STQ* pTq);
static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) { return TMQ_HANDLE_STATUS_EXEC == pHandle->status; } static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) { return TMQ_HANDLE_STATUS_EXEC == pHandle->status; }
@ -158,6 +156,29 @@ void tqClose(STQ* pTq) {
taosMemoryFree(pTq); taosMemoryFree(pTq);
} }
static bool hasStreamTaskInTimer(SStreamMeta* pMeta) {
bool inTimer = false;
taosWLockLatch(&pMeta->lock);
void* pIter = NULL;
while(1) {
pIter = taosHashIterate(pMeta->pTasks, pIter);
if (pIter == NULL) {
break;
}
SStreamTask* pTask = *(SStreamTask**)pIter;
if (pTask->status.timerActive == 1) {
inTimer = true;
}
}
taosWUnLockLatch(&pMeta->lock);
return inTimer;
}
void tqNotifyClose(STQ* pTq) { void tqNotifyClose(STQ* pTq) {
if (pTq != NULL) { if (pTq != NULL) {
taosWLockLatch(&pTq->pStreamMeta->lock); taosWLockLatch(&pTq->pStreamMeta->lock);
@ -170,16 +191,29 @@ void tqNotifyClose(STQ* pTq) {
} }
SStreamTask* pTask = *(SStreamTask**)pIter; SStreamTask* pTask = *(SStreamTask**)pIter;
tqDebug("vgId:%d s-task:%s set dropping flag", pTq->pStreamMeta->vgId, pTask->id.idStr); tqDebug("vgId:%d s-task:%s set closing flag", pTq->pStreamMeta->vgId, pTask->id.idStr);
pTask->status.taskStatus = TASK_STATUS__STOP; pTask->status.taskStatus = TASK_STATUS__STOP;
int64_t st = taosGetTimestampMs(); int64_t st = taosGetTimestampMs();
qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS); qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS);
int64_t el = taosGetTimestampMs() - st; int64_t el = taosGetTimestampMs() - st;
tqDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", pTq->pStreamMeta->vgId, pTask->id.idStr, el); tqDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", pTq->pStreamMeta->vgId, pTask->id.idStr, el);
} }
taosWUnLockLatch(&pTq->pStreamMeta->lock); taosWUnLockLatch(&pTq->pStreamMeta->lock);
tqDebug("vgId:%d start to check all tasks", pTq->pStreamMeta->vgId);
int64_t st = taosGetTimestampMs();
while(hasStreamTaskInTimer(pTq->pStreamMeta)) {
tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pTq->pStreamMeta->vgId);
taosMsleep(100);
}
int64_t el = taosGetTimestampMs() - st;
tqDebug("vgId:%d all stream tasks are not in timer, continue close, elapsed time:%"PRId64" ms", pTq->pStreamMeta->vgId, el);
} }
} }
@ -771,19 +805,32 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL;
pTask->pMsgCb = &pTq->pVnode->msgCb; pTask->pMsgCb = &pTq->pVnode->msgCb;
pTask->pMeta = pTq->pStreamMeta; pTask->pMeta = pTq->pStreamMeta;
pTask->chkInfo.version = ver; pTask->chkInfo.version = ver;
pTask->chkInfo.currentVer = ver; pTask->chkInfo.currentVer = ver;
// expand executor pTask->dataRange.range.maxVer = ver;
pTask->status.taskStatus = (pTask->fillHistory) ? TASK_STATUS__WAIT_DOWNSTREAM : TASK_STATUS__NORMAL; pTask->dataRange.range.minVer = ver;
if (pTask->taskLevel == TASK_LEVEL__SOURCE) { if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1); SStreamTask* pSateTask = pTask;
SStreamTask task = {0};
if (pTask->info.fillHistory) {
task.id = pTask->streamTaskId;
task.pMeta = pTask->pMeta;
pSateTask = &task;
}
pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pSateTask, false, -1, -1);
if (pTask->pState == NULL) { if (pTask->pState == NULL) {
return -1; return -1;
} }
SReadHandle handle = {.vnode = pTq->pVnode, .initTqReader = 1, .pStateBackend = pTask->pState}; SReadHandle handle = {.vnode = pTq->pVnode,
.initTqReader = 1,
.pStateBackend = pTask->pState,
.fillHistory = pTask->info.fillHistory,
.winRange = pTask->dataRange.window};
initStorageAPI(&handle.api); initStorageAPI(&handle.api);
pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId); pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId);
@ -792,14 +839,25 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
} }
qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId); qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId);
} else if (pTask->taskLevel == TASK_LEVEL__AGG) { } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) {
pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1); SStreamTask* pSateTask = pTask;
SStreamTask task = {0};
if (pTask->info.fillHistory) {
task.id = pTask->streamTaskId;
task.pMeta = pTask->pMeta;
pSateTask = &task;
}
pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pSateTask, false, -1, -1);
if (pTask->pState == NULL) { if (pTask->pState == NULL) {
return -1; return -1;
} }
int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo); int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->pUpstreamEpInfoList);
SReadHandle handle = {.vnode = NULL, .numOfVgroups = numOfVgroups, .pStateBackend = pTask->pState}; SReadHandle handle = {.vnode = NULL,
.numOfVgroups = numOfVgroups,
.pStateBackend = pTask->pState,
.fillHistory = pTask->info.fillHistory,
.winRange = pTask->dataRange.window};
initStorageAPI(&handle.api); initStorageAPI(&handle.api);
pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId); pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId);
@ -834,15 +892,17 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
tSimpleHashSetFreeFp(pTask->tbSink.pTblInfo, freePtr); tSimpleHashSetFreeFp(pTask->tbSink.pTblInfo, freePtr);
} }
if (pTask->taskLevel == TASK_LEVEL__SOURCE) { if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
SWalFilterCond cond = {.deleteMsg = 1}; // delete msg also extract from wal files SWalFilterCond cond = {.deleteMsg = 1}; // delete msg also extract from wal files
pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &cond); pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &cond);
} }
streamSetupTrigger(pTask); streamSetupScheduleTrigger(pTask);
tqInfo("vgId:%d expand stream task, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", vgId, tqInfo("vgId:%d expand stream task, s-task:%s, checkpoint ver:%" PRId64
pTask->id.idStr, pTask->chkInfo.version, pTask->selfChildId, pTask->taskLevel); " child id:%d, level:%d, scan-history:%d, trigger:%" PRId64 " ms",
vgId, pTask->id.idStr, pTask->chkInfo.version, pTask->info.selfChildId, pTask->info.taskLevel,
pTask->info.fillHistory, pTask->triggerParam);
// next valid version will add one // next valid version will add one
pTask->chkInfo.version += 1; pTask->chkInfo.version += 1;
@ -858,10 +918,11 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
SDecoder decoder; SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
tDecodeSStreamTaskCheckReq(&decoder, &req); tDecodeStreamTaskCheckReq(&decoder, &req);
tDecoderClear(&decoder); tDecoderClear(&decoder);
int32_t taskId = req.downstreamTaskId; int32_t taskId = req.downstreamTaskId;
SStreamTaskCheckRsp rsp = { SStreamTaskCheckRsp rsp = {
.reqId = req.reqId, .reqId = req.reqId,
.streamId = req.streamId, .streamId = req.streamId,
@ -878,23 +939,20 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
rsp.status = streamTaskCheckStatus(pTask); rsp.status = streamTaskCheckStatus(pTask);
streamMetaReleaseTask(pTq->pStreamMeta, pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask);
tqDebug("s-task:%s recv task check req(reqId:0x%" PRIx64 tqDebug("s-task:%s recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), status:%s, rsp status %d",
") %d at node %d task status:%d, check req from task %d at node %d, rsp status %d", pTask->id.idStr, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId,
pTask->id.idStr, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, pTask->status.taskStatus, streamGetTaskStatusStr(pTask->status.taskStatus), rsp.status);
rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
} else { } else {
rsp.status = 0; rsp.status = 0;
tqDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 tqDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from task:0x%x (vgId:%d), rsp status %d",
") %d at node %d, check req from task:0x%x at node %d, rsp status %d", taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
taskId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId,
rsp.status);
} }
SEncoder encoder; SEncoder encoder;
int32_t code; int32_t code;
int32_t len; int32_t len;
tEncodeSize(tEncodeSStreamTaskCheckRsp, &rsp, len, code); tEncodeSize(tEncodeStreamTaskCheckRsp, &rsp, len, code);
if (code < 0) { if (code < 0) {
tqError("vgId:%d failed to encode task check rsp, task:0x%x", pTq->pStreamMeta->vgId, taskId); tqError("vgId:%d failed to encode task check rsp, task:0x%x", pTq->pStreamMeta->vgId, taskId);
return -1; return -1;
@ -905,7 +963,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
tEncoderInit(&encoder, (uint8_t*)abuf, len); tEncoderInit(&encoder, (uint8_t*)abuf, len);
tEncodeSStreamTaskCheckRsp(&encoder, &rsp); tEncodeStreamTaskCheckRsp(&encoder, &rsp);
tEncoderClear(&encoder); tEncoderClear(&encoder);
SRpcMsg rspMsg = {.code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info}; SRpcMsg rspMsg = {.code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info};
@ -914,13 +972,16 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
return 0; return 0;
} }
int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) {
char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
int32_t code; int32_t code;
SStreamTaskCheckRsp rsp; SStreamTaskCheckRsp rsp;
SDecoder decoder; SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msg, msgLen); tDecoderInit(&decoder, (uint8_t*)pReq, len);
code = tDecodeSStreamTaskCheckRsp(&decoder, &rsp); code = tDecodeStreamTaskCheckRsp(&decoder, &rsp);
if (code < 0) { if (code < 0) {
tDecoderClear(&decoder); tDecoderClear(&decoder);
@ -928,17 +989,18 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, char* msg, int32
} }
tDecoderClear(&decoder); tDecoderClear(&decoder);
tqDebug("tq recv task check rsp(reqId:0x%" PRIx64 ") %d at node %d check req from task:0x%x at node %d, status %d", tqDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d",
rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.status);
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, rsp.upstreamTaskId); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, rsp.upstreamTaskId);
if (pTask == NULL) { if (pTask == NULL) {
tqError("tq failed to locate the stream task:0x%x vgId:%d, it may have been destroyed", rsp.upstreamTaskId, tqError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId,
pTq->pStreamMeta->vgId); pTq->pStreamMeta->vgId);
terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST;
return -1; return -1;
} }
code = streamProcessTaskCheckRsp(pTask, &rsp, sversion); code = streamProcessCheckRsp(pTask, &rsp);
streamMetaReleaseTask(pTq->pStreamMeta, pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return code; return code;
} }
@ -971,105 +1033,232 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms
tDecoderClear(&decoder); tDecoderClear(&decoder);
SStreamMeta* pStreamMeta = pTq->pStreamMeta;
// 2.save task, use the newest commit version as the initial start version of stream task. // 2.save task, use the newest commit version as the initial start version of stream task.
taosWLockLatch(&pTq->pStreamMeta->lock); taosWLockLatch(&pStreamMeta->lock);
code = streamMetaAddDeployedTask(pTq->pStreamMeta, sversion, pTask); code = streamMetaAddDeployedTask(pStreamMeta, sversion, pTask);
int32_t numOfTasks = streamMetaGetNumOfTasks(pTq->pStreamMeta);
int32_t numOfTasks = streamMetaGetNumOfTasks(pStreamMeta);
if (code < 0) { if (code < 0) {
tqError("vgId:%d failed to add s-task:%s, total:%d", vgId, pTask->id.idStr, numOfTasks); tqError("vgId:%d failed to add s-task:%s, total:%d", vgId, pTask->id.idStr, numOfTasks);
taosWUnLockLatch(&pTq->pStreamMeta->lock); taosWUnLockLatch(&pStreamMeta->lock);
return -1; return -1;
} }
taosWUnLockLatch(&pTq->pStreamMeta->lock); taosWUnLockLatch(&pStreamMeta->lock);
// 3.go through recover steps to fill history // 3. It's an fill history task, do nothing. wait for the main task to start it
if (pTask->fillHistory) { streamPrepareNdoCheckDownstream(pTask);
streamTaskCheckDownstream(pTask, sversion);
} tqDebug("vgId:%d s-task:%s is deployed and add into meta, status:%s, numOfTasks:%d", vgId, pTask->id.idStr,
streamGetTaskStatusStr(pTask->status.taskStatus), numOfTasks);
tqDebug("vgId:%d s-task:%s is deployed and add meta from mnd, status:%d, total:%d", vgId, pTask->id.idStr,
pTask->status.taskStatus, numOfTasks);
return 0; return 0;
} }
int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) { int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) {
int32_t code; int32_t code = TSDB_CODE_SUCCESS;
char* msg = pMsg->pCont; char* msg = pMsg->pCont;
int32_t msgLen = pMsg->contLen;
SStreamRecoverStep1Req* pReq = (SStreamRecoverStep1Req*)msg; SStreamMeta* pMeta = pTq->pStreamMeta;
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId); SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)msg;
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->taskId);
if (pTask == NULL) { if (pTask == NULL) {
tqError("vgId:%d failed to acquire stream task:0x%x during stream recover, task may have been destroyed",
pMeta->vgId, pReq->taskId);
return -1; return -1;
} }
// check param // check param
int64_t fillVer1 = pTask->chkInfo.version; int64_t fillVer1 = pTask->chkInfo.version;
if (fillVer1 <= 0) { if (fillVer1 <= 0) {
streamMetaReleaseTask(pTq->pStreamMeta, pTask); streamMetaReleaseTask(pMeta, pTask);
return -1; return -1;
} }
// do recovery step 1 // do recovery step 1
tqDebug("s-task:%s start non-blocking recover stage(step 1) scan", pTask->id.idStr); const char* pId = pTask->id.idStr;
tqDebug("s-task:%s start history data scan stage(step 1), status:%s", pId,
streamGetTaskStatusStr(pTask->status.taskStatus));
int64_t st = taosGetTimestampMs(); int64_t st = taosGetTimestampMs();
int8_t schedStatus = atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE,
TASK_SCHED_STATUS__WAITING);
if (schedStatus != TASK_SCHED_STATUS__INACTIVE) {
ASSERT(0);
return 0;
}
streamSourceRecoverScanStep1(pTask); if (!pReq->igUntreated && !streamTaskRecoverScanStep1Finished(pTask)) {
if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { streamSourceScanHistoryData(pTask);
tqDebug("s-task:%s is dropped, abort recover in step1", pTask->id.idStr); }
streamMetaReleaseTask(pTq->pStreamMeta, pTask); if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING || streamTaskShouldPause(&pTask->status)) {
tqDebug("s-task:%s is dropped or paused, abort recover in step1", pId);
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
streamMetaReleaseTask(pMeta, pTask);
return 0; return 0;
} }
double el = (taosGetTimestampMs() - st) / 1000.0; double el = (taosGetTimestampMs() - st) / 1000.0;
tqDebug("s-task:%s non-blocking recover stage(step 1) ended, elapsed time:%.2fs", pTask->id.idStr, el); tqDebug("s-task:%s history data scan stage(step 1) ended, elapsed time:%.2fs", pId, el);
// build msg to launch next step if (pTask->info.fillHistory) {
SStreamRecoverStep2Req req; SVersionRange* pRange = NULL;
code = streamBuildSourceRecover2Req(pTask, &req); SStreamTask* pStreamTask = NULL;
if (code < 0) {
streamMetaReleaseTask(pTq->pStreamMeta, pTask); if (!pReq->igUntreated && !streamTaskRecoverScanStep1Finished(pTask)) {
return -1; // 1. stop the related stream task, get the current scan wal version of stream task, ver.
pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.taskId);
if (pStreamTask == NULL) {
// todo handle error
}
ASSERT(pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE);
// wait for the stream task get ready for scan history data
while (((pStreamTask->status.downstreamReady == 0) && (pStreamTask->status.taskStatus != TASK_STATUS__STOP)) ||
pStreamTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) {
tqDebug(
"s-task:%s level:%d related stream task:%s not ready for halt, wait for it continue and recheck in 100ms",
pTask->id.idStr, pTask->info.taskLevel, pStreamTask->id.idStr);
taosMsleep(100);
}
// now we can stop the stream task execution
pStreamTask->status.taskStatus = TASK_STATUS__HALT;
tqDebug("s-task:%s level:%d status is set to halt by history scan task:%s", pStreamTask->id.idStr,
pStreamTask->info.taskLevel, pId);
// if it's an source task, extract the last version in wal.
pRange = &pTask->dataRange.range;
int64_t latestVer = walReaderGetCurrentVer(pStreamTask->exec.pWalReader);
ASSERT(latestVer >= pRange->maxVer);
int64_t nextStartVer = pRange->maxVer + 1;
if (nextStartVer > latestVer - 1) {
// no input data yet. no need to execute the secondardy scan while stream task halt
streamTaskRecoverSetAllStepFinished(pTask);
tqDebug("s-task:%s no need to perform secondary scan-history-data(step 2), since no data ingest during secondary scan", pId);
} else {
// 2. do secondary scan of the history data, the time window remain, and the version range is updated to
// [pTask->dataRange.range.maxVer, ver1]
pRange->minVer = nextStartVer;
pRange->maxVer = latestVer - 1;
}
}
if (!streamTaskRecoverScanStep1Finished(pTask)) {
tqDebug("s-task:%s level:%d verRange:%" PRId64 " - %" PRId64
" do secondary scan-history-data after halt the related stream task:%s",
pId, pTask->info.taskLevel, pRange->minVer, pRange->maxVer, pStreamTask->id.idStr);
ASSERT(pTask->status.schedStatus == TASK_SCHED_STATUS__WAITING);
st = taosGetTimestampMs();
streamSetParamForStreamScannerStep2(pTask, pRange, &pTask->dataRange.window);
}
if (!streamTaskRecoverScanStep2Finished(pTask)) {
streamSourceScanHistoryData(pTask);
if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING || streamTaskShouldPause(&pTask->status)) {
tqDebug("s-task:%s is dropped or paused, abort recover in step1", pId);
streamMetaReleaseTask(pMeta, pTask);
return 0;
}
streamTaskRecoverSetAllStepFinished(pTask);
}
el = (taosGetTimestampMs() - st) / 1000.0;
tqDebug("s-task:%s history data scan stage(step 2) ended, elapsed time:%.2fs", pId, el);
// 3. notify the downstream tasks to transfer executor state after handle all history blocks.
if (!pTask->status.transferState) {
code = streamDispatchTransferStateMsg(pTask);
if (code != TSDB_CODE_SUCCESS) {
// todo handle error
}
pTask->status.transferState = true;
}
// 4. 1) transfer the ownership of executor state, 2) update the scan data range for source task.
// 5. resume the related stream task.
streamTryExec(pTask);
pTask->status.taskStatus = TASK_STATUS__DROPPING;
tqDebug("s-task:%s scan-history-task set status to be dropping", pId);
streamMetaSaveTask(pMeta, pTask);
streamMetaSaveTask(pMeta, pStreamTask);
streamMetaReleaseTask(pMeta, pTask);
streamMetaReleaseTask(pMeta, pStreamTask);
taosWLockLatch(&pMeta->lock);
if (streamMetaCommit(pTask->pMeta) < 0) {
// persist to disk
}
taosWUnLockLatch(&pMeta->lock);
} else {
// todo update the chkInfo version for current task.
// this task has an associated history stream task, so we need to scan wal from the end version of
// history scan. The current version of chkInfo.current is not updated during the history scan
STimeWindow* pWindow = &pTask->dataRange.window;
if (pTask->historyTaskId.taskId == 0) {
*pWindow = (STimeWindow){INT64_MIN, INT64_MAX};
tqDebug("s-task:%s no associated task, reset the time window:%" PRId64 " - %" PRId64, pId, pWindow->skey,
pWindow->ekey);
} else {
tqDebug("s-task:%s history data scan completed, now start to scan data from wal, start ver:%" PRId64
", window:%" PRId64 " - %" PRId64,
pId, pTask->chkInfo.currentVer, pWindow->skey, pWindow->ekey);
}
code = streamTaskScanHistoryDataComplete(pTask);
streamMetaReleaseTask(pMeta, pTask);
// let's start the stream task by extracting data from wal
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
tqStartStreamTasks(pTq);
}
return code;
} }
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
return 0;
}
// serialize msg
int32_t len = sizeof(SStreamRecoverStep1Req);
void* serializedReq = rpcMallocCont(len);
if (serializedReq == NULL) {
tqError("s-task:%s failed to prepare the step2 stage, out of memory", pTask->id.idStr);
return -1;
}
memcpy(serializedReq, &req, len);
// dispatch msg
tqDebug("s-task:%s start recover block stage", pTask->id.idStr);
SRpcMsg rpcMsg = {
.code = 0, .contLen = len, .msgType = TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE, .pCont = serializedReq};
tmsgPutToQueue(&pTq->pVnode->msgCb, WRITE_QUEUE, &rpcMsg);
return 0; return 0;
} }
int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { // notify the downstream tasks to transfer executor state after handle all history blocks.
int32_t code = 0; int32_t tqProcessTaskTransferStateReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
SStreamTransferReq req;
SStreamRecoverStep2Req* pReq = (SStreamRecoverStep2Req*)msg; SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
int32_t code = tDecodeStreamRecoverFinishReq(&decoder, &req);
tDecoderClear(&decoder);
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId);
if (pTask == NULL) { if (pTask == NULL) {
tqError("failed to find task:0x%x, it may have been dropped already", req.taskId);
return -1; return -1;
} }
// do recovery step 2 // transfer the ownership of executor state
streamTaskReleaseState(pTask);
tqDebug("s-task:%s receive state transfer req", pTask->id.idStr);
SStreamTask* pStreamTask = streamMetaAcquireTask(pTq->pStreamMeta, pTask->streamTaskId.taskId);
streamTaskReloadState(pStreamTask);
ASSERT(pTask->streamTaskId.taskId != 0);
pTask->status.transferState = true; // persistent data?
#if 0
// do check if current task handle all data in the input queue
int64_t st = taosGetTimestampMs(); int64_t st = taosGetTimestampMs();
tqDebug("s-task:%s start step2 recover, ts:%" PRId64, pTask->id.idStr, st); tqDebug("s-task:%s start step2 recover, ts:%" PRId64, pTask->id.idStr, st);
@ -1108,16 +1297,19 @@ int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t
tqDebug("s-task:%s step2 recover finished, el:%.2fs", pTask->id.idStr, el); tqDebug("s-task:%s step2 recover finished, el:%.2fs", pTask->id.idStr, el);
// dispatch recover finish req to all related downstream task // dispatch recover finish req to all related downstream task
code = streamDispatchRecoverFinishReq(pTask); code = streamDispatchScanHistoryFinishMsg(pTask);
if (code < 0) { if (code < 0) {
streamMetaReleaseTask(pTq->pStreamMeta, pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return -1; return -1;
} }
atomic_store_8(&pTask->fillHistory, 0); atomic_store_8(&pTask->info.fillHistory, 0);
streamMetaSaveTask(pTq->pStreamMeta, pTask); streamMetaSaveTask(pTq->pStreamMeta, pTask);
#endif
streamSchedExec(pTask);
streamMetaReleaseTask(pTq->pStreamMeta, pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return 0; return 0;
} }
@ -1130,7 +1322,7 @@ int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg) {
SDecoder decoder; SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msg, msgLen); tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
tDecodeSStreamRecoverFinishReq(&decoder, &req); tDecodeStreamRecoverFinishReq(&decoder, &req);
tDecoderClear(&decoder); tDecoderClear(&decoder);
// find task // find task
@ -1139,7 +1331,7 @@ int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg) {
return -1; return -1;
} }
// do process request // do process request
if (streamProcessRecoverFinishReq(pTask, req.childId) < 0) { if (streamProcessRecoverFinishReq(pTask, req.taskId, req.childId) < 0) {
streamMetaReleaseTask(pTq->pStreamMeta, pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return -1; return -1;
} }
@ -1212,22 +1404,31 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
int32_t taskId = pReq->taskId; int32_t taskId = pReq->taskId;
int32_t vgId = TD_VID(pTq->pVnode); int32_t vgId = TD_VID(pTq->pVnode);
if (taskId == WAL_READ_TASKS_ID) { // all tasks are extracted submit data from the wal if (taskId == STREAM_TASK_STATUS_CHECK_ID) {
tqStreamTasksStatusCheck(pTq);
return 0;
}
if (taskId == EXTRACT_DATA_FROM_WAL_ID) { // all tasks are extracted submit data from the wal
tqStreamTasksScanWal(pTq); tqStreamTasksScanWal(pTq);
return 0; return 0;
} }
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
if (pTask != NULL) { if (pTask != NULL) {
if (pTask->status.taskStatus == TASK_STATUS__NORMAL) { // even in halt status, the data in inputQ must be processed
tqDebug("vgId:%d s-task:%s start to process block from wal, last chk point:%" PRId64, vgId, pTask->id.idStr, int8_t status = pTask->status.taskStatus;
if (status == TASK_STATUS__NORMAL || status == TASK_STATUS__HALT) {
tqDebug("vgId:%d s-task:%s start to process block from inputQ, last chk point:%" PRId64, vgId, pTask->id.idStr,
pTask->chkInfo.version); pTask->chkInfo.version);
streamProcessRunReq(pTask); streamProcessRunReq(pTask);
} else { } else {
if (streamTaskShouldPause(&pTask->status)) { // if (streamTaskShouldPause(&pTask->status)) {
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
} // }
tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr);
tqDebug("vgId:%d s-task:%s ignore run req since not in ready state, status:%s, sched-status:%d", vgId,
pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus);
} }
streamMetaReleaseTask(pTq->pStreamMeta, pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask);
@ -1286,65 +1487,100 @@ int32_t tqProcessTaskDropReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgL
return 0; return 0;
} }
int32_t tqProcessTaskPauseImpl(SStreamMeta* pStreamMeta, SStreamTask* pTask) {
if (pTask) {
if (!streamTaskShouldPause(&pTask->status)) {
tqDebug("vgId:%d s-task:%s set pause flag", pStreamMeta->vgId, pTask->id.idStr);
atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus);
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE);
}
streamMetaReleaseTask(pStreamMeta, pTask);
} else {
return -1;
}
return 0;
}
int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)msg; SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)msg;
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
int32_t code = tqProcessTaskPauseImpl(pTq->pStreamMeta, pTask);
if (code != 0) {
return code;
}
SStreamTask* pHistoryTask = streamMetaAcquireTask(pTq->pStreamMeta, pTask->historyTaskId.taskId);
if (pHistoryTask) {
code = tqProcessTaskPauseImpl(pTq->pStreamMeta, pHistoryTask);
}
return code;
}
int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, int8_t igUntreated) {
int32_t vgId = pTq->pStreamMeta->vgId;
if (pTask) { if (pTask) {
tqDebug("vgId:%d s-task:%s set pause flag", pTq->pStreamMeta->vgId, pTask->id.idStr); if (streamTaskShouldPause(&pTask->status)) {
atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus); atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus);
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE);
// no lock needs to secure the access of the version
if (igUntreated && pTask->info.taskLevel == TASK_LEVEL__SOURCE && !pTask->info.fillHistory) {
// discard all the data when the stream task is suspended.
walReaderSetSkipToVersion(pTask->exec.pWalReader, sversion);
tqDebug("vgId:%d s-task:%s resume to exec, prev paused version:%" PRId64 ", start from vnode ver:%" PRId64
", schedStatus:%d",
vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
} else { // from the previous paused version and go on
tqDebug("vgId:%d s-task:%s resume to exec, from paused ver:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d",
vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
}
if (pTask->info.fillHistory && pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
streamStartRecoverTask(pTask, igUntreated);
} else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && taosQueueItemSize(pTask->inputQueue->queue) == 0) {
tqStartStreamTasks(pTq);
} else {
streamSchedExec(pTask);
}
}
streamMetaReleaseTask(pTq->pStreamMeta, pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask);
} else {
return -1;
} }
return 0; return 0;
} }
int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg; SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg;
int32_t vgId = pTq->pStreamMeta->vgId;
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
if (pTask) { int32_t code = tqProcessTaskResumeImpl(pTq, pTask, sversion, pReq->igUntreated);
atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus); if (code != 0) {
return code;
// no lock needs to secure the access of the version
if (pReq->igUntreated && pTask->taskLevel == TASK_LEVEL__SOURCE) {
// discard all the data when the stream task is suspended.
walReaderSetSkipToVersion(pTask->exec.pWalReader, sversion);
tqDebug("vgId:%d s-task:%s resume to exec, prev paused version:%" PRId64 ", start from vnode ver:%" PRId64
", schedStatus:%d",
vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
} else { // from the previous paused version and go on
tqDebug("vgId:%d s-task:%s resume to exec, from paused ver:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d",
vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
}
if (pTask->taskLevel == TASK_LEVEL__SOURCE && taosQueueItemSize(pTask->inputQueue->queue) == 0) {
tqStartStreamTasks(pTq);
} else {
streamSchedExec(pTask);
}
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
} else {
tqError("vgId:%d failed to find the s-task:0x%x for resume stream task", vgId, pReq->taskId);
} }
return 0; SStreamTask* pHistoryTask = streamMetaAcquireTask(pTq->pStreamMeta, pTask->historyTaskId.taskId);
if (pHistoryTask) {
code = tqProcessTaskResumeImpl(pTq, pHistoryTask, sversion, pReq->igUntreated);
}
return code;
} }
int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) { int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
char* msgStr = pMsg->pCont; char* msgStr = pMsg->pCont;
char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);
SDecoder decoder;
SStreamRetrieveReq req; SStreamRetrieveReq req;
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
tDecodeStreamRetrieveReq(&decoder, &req); tDecodeStreamRetrieveReq(&decoder, &req);
tDecoderClear(&decoder); tDecoderClear(&decoder);
int32_t taskId = req.dstTaskId; int32_t taskId = req.dstTaskId;
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
if (pTask) { if (pTask) {
SRpcMsg rsp = {.info = pMsg->info, .code = 0}; SRpcMsg rsp = {.info = pMsg->info, .code = 0};
streamProcessRetrieveReq(pTask, &req, &rsp); streamProcessRetrieveReq(pTask, &req, &rsp);
streamMetaReleaseTask(pTq->pStreamMeta, pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask);
tDeleteStreamRetrieveReq(&req); tDeleteStreamRetrieveReq(&req);
return 0; return 0;
@ -1425,43 +1661,3 @@ FAIL:
int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; } int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; }
int32_t tqStartStreamTasks(STQ* pTq) {
int32_t vgId = TD_VID(pTq->pVnode);
SStreamMeta* pMeta = pTq->pStreamMeta;
taosWLockLatch(&pMeta->lock);
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
if (numOfTasks == 0) {
tqInfo("vgId:%d no stream tasks exist", vgId);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
pMeta->walScanCounter += 1;
if (pMeta->walScanCounter > 1) {
tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
if (pRunReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr());
taosWUnLockLatch(&pMeta->lock);
return -1;
}
tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d", vgId, numOfTasks);
pRunReq->head.vgId = vgId;
pRunReq->streamId = 0;
pRunReq->taskId = WAL_READ_TASKS_ID;
SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg);
taosWUnLockLatch(&pMeta->lock);
return 0;
}

View File

@ -114,7 +114,7 @@ bool isValValidForTable(STqHandle* pHandle, SWalCont* pHead) {
} }
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderInit(&mr, pHandle->execHandle.pTqReader->pVnodeMeta, 0); metaReaderDoInit(&mr, pHandle->execHandle.pTqReader->pVnodeMeta, 0);
if (metaGetTableEntryByName(&mr, req.tbName) < 0) { if (metaGetTableEntryByName(&mr, req.tbName) < 0) {
metaReaderClear(&mr); metaReaderClear(&mr);
@ -1109,7 +1109,7 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) {
} }
SStreamTask* pTask = *(SStreamTask**)pIter; SStreamTask* pTask = *(SStreamTask**)pIter;
if (pTask->taskLevel == TASK_LEVEL__SOURCE) { if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
int32_t code = qUpdateTableListForStreamScanner(pTask->exec.pExecutor, tbUidList, isAdd); int32_t code = qUpdateTableListForStreamScanner(pTask->exec.pExecutor, tbUidList, isAdd);
if (code != 0) { if (code != 0) {
tqError("vgId:%d, s-task:%s update qualified table error for stream task", vgId, pTask->id.idStr); tqError("vgId:%d, s-task:%s update qualified table error for stream task", vgId, pTask->id.idStr);

View File

@ -16,6 +16,7 @@
#include "tq.h" #include "tq.h"
static int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle); static int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle);
static int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId);
// this function should be executed by stream threads. // this function should be executed by stream threads.
// extract submit block from WAL, and add them into the input queue for the sources tasks. // extract submit block from WAL, and add them into the input queue for the sources tasks.
@ -57,7 +58,111 @@ int32_t tqStreamTasksScanWal(STQ* pTq) {
return 0; return 0;
} }
static int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) { int32_t tqStreamTasksStatusCheck(STQ* pTq) {
int32_t vgId = TD_VID(pTq->pVnode);
SStreamMeta* pMeta = pTq->pStreamMeta;
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
tqDebug("vgId:%d start to check all (%d) stream tasks downstream status", vgId, numOfTasks);
if (numOfTasks == 0) {
return TSDB_CODE_SUCCESS;
}
SArray* pTaskList = NULL;
taosWLockLatch(&pMeta->lock);
pTaskList = taosArrayDup(pMeta->pTaskList, NULL);
taosWUnLockLatch(&pMeta->lock);
for (int32_t i = 0; i < numOfTasks; ++i) {
int32_t* pTaskId = taosArrayGet(pTaskList, i);
SStreamTask* pTask = streamMetaAcquireTask(pMeta, *pTaskId);
if (pTask == NULL) {
continue;
}
streamTaskCheckDownstreamTasks(pTask);
streamMetaReleaseTask(pMeta, pTask);
}
taosArrayDestroy(pTaskList);
return 0;
}
int32_t tqCheckStreamStatus(STQ* pTq) {
int32_t vgId = TD_VID(pTq->pVnode);
SStreamMeta* pMeta = pTq->pStreamMeta;
taosWLockLatch(&pMeta->lock);
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
if (numOfTasks == 0) {
tqInfo("vgId:%d no stream tasks exist", vgId);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
if (pRunReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr());
taosWUnLockLatch(&pMeta->lock);
return -1;
}
tqDebug("vgId:%d check for stream tasks status, numOfTasks:%d", vgId, numOfTasks);
pRunReq->head.vgId = vgId;
pRunReq->streamId = 0;
pRunReq->taskId = STREAM_TASK_STATUS_CHECK_ID;
SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
int32_t tqStartStreamTasks(STQ* pTq) {
int32_t vgId = TD_VID(pTq->pVnode);
SStreamMeta* pMeta = pTq->pStreamMeta;
taosWLockLatch(&pMeta->lock);
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
if (numOfTasks == 0) {
tqInfo("vgId:%d no stream tasks exist", vgId);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
pMeta->walScanCounter += 1;
if (pMeta->walScanCounter > 1) {
tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
if (pRunReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr());
taosWUnLockLatch(&pMeta->lock);
return -1;
}
tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d", vgId, numOfTasks);
pRunReq->head.vgId = vgId;
pRunReq->streamId = 0;
pRunReq->taskId = EXTRACT_DATA_FROM_WAL_ID;
SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) {
// seek the stored version and extract data from WAL // seek the stored version and extract data from WAL
int64_t firstVer = walReaderGetValidFirstVer(pTask->exec.pWalReader); int64_t firstVer = walReaderGetValidFirstVer(pTask->exec.pWalReader);
if (pTask->chkInfo.currentVer < firstVer) { if (pTask->chkInfo.currentVer < firstVer) {
@ -102,7 +207,7 @@ static int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) {
int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
*pScanIdle = true; *pScanIdle = true;
bool noNewDataInWal = true; bool noDataInWal = true;
int32_t vgId = pStreamMeta->vgId; int32_t vgId = pStreamMeta->vgId;
int32_t numOfTasks = taosArrayGetSize(pStreamMeta->pTaskList); int32_t numOfTasks = taosArrayGetSize(pStreamMeta->pTaskList);
@ -129,15 +234,13 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
} }
int32_t status = pTask->status.taskStatus; int32_t status = pTask->status.taskStatus;
if (pTask->taskLevel != TASK_LEVEL__SOURCE) { if (pTask->info.taskLevel != TASK_LEVEL__SOURCE) {
// tqTrace("s-task:%s level:%d not source task, no need to start", pTask->id.idStr, pTask->taskLevel);
streamMetaReleaseTask(pStreamMeta, pTask); streamMetaReleaseTask(pStreamMeta, pTask);
continue; continue;
} }
if (streamTaskShouldStop(&pTask->status) || status == TASK_STATUS__RECOVER_PREPARE || if (status != TASK_STATUS__NORMAL) {
status == TASK_STATUS__WAIT_DOWNSTREAM || streamTaskShouldPause(&pTask->status)) { tqDebug("s-task:%s not ready for new submit block from wal, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status));
tqDebug("s-task:%s not ready for new submit block from wal, status:%d", pTask->id.idStr, status);
streamMetaReleaseTask(pStreamMeta, pTask); streamMetaReleaseTask(pStreamMeta, pTask);
continue; continue;
} }
@ -157,39 +260,47 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
continue; continue;
} }
int32_t numOfItemsInQ = taosQueueItemSize(pTask->inputQueue->queue);
// append the data for the stream // append the data for the stream
SStreamQueueItem* pItem = NULL; SStreamQueueItem* pItem = NULL;
code = extractMsgFromWal(pTask->exec.pWalReader, (void**) &pItem, pTask->id.idStr); code = extractMsgFromWal(pTask->exec.pWalReader, (void**) &pItem, pTask->id.idStr);
if (code != TSDB_CODE_SUCCESS) { // failed, continue
if ((code != TSDB_CODE_SUCCESS || pItem == NULL) && (numOfItemsInQ == 0)) { // failed, continue
streamMetaReleaseTask(pStreamMeta, pTask); streamMetaReleaseTask(pStreamMeta, pTask);
continue; continue;
} }
// delete ignore if (pItem != NULL) {
if (pItem == NULL) { noDataInWal = false;
streamMetaReleaseTask(pStreamMeta, pTask); code = tAppendDataToInputQueue(pTask, pItem);
continue; if (code == TSDB_CODE_SUCCESS) {
pTask->chkInfo.currentVer = walReaderGetCurrentVer(pTask->exec.pWalReader);
tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr,
pTask->chkInfo.currentVer);
} else {
tqError("s-task:%s append input queue failed, too many in inputQ, ver:%" PRId64, pTask->id.idStr,
pTask->chkInfo.currentVer);
}
} }
noNewDataInWal = false; if ((code == TSDB_CODE_SUCCESS) || (numOfItemsInQ > 0)) {
code = streamSchedExec(pTask);
code = tqAddInputBlockNLaunchTask(pTask, pItem); if (code != TSDB_CODE_SUCCESS) {
if (code == TSDB_CODE_SUCCESS) { streamMetaReleaseTask(pStreamMeta, pTask);
pTask->chkInfo.currentVer = walReaderGetCurrentVer(pTask->exec.pWalReader); return -1;
tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, }
pTask->chkInfo.currentVer);
} else {
tqError("s-task:%s append input queue failed, ver:%" PRId64, pTask->id.idStr, pTask->chkInfo.currentVer);
} }
streamMetaReleaseTask(pStreamMeta, pTask); streamMetaReleaseTask(pStreamMeta, pTask);
} }
// all wal are checked, and no new data available in wal. // all wal are checked, and no new data available in wal.
if (noNewDataInWal) { if (noDataInWal) {
*pScanIdle = true; *pScanIdle = true;
} }
taosArrayDestroy(pTaskList); taosArrayDestroy(pTaskList);
return 0; return 0;
} }

View File

@ -48,7 +48,7 @@ static int32_t tqAddBlockSchemaToRsp(const STqExecHandle* pExec, STaosxRsp* pRsp
static int32_t tqAddTbNameToRsp(const STQ* pTq, int64_t uid, STaosxRsp* pRsp, int32_t n) { static int32_t tqAddTbNameToRsp(const STQ* pTq, int64_t uid, STaosxRsp* pRsp, int32_t n) {
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderInit(&mr, pTq->pVnode->pMeta, 0); metaReaderDoInit(&mr, pTq->pVnode->pMeta, 0);
// TODO add reference to gurantee success // TODO add reference to gurantee success
if (metaReaderGetTableEntryByUidCache(&mr, uid) < 0) { if (metaReaderGetTableEntryByUidCache(&mr, uid) < 0) {

View File

@ -309,7 +309,7 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d
tbData.uid = pTableSinkInfo->uid; tbData.uid = pTableSinkInfo->uid;
} else { } else {
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderInit(&mr, pVnode->pMeta, 0); metaReaderDoInit(&mr, pVnode->pMeta, 0);
if (metaGetTableEntryByName(&mr, ctbName) < 0) { if (metaGetTableEntryByName(&mr, ctbName) < 0) {
metaReaderClear(&mr); metaReaderClear(&mr);
taosMemoryFree(pTableSinkInfo); taosMemoryFree(pTableSinkInfo);
@ -412,7 +412,7 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d
if (k == 0) { if (k == 0) {
SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex);
void* colData = colDataGetData(pColData, j); void* colData = colDataGetData(pColData, j);
tqDebug("tq sink pipe2, row %d, col %d ts %" PRId64, j, k, *(int64_t*)colData); tqTrace("tq sink pipe2, row %d, col %d ts %" PRId64, j, k, *(int64_t*)colData);
} }
if (IS_SET_NULL(pCol)) { if (IS_SET_NULL(pCol)) {
SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type); SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type);

View File

@ -20,12 +20,6 @@
static int32_t tqSendMetaPollRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq, static int32_t tqSendMetaPollRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq,
const SMqMetaRsp* pRsp, int32_t vgId); const SMqMetaRsp* pRsp, int32_t vgId);
char* createStreamTaskIdStr(int64_t streamId, int32_t taskId) {
char buf[128] = {0};
sprintf(buf, "0x%" PRIx64 "-0x%x", streamId, taskId);
return taosStrdup(buf);
}
int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem) { int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem) {
int32_t code = tAppendDataToInputQueue(pTask, pQueueItem); int32_t code = tAppendDataToInputQueue(pTask, pQueueItem);
if (code < 0) { if (code < 0) {

View File

@ -1610,7 +1610,7 @@ static tb_uid_t getTableSuidByUid(tb_uid_t uid, STsdb *pTsdb) {
tb_uid_t suid = 0; tb_uid_t suid = 0;
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderInit(&mr, pTsdb->pVnode->pMeta, 0); metaReaderDoInit(&mr, pTsdb->pVnode->pMeta, 0);
if (metaReaderGetTableEntryByUidCache(&mr, uid) < 0) { if (metaReaderGetTableEntryByUidCache(&mr, uid) < 0) {
metaReaderClear(&mr); // table not esist metaReaderClear(&mr); // table not esist
return 0; return 0;

View File

@ -248,7 +248,7 @@ static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdb
STbData* piMemTbData); STbData* piMemTbData);
static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr,
int8_t* pLevel); int8_t* pLevel);
static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, const char* id);
static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader);
static int32_t doBuildDataBlock(STsdbReader* pReader); static int32_t doBuildDataBlock(STsdbReader* pReader);
static TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader); static TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader);
@ -775,7 +775,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void
pReader->order = pCond->order; pReader->order = pCond->order;
pReader->idStr = (idstr != NULL) ? taosStrdup(idstr) : NULL; pReader->idStr = (idstr != NULL) ? taosStrdup(idstr) : NULL;
pReader->verRange = getQueryVerRange(pVnode, pCond, level); pReader->verRange = getQueryVerRange(pVnode, pCond, idstr);
pReader->type = pCond->type; pReader->type = pCond->type;
pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows);
pReader->blockInfoBuf.numPerBucket = 1000; // 1000 tables per bucket pReader->blockInfoBuf.numPerBucket = 1000; // 1000 tables per bucket
@ -3721,7 +3721,7 @@ static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* ret
return VND_TSDB(pVnode); return VND_TSDB(pVnode);
} }
SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level) { SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, const char* id) {
int64_t startVer = (pCond->startVersion == -1) ? 0 : pCond->startVersion; int64_t startVer = (pCond->startVersion == -1) ? 0 : pCond->startVersion;
int64_t endVer = 0; int64_t endVer = 0;
@ -3732,6 +3732,9 @@ SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_
endVer = (pCond->endVersion > pVnode->state.applied) ? pVnode->state.applied : pCond->endVersion; endVer = (pCond->endVersion > pVnode->state.applied) ? pVnode->state.applied : pCond->endVersion;
} }
tsdbDebug("queried verRange:%"PRId64"-%"PRId64", revised query verRange:%"PRId64"-%"PRId64", %s", pCond->startVersion,
pCond->endVersion, startVer, endVer, id);
return (SVersionRange){.minVer = startVer, .maxVer = endVer}; return (SVersionRange){.minVer = startVer, .maxVer = endVer};
} }
@ -5452,7 +5455,7 @@ int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) {
int32_t tsdbGetTableSchema(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) { int32_t tsdbGetTableSchema(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) {
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderInit(&mr, ((SVnode*)pVnode)->pMeta, 0); metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0);
int32_t code = metaReaderGetTableEntryByUidCache(&mr, uid); int32_t code = metaReaderGetTableEntryByUidCache(&mr, uid);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
@ -5584,57 +5587,3 @@ void tsdbReaderSetId(STsdbReader* pReader, const char* idstr) {
void tsdbReaderSetCloseFlag(STsdbReader* pReader) { pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED; } void tsdbReaderSetCloseFlag(STsdbReader* pReader) { pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED; }
/*-------------todo:refactor the implementation of those APIs in this file to seperate the API into two files------*/
// opt perf, do NOT create so many readers
int64_t tsdbGetLastTimestamp(SVnode* pVnode, void* pTableList, int32_t numOfTables, const char* pIdStr) {
SQueryTableDataCond cond = {.type = TIMEWINDOW_RANGE_CONTAINED, .numOfCols = 1, .order = TSDB_ORDER_DESC,
.startVersion = -1, .endVersion = -1};
cond.twindows.skey = INT64_MIN;
cond.twindows.ekey = INT64_MAX;
cond.colList = taosMemoryCalloc(1, sizeof(SColumnInfo));
cond.pSlotList = taosMemoryMalloc(sizeof(int32_t) * cond.numOfCols);
if (cond.colList == NULL || cond.pSlotList == NULL) {
// todo
}
cond.colList[0].colId = 1;
cond.colList[0].slotId = 0;
cond.colList[0].type = TSDB_DATA_TYPE_TIMESTAMP;
cond.pSlotList[0] = 0;
STableKeyInfo* pTableKeyInfo = pTableList;
STsdbReader* pReader = NULL;
SSDataBlock* pBlock = createDataBlock();
SColumnInfoData data = {0};
data.info = (SColumnInfo) {.type = TSDB_DATA_TYPE_TIMESTAMP, .colId = 1, .bytes = TSDB_KEYSIZE};
blockDataAppendColInfo(pBlock, &data);
int64_t key = INT64_MIN;
for(int32_t i = 0; i < numOfTables; ++i) {
int32_t code = tsdbReaderOpen(pVnode, &cond, &pTableKeyInfo[i], 1, pBlock, (void**)&pReader, pIdStr, false, NULL);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
bool hasData = false;
code = tsdbNextDataBlock(pReader, &hasData);
if (!hasData || code != TSDB_CODE_SUCCESS) {
continue;
}
SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, 0);
int64_t k = *(int64_t*)pCol->pData;
if (key < k) {
key = k;
}
tsdbReaderClose(pReader);
}
return 0;
}

View File

@ -528,25 +528,25 @@ void tsdbFidKeyRange(int32_t fid, int32_t minutes, int8_t precision, TSKEY *minK
*maxKey = *minKey + tsTickPerMin[precision] * minutes - 1; *maxKey = *minKey + tsTickPerMin[precision] * minutes - 1;
} }
int32_t tsdbFidLevel(int32_t fid, STsdbKeepCfg *pKeepCfg, int64_t now) { int32_t tsdbFidLevel(int32_t fid, STsdbKeepCfg *pKeepCfg, int64_t nowSec) {
int32_t aFid[3]; int32_t aFid[3];
TSKEY key; TSKEY key;
if (pKeepCfg->precision == TSDB_TIME_PRECISION_MILLI) { if (pKeepCfg->precision == TSDB_TIME_PRECISION_MILLI) {
now = now * 1000; nowSec = nowSec * 1000;
} else if (pKeepCfg->precision == TSDB_TIME_PRECISION_MICRO) { } else if (pKeepCfg->precision == TSDB_TIME_PRECISION_MICRO) {
now = now * 1000000l; nowSec = nowSec * 1000000l;
} else if (pKeepCfg->precision == TSDB_TIME_PRECISION_NANO) { } else if (pKeepCfg->precision == TSDB_TIME_PRECISION_NANO) {
now = now * 1000000000l; nowSec = nowSec * 1000000000l;
} else { } else {
ASSERT(0); ASSERT(0);
} }
key = now - pKeepCfg->keep0 * tsTickPerMin[pKeepCfg->precision]; key = nowSec - pKeepCfg->keep0 * tsTickPerMin[pKeepCfg->precision];
aFid[0] = tsdbKeyFid(key, pKeepCfg->days, pKeepCfg->precision); aFid[0] = tsdbKeyFid(key, pKeepCfg->days, pKeepCfg->precision);
key = now - pKeepCfg->keep1 * tsTickPerMin[pKeepCfg->precision]; key = nowSec - pKeepCfg->keep1 * tsTickPerMin[pKeepCfg->precision];
aFid[1] = tsdbKeyFid(key, pKeepCfg->days, pKeepCfg->precision); aFid[1] = tsdbKeyFid(key, pKeepCfg->days, pKeepCfg->precision);
key = now - pKeepCfg->keep2 * tsTickPerMin[pKeepCfg->precision]; key = nowSec - pKeepCfg->keep2 * tsTickPerMin[pKeepCfg->precision];
aFid[2] = tsdbKeyFid(key, pKeepCfg->days, pKeepCfg->precision); aFid[2] = tsdbKeyFid(key, pKeepCfg->days, pKeepCfg->precision);
if (fid >= aFid[0]) { if (fid >= aFid[0]) {
@ -640,7 +640,7 @@ SColVal *tsdbRowIterNext(STSDBRowIter *pIter) {
int32_t tsdbRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { int32_t tsdbRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) {
int32_t code = 0; int32_t code = 0;
TSDBKEY key = TSDBROW_KEY(pRow); TSDBKEY key = TSDBROW_KEY(pRow);
SColVal * pColVal = &(SColVal){0}; SColVal *pColVal = &(SColVal){0};
STColumn *pTColumn; STColumn *pTColumn;
int32_t iCol, jCol = 1; int32_t iCol, jCol = 1;
@ -764,7 +764,7 @@ int32_t tsdbRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema)
} }
} }
int32_t tsdbRowMergerInit(SRowMerger* pMerger, STSchema *pSchema) { int32_t tsdbRowMergerInit(SRowMerger *pMerger, STSchema *pSchema) {
pMerger->pTSchema = pSchema; pMerger->pTSchema = pSchema;
pMerger->pArray = taosArrayInit(pSchema->numOfCols, sizeof(SColVal)); pMerger->pArray = taosArrayInit(pSchema->numOfCols, sizeof(SColVal));
if (pMerger->pArray == NULL) { if (pMerger->pArray == NULL) {
@ -774,7 +774,7 @@ int32_t tsdbRowMergerInit(SRowMerger* pMerger, STSchema *pSchema) {
} }
} }
void tsdbRowMergerClear(SRowMerger* pMerger) { void tsdbRowMergerClear(SRowMerger *pMerger) {
for (int32_t iCol = 1; iCol < pMerger->pTSchema->numOfCols; iCol++) { for (int32_t iCol = 1; iCol < pMerger->pTSchema->numOfCols; iCol++) {
SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol);
if (IS_VAR_DATA_TYPE(pTColVal->type)) { if (IS_VAR_DATA_TYPE(pTColVal->type)) {
@ -785,7 +785,7 @@ void tsdbRowMergerClear(SRowMerger* pMerger) {
taosArrayClear(pMerger->pArray); taosArrayClear(pMerger->pArray);
} }
void tsdbRowMergerCleanup(SRowMerger* pMerger) { void tsdbRowMergerCleanup(SRowMerger *pMerger) {
int32_t numOfCols = taosArrayGetSize(pMerger->pArray); int32_t numOfCols = taosArrayGetSize(pMerger->pArray);
for (int32_t iCol = 1; iCol < numOfCols; iCol++) { for (int32_t iCol = 1; iCol < numOfCols; iCol++) {
SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol);
@ -1041,8 +1041,6 @@ int32_t tsdbBuildDeleteSkyline2(SArray *aDelData, int32_t sidx, int32_t eidx, SA
// SBlockData ====================================================== // SBlockData ======================================================
int32_t tBlockDataCreate(SBlockData *pBlockData) { int32_t tBlockDataCreate(SBlockData *pBlockData) {
int32_t code = 0;
pBlockData->suid = 0; pBlockData->suid = 0;
pBlockData->uid = 0; pBlockData->uid = 0;
pBlockData->nRow = 0; pBlockData->nRow = 0;
@ -1051,7 +1049,7 @@ int32_t tBlockDataCreate(SBlockData *pBlockData) {
pBlockData->aTSKEY = NULL; pBlockData->aTSKEY = NULL;
pBlockData->nColData = 0; pBlockData->nColData = 0;
pBlockData->aColData = NULL; pBlockData->aColData = NULL;
return code; return 0;
} }
void tBlockDataDestroy(SBlockData *pBlockData) { void tBlockDataDestroy(SBlockData *pBlockData) {
@ -1107,8 +1105,8 @@ int32_t tBlockDataInit(SBlockData *pBlockData, TABLEID *pId, STSchema *pTSchema,
int32_t iColumn = 1; int32_t iColumn = 1;
STColumn *pTColumn = &pTSchema->columns[iColumn]; STColumn *pTColumn = &pTSchema->columns[iColumn];
for (int32_t iCid = 0; iCid < nCid; iCid++) { for (int32_t iCid = 0; iCid < nCid; iCid++) {
// aCid array (from taos client catalog) contains columns that does not exist in the pTSchema. the pTSchema is
// aCid array (from taos client catalog) contains columns that does not exist in the pTSchema. the pTSchema is newer // newer
if (pTColumn == NULL) { if (pTColumn == NULL) {
continue; continue;
} }
@ -1239,7 +1237,7 @@ int32_t tBlockDataAppendRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTS
_exit: _exit:
return code; return code;
} }
static int32_t tBlockDataUpdateRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema) { int32_t tBlockDataUpdateRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema) {
int32_t code = 0; int32_t code = 0;
// version // version

View File

@ -203,6 +203,7 @@ void initStateStoreAPI(SStateStore* pStore) {
pStore->streamStateCommit = streamStateCommit; pStore->streamStateCommit = streamStateCommit;
pStore->streamStateDestroy = streamStateDestroy; pStore->streamStateDestroy = streamStateDestroy;
pStore->streamStateDeleteCheckPoint = streamStateDeleteCheckPoint; pStore->streamStateDeleteCheckPoint = streamStateDeleteCheckPoint;
pStore->streamStateReloadInfo = streamStateReloadInfo;
} }
void initMetaReaderAPI(SStoreMetaReader* pMetaReader) { void initMetaReaderAPI(SStoreMetaReader* pMetaReader) {

View File

@ -62,7 +62,7 @@ int vnodeGetTableMeta(SVnode *pVnode, SRpcMsg *pMsg, bool direct) {
} }
// query meta // query meta
metaReaderInit(&mer1, pVnode->pMeta, 0); metaReaderDoInit(&mer1, pVnode->pMeta, 0);
if (metaGetTableEntryByName(&mer1, infoReq.tbName) < 0) { if (metaGetTableEntryByName(&mer1, infoReq.tbName) < 0) {
code = terrno; code = terrno;
@ -79,7 +79,7 @@ int vnodeGetTableMeta(SVnode *pVnode, SRpcMsg *pMsg, bool direct) {
schemaTag = mer1.me.stbEntry.schemaTag; schemaTag = mer1.me.stbEntry.schemaTag;
metaRsp.suid = mer1.me.uid; metaRsp.suid = mer1.me.uid;
} else if (mer1.me.type == TSDB_CHILD_TABLE) { } else if (mer1.me.type == TSDB_CHILD_TABLE) {
metaReaderInit(&mer2, pVnode->pMeta, META_READER_NOLOCK); metaReaderDoInit(&mer2, pVnode->pMeta, META_READER_NOLOCK);
if (metaReaderGetTableEntryByUid(&mer2, mer1.me.ctbEntry.suid) < 0) goto _exit; if (metaReaderGetTableEntryByUid(&mer2, mer1.me.ctbEntry.suid) < 0) goto _exit;
strcpy(metaRsp.stbName, mer2.me.name); strcpy(metaRsp.stbName, mer2.me.name);
@ -175,7 +175,7 @@ int vnodeGetTableCfg(SVnode *pVnode, SRpcMsg *pMsg, bool direct) {
} }
// query meta // query meta
metaReaderInit(&mer1, pVnode->pMeta, 0); metaReaderDoInit(&mer1, pVnode->pMeta, 0);
if (metaGetTableEntryByName(&mer1, cfgReq.tbName) < 0) { if (metaGetTableEntryByName(&mer1, cfgReq.tbName) < 0) {
code = terrno; code = terrno;
@ -188,7 +188,7 @@ int vnodeGetTableCfg(SVnode *pVnode, SRpcMsg *pMsg, bool direct) {
code = TSDB_CODE_VND_HASH_MISMATCH; code = TSDB_CODE_VND_HASH_MISMATCH;
goto _exit; goto _exit;
} else if (mer1.me.type == TSDB_CHILD_TABLE) { } else if (mer1.me.type == TSDB_CHILD_TABLE) {
metaReaderInit(&mer2, pVnode->pMeta, 0); metaReaderDoInit(&mer2, pVnode->pMeta, 0);
if (metaReaderGetTableEntryByUid(&mer2, mer1.me.ctbEntry.suid) < 0) goto _exit; if (metaReaderGetTableEntryByUid(&mer2, mer1.me.ctbEntry.suid) < 0) goto _exit;
strcpy(cfgRsp.stbName, mer2.me.name); strcpy(cfgRsp.stbName, mer2.me.name);

View File

@ -402,10 +402,6 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg
if (!syncUtilUserCommit(pMsg->msgType)) goto _exit; if (!syncUtilUserCommit(pMsg->msgType)) goto _exit;
if (pMsg->msgType == TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE || pMsg->msgType == TDMT_STREAM_TASK_CHECK_RSP) {
if (tqCheckLogInWal(pVnode->pTq, ver)) return 0;
}
// skip header // skip header
pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
len = pMsg->contLen - sizeof(SMsgHead); len = pMsg->contLen - sizeof(SMsgHead);
@ -501,16 +497,6 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg
goto _err; goto _err;
} }
} break; } break;
case TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE: {
if (tqProcessTaskRecover2Req(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) {
goto _err;
}
} break;
case TDMT_STREAM_TASK_CHECK_RSP: {
if (tqProcessStreamTaskCheckRsp(pVnode->pTq, ver, pReq, len) < 0) {
goto _err;
}
} break;
case TDMT_VND_ALTER_CONFIRM: case TDMT_VND_ALTER_CONFIRM:
needCommit = pVnode->config.hashChange; needCommit = pVnode->config.hashChange;
if (vnodeProcessAlterConfirmReq(pVnode, ver, pReq, len, pRsp) < 0) { if (vnodeProcessAlterConfirmReq(pVnode, ver, pReq, len, pRsp) < 0) {
@ -641,26 +627,49 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) {
// return tqProcessPollReq(pVnode->pTq, pMsg); // return tqProcessPollReq(pVnode->pTq, pMsg);
case TDMT_VND_TMQ_VG_WALINFO: case TDMT_VND_TMQ_VG_WALINFO:
return tqProcessVgWalInfoReq(pVnode->pTq, pMsg); return tqProcessVgWalInfoReq(pVnode->pTq, pMsg);
default:
vError("unknown msg type:%d in fetch queue", pMsg->msgType);
return TSDB_CODE_APP_ERROR;
}
}
int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) {
vTrace("vgId:%d, msg:%p in fetch queue is processing", pVnode->config.vgId, pMsg);
if ((pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_VND_TABLE_META || pMsg->msgType == TDMT_VND_TABLE_CFG ||
pMsg->msgType == TDMT_VND_BATCH_META) &&
!syncIsReadyForRead(pVnode->sync)) {
vnodeRedirectRpcMsg(pVnode, pMsg, terrno);
return 0;
}
switch (pMsg->msgType) {
case TDMT_STREAM_TASK_RUN: case TDMT_STREAM_TASK_RUN:
return tqProcessTaskRunReq(pVnode->pTq, pMsg); return tqProcessTaskRunReq(pVnode->pTq, pMsg);
case TDMT_STREAM_TASK_DISPATCH: case TDMT_STREAM_TASK_DISPATCH:
return tqProcessTaskDispatchReq(pVnode->pTq, pMsg, true); return tqProcessTaskDispatchReq(pVnode->pTq, pMsg, true);
case TDMT_STREAM_TASK_CHECK:
return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg);
case TDMT_STREAM_TASK_DISPATCH_RSP: case TDMT_STREAM_TASK_DISPATCH_RSP:
return tqProcessTaskDispatchRsp(pVnode->pTq, pMsg); return tqProcessTaskDispatchRsp(pVnode->pTq, pMsg);
case TDMT_STREAM_TASK_CHECK:
return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg);
case TDMT_STREAM_TASK_CHECK_RSP:
return tqProcessStreamTaskCheckRsp(pVnode->pTq, 0, pMsg);
case TDMT_STREAM_RETRIEVE: case TDMT_STREAM_RETRIEVE:
return tqProcessTaskRetrieveReq(pVnode->pTq, pMsg); return tqProcessTaskRetrieveReq(pVnode->pTq, pMsg);
case TDMT_STREAM_RETRIEVE_RSP: case TDMT_STREAM_RETRIEVE_RSP:
return tqProcessTaskRetrieveRsp(pVnode->pTq, pMsg); return tqProcessTaskRetrieveRsp(pVnode->pTq, pMsg);
case TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE: case TDMT_VND_STREAM_SCAN_HISTORY:
return tqProcessTaskRecover1Req(pVnode->pTq, pMsg); return tqProcessTaskScanHistory(pVnode->pTq, pMsg);
case TDMT_STREAM_RECOVER_FINISH: case TDMT_STREAM_TRANSFER_STATE: {
char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
return tqProcessTaskTransferStateReq(pVnode->pTq, 0, pReq, len);
}
case TDMT_STREAM_SCAN_HISTORY_FINISH:
return tqProcessTaskRecoverFinishReq(pVnode->pTq, pMsg); return tqProcessTaskRecoverFinishReq(pVnode->pTq, pMsg);
case TDMT_STREAM_RECOVER_FINISH_RSP: case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP:
return tqProcessTaskRecoverFinishRsp(pVnode->pTq, pMsg); return tqProcessTaskRecoverFinishRsp(pVnode->pTq, pMsg);
default: default:
vError("unknown msg type:%d in fetch queue", pMsg->msgType); vError("unknown msg type:%d in stream queue", pMsg->msgType);
return TSDB_CODE_APP_ERROR; return TSDB_CODE_APP_ERROR;
} }
} }
@ -1695,7 +1704,7 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe
tDecodeSBatchDeleteReq(&decoder, &deleteReq); tDecodeSBatchDeleteReq(&decoder, &deleteReq);
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderInit(&mr, pVnode->pMeta, META_READER_NOLOCK); metaReaderDoInit(&mr, pVnode->pMeta, META_READER_NOLOCK);
int32_t sz = taosArrayGetSize(deleteReq.deleteReqs); int32_t sz = taosArrayGetSize(deleteReq.deleteReqs);
for (int32_t i = 0; i < sz; i++) { for (int32_t i = 0; i < sz; i++) {

View File

@ -554,7 +554,7 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx)
vInfo("vgId:%d, not launch stream tasks, since stream tasks are disabled", pVnode->config.vgId); vInfo("vgId:%d, not launch stream tasks, since stream tasks are disabled", pVnode->config.vgId);
} else { } else {
vInfo("vgId:%d start to launch stream tasks", pVnode->config.vgId); vInfo("vgId:%d start to launch stream tasks", pVnode->config.vgId);
tqStartStreamTasks(pVnode->pTq); tqCheckStreamStatus(pVnode->pTq);
} }
} }

View File

@ -392,7 +392,7 @@ static int32_t setAliveResultIntoDataBlock(int64_t* pConnId, SSDataBlock* pBlock
int32_t status = 0; int32_t status = 0;
int32_t code = getAliveStatusFromApi(pConnId, dbName, &status); int32_t code = getAliveStatusFromApi(pConnId, dbName, &status);
if (code == TSDB_CODE_SUCCESS) { if (code == TSDB_CODE_SUCCESS) {
colDataAppend(pCol1, 0, (const char*)&status, false); colDataSetVal(pCol1, 0, (const char*)&status, false);
} }
return code; return code;
} }

View File

@ -285,6 +285,8 @@ typedef struct SStreamAggSupporter {
int16_t stateKeyType; int16_t stateKeyType;
SDiskbasedBuf* pResultBuf; SDiskbasedBuf* pResultBuf;
SStateStore stateStore; SStateStore stateStore;
STimeWindow winRange;
SStorageAPI* pSessionAPI;
} SStreamAggSupporter; } SStreamAggSupporter;
typedef struct SWindowSupporter { typedef struct SWindowSupporter {
@ -503,6 +505,8 @@ typedef struct SStreamSessionAggOperatorInfo {
SArray* pUpdated; SArray* pUpdated;
SSHashObj* pStUpdated; SSHashObj* pStUpdated;
int64_t dataVersion; int64_t dataVersion;
SArray* historyWins;
bool isHistoryOp;
} SStreamSessionAggOperatorInfo; } SStreamSessionAggOperatorInfo;
typedef struct SStreamStateAggOperatorInfo { typedef struct SStreamStateAggOperatorInfo {
@ -522,6 +526,8 @@ typedef struct SStreamStateAggOperatorInfo {
SArray* pUpdated; SArray* pUpdated;
SSHashObj* pSeUpdated; SSHashObj* pSeUpdated;
int64_t dataVersion; int64_t dataVersion;
bool isHistoryOp;
SArray* historyWins;
} SStreamStateAggOperatorInfo; } SStreamStateAggOperatorInfo;
typedef struct SStreamPartitionOperatorInfo { typedef struct SStreamPartitionOperatorInfo {
@ -678,6 +684,8 @@ void doUpdateNumOfRows(SqlFunctionCtx* pCtx, SResultRow* pRow, int32_t numOfExpr
void doClearBufferedBlocks(SStreamScanInfo* pInfo); void doClearBufferedBlocks(SStreamScanInfo* pInfo);
uint64_t calcGroupId(char* pData, int32_t len); uint64_t calcGroupId(char* pData, int32_t len);
void streamOpReleaseState(struct SOperatorInfo* pOperator);
void streamOpReloadState(struct SOperatorInfo* pOperator);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -35,6 +35,7 @@ typedef SSDataBlock* (*__optr_fn_t)(struct SOperatorInfo* pOptr);
typedef void (*__optr_close_fn_t)(void* param); typedef void (*__optr_close_fn_t)(void* param);
typedef int32_t (*__optr_explain_fn_t)(struct SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len); typedef int32_t (*__optr_explain_fn_t)(struct SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len);
typedef int32_t (*__optr_reqBuf_fn_t)(struct SOperatorInfo* pOptr); typedef int32_t (*__optr_reqBuf_fn_t)(struct SOperatorInfo* pOptr);
typedef void (*__optr_state_fn_t)(struct SOperatorInfo* pOptr);
typedef struct SOperatorFpSet { typedef struct SOperatorFpSet {
__optr_open_fn_t _openFn; // DO NOT invoke this function directly __optr_open_fn_t _openFn; // DO NOT invoke this function directly
@ -45,6 +46,8 @@ typedef struct SOperatorFpSet {
__optr_encode_fn_t encodeResultRow; __optr_encode_fn_t encodeResultRow;
__optr_decode_fn_t decodeResultRow; __optr_decode_fn_t decodeResultRow;
__optr_explain_fn_t getExplainFn; __optr_explain_fn_t getExplainFn;
__optr_state_fn_t releaseStreamStateFn;
__optr_state_fn_t reloadStreamStateFn;
} SOperatorFpSet; } SOperatorFpSet;
enum { enum {
@ -126,13 +129,13 @@ SOperatorInfo* createTimeSliceOperatorInfo(SOperatorInfo* downstream, SPhysiNode
SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream, SSortMergeJoinPhysiNode* pJoinNode, SExecTaskInfo* pTaskInfo); SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream, SSortMergeJoinPhysiNode* pJoinNode, SExecTaskInfo* pTaskInfo);
SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle);
SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild); SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild, SReadHandle* pHandle);
SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo);
SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle);
SOperatorInfo* createStreamFillOperatorInfo(SOperatorInfo* downstream, SStreamFillPhysiNode* pPhyFillNode, SExecTaskInfo* pTaskInfo); SOperatorInfo* createStreamFillOperatorInfo(SOperatorInfo* downstream, SStreamFillPhysiNode* pPhyFillNode, SExecTaskInfo* pTaskInfo);
@ -143,6 +146,7 @@ SOperatorInfo* createEventwindowOperatorInfo(SOperatorInfo* downstream, SPhysiNo
SOperatorFpSet createOperatorFpSet(__optr_open_fn_t openFn, __optr_fn_t nextFn, __optr_fn_t cleanup, SOperatorFpSet createOperatorFpSet(__optr_open_fn_t openFn, __optr_fn_t nextFn, __optr_fn_t cleanup,
__optr_close_fn_t closeFn, __optr_reqBuf_fn_t reqBufFn, __optr_explain_fn_t explain); __optr_close_fn_t closeFn, __optr_reqBuf_fn_t reqBufFn, __optr_explain_fn_t explain);
void setOperatorStreamStateFn(SOperatorInfo* pOperator, __optr_state_fn_t relaseFn, __optr_state_fn_t reloadFn);
int32_t optrDummyOpenFn(SOperatorInfo* pOperator); int32_t optrDummyOpenFn(SOperatorInfo* pOperator);
int32_t appendDownstream(SOperatorInfo* p, SOperatorInfo** pDownstream, int32_t num); int32_t appendDownstream(SOperatorInfo* p, SOperatorInfo** pDownstream, int32_t num);
void setOperatorCompleted(SOperatorInfo* pOperator); void setOperatorCompleted(SOperatorInfo* pOperator);

View File

@ -62,10 +62,12 @@ typedef struct {
SSchemaWrapper* schema; SSchemaWrapper* schema;
char tbName[TSDB_TABLE_NAME_LEN]; // this is the current scan table: todo refactor char tbName[TSDB_TABLE_NAME_LEN]; // this is the current scan table: todo refactor
int8_t recoverStep; int8_t recoverStep;
bool recoverStep1Finished;
bool recoverStep2Finished;
int8_t recoverScanFinished; int8_t recoverScanFinished;
SQueryTableDataCond tableCond; SQueryTableDataCond tableCond;
int64_t fillHistoryVer1; SVersionRange fillHistoryVer;
int64_t fillHistoryVer2; STimeWindow fillHistoryWindow;
SStreamState* pState; SStreamState* pState;
int64_t dataVersion; int64_t dataVersion;
int64_t checkPointId; int64_t checkPointId;

View File

@ -92,6 +92,7 @@ static int32_t doSetStreamOpOpen(SOperatorInfo* pOperator, char* id) {
qError("join not supported for stream block scan, %s" PRIx64, id); qError("join not supported for stream block scan, %s" PRIx64, id);
return TSDB_CODE_APP_ERROR; return TSDB_CODE_APP_ERROR;
} }
pOperator->status = OP_NOT_OPENED; pOperator->status = OP_NOT_OPENED;
return doSetStreamOpOpen(pOperator->pDownstream[0], id); return doSetStreamOpOpen(pOperator->pDownstream[0], id);
} }
@ -115,6 +116,16 @@ void resetTaskInfo(qTaskInfo_t tinfo) {
clearStreamBlock(pTaskInfo->pRoot); clearStreamBlock(pTaskInfo->pRoot);
} }
void qResetStreamInfoTimeWindow(qTaskInfo_t tinfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*) tinfo;
if (pTaskInfo == NULL) {
return;
}
qDebug("%s set fill history start key:%"PRId64, GET_TASKID(pTaskInfo), INT64_MIN);
pTaskInfo->streamInfo.fillHistoryWindow.skey = INT64_MIN;
}
static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t numOfBlocks, int32_t type, const char* id) { static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t numOfBlocks, int32_t type, const char* id) {
if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
if (pOperator->numOfDownstream == 0) { if (pOperator->numOfDownstream == 0) {
@ -130,10 +141,9 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu
return doSetStreamBlock(pOperator->pDownstream[0], input, numOfBlocks, type, id); return doSetStreamBlock(pOperator->pDownstream[0], input, numOfBlocks, type, id);
} else { } else {
pOperator->status = OP_NOT_OPENED; pOperator->status = OP_NOT_OPENED;
SStreamScanInfo* pInfo = pOperator->info; SStreamScanInfo* pInfo = pOperator->info;
qDebug("s-task:%s in this batch, all %d blocks need to be processed and dump results", id, (int32_t)numOfBlocks); qDebug("s-task:%s in this batch, %d blocks need to be processed", id, (int32_t)numOfBlocks);
ASSERT(pInfo->validBlockIndex == 0 && taosArrayGetSize(pInfo->pBlockLists) == 0); ASSERT(pInfo->validBlockIndex == 0 && taosArrayGetSize(pInfo->pBlockLists) == 0);
if (type == STREAM_INPUT__MERGED_SUBMIT) { if (type == STREAM_INPUT__MERGED_SUBMIT) {
@ -265,6 +275,7 @@ qTaskInfo_t qCreateQueueExecTaskInfo(void* msg, SReadHandle* pReaderHandle, int3
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL; return NULL;
} }
pTaskInfo->pRoot = createRawScanOperatorInfo(pReaderHandle, pTaskInfo); pTaskInfo->pRoot = createRawScanOperatorInfo(pReaderHandle, pTaskInfo);
if (NULL == pTaskInfo->pRoot) { if (NULL == pTaskInfo->pRoot) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
@ -314,8 +325,8 @@ qTaskInfo_t qCreateStreamExecTaskInfo(void* msg, SReadHandle* readers, int32_t v
return NULL; return NULL;
} }
struct SSubplan* pPlan = NULL; SSubplan* pPlan = NULL;
int32_t code = qStringToSubplan(msg, &pPlan); int32_t code = qStringToSubplan(msg, &pPlan);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
terrno = code; terrno = code;
return NULL; return NULL;
@ -869,19 +880,41 @@ int32_t qExtractStreamScanner(qTaskInfo_t tinfo, void** scanner) {
} }
} }
int32_t qStreamSourceRecoverStep1(qTaskInfo_t tinfo, int64_t ver) { int32_t qStreamSourceScanParamForHistoryScanStep1(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
ASSERT(pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM); ASSERT(pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM);
pTaskInfo->streamInfo.fillHistoryVer1 = ver;
pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__PREPARE1; SStreamTaskInfo* pStreamInfo = &pTaskInfo->streamInfo;
pStreamInfo->fillHistoryVer = *pVerRange;
pStreamInfo->fillHistoryWindow = *pWindow;
pStreamInfo->recoverStep = STREAM_RECOVER_STEP__PREPARE1;
pStreamInfo->recoverStep1Finished = false;
pStreamInfo->recoverStep2Finished = false;
qDebug("%s step 1. set param for stream scanner for scan history data, verRange:%" PRId64 " - %" PRId64 ", window:%" PRId64
" - %" PRId64,
GET_TASKID(pTaskInfo), pStreamInfo->fillHistoryVer.minVer, pStreamInfo->fillHistoryVer.maxVer, pWindow->skey,
pWindow->ekey);
return 0; return 0;
} }
int32_t qStreamSourceRecoverStep2(qTaskInfo_t tinfo, int64_t ver) { int32_t qStreamSourceScanParamForHistoryScanStep2(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
ASSERT(pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM); ASSERT(pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM);
pTaskInfo->streamInfo.fillHistoryVer2 = ver;
pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__PREPARE2; SStreamTaskInfo* pStreamInfo = &pTaskInfo->streamInfo;
pStreamInfo->fillHistoryVer = *pVerRange;
pStreamInfo->fillHistoryWindow = *pWindow;
pStreamInfo->recoverStep = STREAM_RECOVER_STEP__PREPARE2;
pStreamInfo->recoverStep1Finished = true;
pStreamInfo->recoverStep2Finished = false;
qDebug("%s step 2. set param for stream scanner for scan history data, verRange:%" PRId64 " - %" PRId64 ", window:%" PRId64
" - %" PRId64,
GET_TASKID(pTaskInfo), pStreamInfo->fillHistoryVer.minVer, pStreamInfo->fillHistoryVer.maxVer, pWindow->skey,
pWindow->ekey);
return 0; return 0;
} }
@ -892,55 +925,58 @@ int32_t qStreamRecoverFinish(qTaskInfo_t tinfo) {
return 0; return 0;
} }
int32_t qStreamSetParamForRecover(qTaskInfo_t tinfo) { int32_t qSetStreamOperatorOptionForScanHistory(qTaskInfo_t tinfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
SOperatorInfo* pOperator = pTaskInfo->pRoot; SOperatorInfo* pOperator = pTaskInfo->pRoot;
while (1) { while (1) {
if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL || int32_t type = pOperator->operatorType;
pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL || if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL || type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL ||
pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL) { type == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL) {
SStreamIntervalOperatorInfo* pInfo = pOperator->info; SStreamIntervalOperatorInfo* pInfo = pOperator->info;
ASSERT(pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE || STimeWindowAggSupp* pSup = &pInfo->twAggSup;
pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
ASSERT(pInfo->twAggSup.calTriggerSaved == 0 && pInfo->twAggSup.deleteMarkSaved == 0);
qInfo("save stream param for interval: %d, %" PRId64, pInfo->twAggSup.calTrigger, pInfo->twAggSup.deleteMark); ASSERT(pSup->calTrigger == STREAM_TRIGGER_AT_ONCE || pSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
ASSERT(pSup->calTriggerSaved == 0 && pSup->deleteMarkSaved == 0);
pInfo->twAggSup.calTriggerSaved = pInfo->twAggSup.calTrigger; qInfo("save stream param for interval: %d, %" PRId64, pSup->calTrigger, pSup->deleteMark);
pInfo->twAggSup.deleteMarkSaved = pInfo->twAggSup.deleteMark;
pInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE; pSup->calTriggerSaved = pSup->calTrigger;
pInfo->twAggSup.deleteMark = INT64_MAX; pSup->deleteMarkSaved = pSup->deleteMark;
pSup->calTrigger = STREAM_TRIGGER_AT_ONCE;
pSup->deleteMark = INT64_MAX;
pInfo->ignoreExpiredDataSaved = pInfo->ignoreExpiredData; pInfo->ignoreExpiredDataSaved = pInfo->ignoreExpiredData;
pInfo->ignoreExpiredData = false; pInfo->ignoreExpiredData = false;
} else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION || } else if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION ||
pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION || type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION ||
pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) { type == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) {
SStreamSessionAggOperatorInfo* pInfo = pOperator->info; SStreamSessionAggOperatorInfo* pInfo = pOperator->info;
ASSERT(pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE || STimeWindowAggSupp* pSup = &pInfo->twAggSup;
pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
ASSERT(pInfo->twAggSup.calTriggerSaved == 0 && pInfo->twAggSup.deleteMarkSaved == 0); ASSERT(pSup->calTrigger == STREAM_TRIGGER_AT_ONCE || pSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
qInfo("save stream param for session: %d, %" PRId64, pInfo->twAggSup.calTrigger, pInfo->twAggSup.deleteMark); ASSERT(pSup->calTriggerSaved == 0 && pSup->deleteMarkSaved == 0);
pInfo->twAggSup.calTriggerSaved = pInfo->twAggSup.calTrigger; qInfo("save stream param for session: %d, %" PRId64, pSup->calTrigger, pSup->deleteMark);
pInfo->twAggSup.deleteMarkSaved = pInfo->twAggSup.deleteMark;
pInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE; pSup->calTriggerSaved = pSup->calTrigger;
pInfo->twAggSup.deleteMark = INT64_MAX; pSup->deleteMarkSaved = pSup->deleteMark;
pSup->calTrigger = STREAM_TRIGGER_AT_ONCE;
pSup->deleteMark = INT64_MAX;
pInfo->ignoreExpiredDataSaved = pInfo->ignoreExpiredData; pInfo->ignoreExpiredDataSaved = pInfo->ignoreExpiredData;
pInfo->ignoreExpiredData = false; pInfo->ignoreExpiredData = false;
} else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE) { } else if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE) {
SStreamStateAggOperatorInfo* pInfo = pOperator->info; SStreamStateAggOperatorInfo* pInfo = pOperator->info;
ASSERT(pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE || STimeWindowAggSupp* pSup = &pInfo->twAggSup;
pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
ASSERT(pInfo->twAggSup.calTriggerSaved == 0 && pInfo->twAggSup.deleteMarkSaved == 0);
qInfo("save stream param for state: %d, %" PRId64, pInfo->twAggSup.calTrigger, pInfo->twAggSup.deleteMark); ASSERT(pSup->calTrigger == STREAM_TRIGGER_AT_ONCE || pSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
ASSERT(pSup->calTriggerSaved == 0 && pSup->deleteMarkSaved == 0);
pInfo->twAggSup.calTriggerSaved = pInfo->twAggSup.calTrigger; qInfo("save stream param for state: %d, %" PRId64, pSup->calTrigger, pSup->deleteMark);
pInfo->twAggSup.deleteMarkSaved = pInfo->twAggSup.deleteMark;
pInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE; pSup->calTriggerSaved = pSup->calTrigger;
pInfo->twAggSup.deleteMark = INT64_MAX; pSup->deleteMarkSaved = pSup->deleteMark;
pSup->calTrigger = STREAM_TRIGGER_AT_ONCE;
pSup->deleteMark = INT64_MAX;
pInfo->ignoreExpiredDataSaved = pInfo->ignoreExpiredData; pInfo->ignoreExpiredDataSaved = pInfo->ignoreExpiredData;
pInfo->ignoreExpiredData = false; pInfo->ignoreExpiredData = false;
} }
@ -961,7 +997,7 @@ int32_t qStreamSetParamForRecover(qTaskInfo_t tinfo) {
return 0; return 0;
} }
int32_t qStreamRestoreParam(qTaskInfo_t tinfo) { int32_t qRestoreStreamOperatorOption(qTaskInfo_t tinfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
SOperatorInfo* pOperator = pTaskInfo->pRoot; SOperatorInfo* pOperator = pTaskInfo->pRoot;
@ -1009,6 +1045,26 @@ bool qStreamRecoverScanFinished(qTaskInfo_t tinfo) {
return pTaskInfo->streamInfo.recoverScanFinished; return pTaskInfo->streamInfo.recoverScanFinished;
} }
bool qStreamRecoverScanStep1Finished(qTaskInfo_t tinfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
return pTaskInfo->streamInfo.recoverStep1Finished;
}
bool qStreamRecoverScanStep2Finished(qTaskInfo_t tinfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
return pTaskInfo->streamInfo.recoverStep2Finished;
}
int32_t qStreamRecoverSetAllStepFinished(qTaskInfo_t tinfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
pTaskInfo->streamInfo.recoverStep1Finished = true;
pTaskInfo->streamInfo.recoverStep2Finished = true;
// reset the time window
pTaskInfo->streamInfo.fillHistoryWindow.skey = INT64_MIN;
return 0;
}
void* qExtractReaderFromStreamScanner(void* scanner) { void* qExtractReaderFromStreamScanner(void* scanner) {
SStreamScanInfo* pInfo = scanner; SStreamScanInfo* pInfo = scanner;
return (void*)pInfo->tqReader; return (void*)pInfo->tqReader;
@ -1323,4 +1379,16 @@ SArray* getTableListInfo(const SExecTaskInfo* pTaskInfo) {
SOperatorInfo* pOperator = pTaskInfo->pRoot; SOperatorInfo* pOperator = pTaskInfo->pRoot;
extractTableList(pArray, pOperator); extractTableList(pArray, pOperator);
return pArray; return pArray;
} }
int32_t qStreamOperatorReleaseState(qTaskInfo_t tInfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*) tInfo;
pTaskInfo->pRoot->fpSet.releaseStreamStateFn(pTaskInfo->pRoot);
return 0;
}
int32_t qStreamOperatorReloadState(qTaskInfo_t tInfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*) tInfo;
pTaskInfo->pRoot->fpSet.reloadStreamStateFn(pTaskInfo->pRoot);
return 0;
}

View File

@ -540,140 +540,12 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const SColumnInfoD
} }
int8_t* pIndicator = (int8_t*)p->pData; int8_t* pIndicator = (int8_t*)p->pData;
int32_t totalRows = pBlock->info.rows;
if (status == FILTER_RESULT_ALL_QUALIFIED) { if (status == FILTER_RESULT_ALL_QUALIFIED) {
// here nothing needs to be done // here nothing needs to be done
} else if (status == FILTER_RESULT_NONE_QUALIFIED) { } else if (status == FILTER_RESULT_NONE_QUALIFIED) {
pBlock->info.rows = 0; pBlock->info.rows = 0;
} else { } else {
int32_t bmLen = BitmapLen(totalRows); trimDataBlock(pBlock, pBlock->info.rows, (bool*) pIndicator);
char* pBitmap = NULL;
int32_t maxRows = 0;
size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock);
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i);
// it is a reserved column for scalar function, and no data in this column yet.
if (pDst->pData == NULL) {
continue;
}
int32_t numOfRows = 0;
if (IS_VAR_DATA_TYPE(pDst->info.type)) {
int32_t j = 0;
while (j < totalRows) {
if (pIndicator[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_var(pDst, j)) {
colDataSetNull_var(pDst, numOfRows);
} else {
char* p1 = colDataGetVarData(pDst, j);
colDataReassignVal(pDst, numOfRows, j, p1);
}
numOfRows += 1;
j += 1;
}
if (maxRows < numOfRows) {
maxRows = numOfRows;
}
} else {
if (pBitmap == NULL) {
pBitmap = taosMemoryCalloc(1, bmLen);
}
memcpy(pBitmap, pDst->nullbitmap, bmLen);
memset(pDst->nullbitmap, 0, bmLen);
int32_t j = 0;
switch (pDst->info.type) {
case TSDB_DATA_TYPE_BIGINT:
case TSDB_DATA_TYPE_UBIGINT:
case TSDB_DATA_TYPE_DOUBLE:
case TSDB_DATA_TYPE_TIMESTAMP:
while (j < totalRows) {
if (pIndicator[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int64_t*)pDst->pData)[numOfRows] = ((int64_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
case TSDB_DATA_TYPE_FLOAT:
case TSDB_DATA_TYPE_INT:
case TSDB_DATA_TYPE_UINT:
while (j < totalRows) {
if (pIndicator[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int32_t*)pDst->pData)[numOfRows] = ((int32_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
case TSDB_DATA_TYPE_SMALLINT:
case TSDB_DATA_TYPE_USMALLINT:
while (j < totalRows) {
if (pIndicator[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int16_t*)pDst->pData)[numOfRows] = ((int16_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
case TSDB_DATA_TYPE_BOOL:
case TSDB_DATA_TYPE_TINYINT:
case TSDB_DATA_TYPE_UTINYINT:
while (j < totalRows) {
if (pIndicator[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int8_t*)pDst->pData)[numOfRows] = ((int8_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
}
}
if (maxRows < numOfRows) {
maxRows = numOfRows;
}
}
pBlock->info.rows = maxRows;
if (pBitmap != NULL) {
taosMemoryFree(pBitmap);
}
} }
} }
@ -1105,9 +977,11 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa
int32_t size = 0; int32_t size = 0;
void* pVal = NULL; void* pVal = NULL;
int32_t code = pAPI->stateStore.streamStateSessionGet(pState, pKey, &pVal, &size); int32_t code = pAPI->stateStore.streamStateSessionGet(pState, pKey, &pVal, &size);
ASSERT(code == 0); // ASSERT(code == 0);
if (code == -1) { if (code == -1) {
// coverity scan // for history
qWarn("===stream===not found session result key:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, pKey->win.skey,
pKey->win.ekey, pKey->groupId);
pGroupResInfo->index += 1; pGroupResInfo->index += 1;
continue; continue;
} }
@ -1177,40 +1051,16 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
void qStreamCloseTsdbReader(void* task) { void streamOpReleaseState(SOperatorInfo* pOperator) {
if (task == NULL) { SOperatorInfo* downstream = pOperator->pDownstream[0];
return; if (downstream->fpSet.releaseStreamStateFn) {
} downstream->fpSet.releaseStreamStateFn(downstream);
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)task;
SOperatorInfo* pOp = pTaskInfo->pRoot;
qDebug("stream close tsdb reader, reset status uid:%" PRId64 " ts:%" PRId64, pTaskInfo->streamInfo.currentOffset.uid,
pTaskInfo->streamInfo.currentOffset.ts);
// todo refactor, other thread may already use this read to extract data.
pTaskInfo->streamInfo.currentOffset = (STqOffsetVal){0};
while (pOp->numOfDownstream == 1 && pOp->pDownstream[0]) {
SOperatorInfo* pDownstreamOp = pOp->pDownstream[0];
if (pDownstreamOp->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
SStreamScanInfo* pInfo = pDownstreamOp->info;
if (pInfo->pTableScanOp) {
STableScanInfo* pTSInfo = pInfo->pTableScanOp->info;
setOperatorCompleted(pInfo->pTableScanOp);
while (pTaskInfo->owner != 0) {
taosMsleep(100);
qDebug("wait for the reader stopping");
}
pTaskInfo->storageAPI.tsdReader.tsdReaderClose(pTSInfo->base.dataReader);
pTSInfo->base.dataReader = NULL;
// restore the status, todo refactor.
pInfo->pTableScanOp->status = OP_OPENED;
pTaskInfo->status = TASK_NOT_COMPLETED;
return;
}
}
} }
} }
void streamOpReloadState(SOperatorInfo* pOperator) {
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
}

View File

@ -1562,6 +1562,7 @@ SOperatorInfo* createStreamFillOperatorInfo(SOperatorInfo* downstream, SStreamFi
pTaskInfo); pTaskInfo);
pOperator->fpSet = pOperator->fpSet =
createOperatorFpSet(optrDummyOpenFn, doStreamFill, NULL, destroyStreamFillOperatorInfo, optrDefaultBufFn, NULL); createOperatorFpSet(optrDummyOpenFn, doStreamFill, NULL, destroyStreamFillOperatorInfo, optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamOpReleaseState, streamOpReloadState);
code = appendDownstream(pOperator, &downstream, 1); code = appendDownstream(pOperator, &downstream, 1);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {

View File

@ -1036,7 +1036,7 @@ void appendCreateTableRow(void* pState, SExprSupp* pTableSup, SExprSupp* pTagSup
} }
void* pGpIdCol = taosArrayGet(pDestBlock->pDataBlock, UD_GROUPID_COLUMN_INDEX); void* pGpIdCol = taosArrayGet(pDestBlock->pDataBlock, UD_GROUPID_COLUMN_INDEX);
colDataAppend(pGpIdCol, pDestBlock->info.rows, (const char*)&groupId, false); colDataSetVal(pGpIdCol, pDestBlock->info.rows, (const char*)&groupId, false);
pDestBlock->info.rows++; pDestBlock->info.rows++;
blockDataDestroy(pTmpBlock); blockDataDestroy(pTmpBlock);
} else { } else {
@ -1324,6 +1324,7 @@ SOperatorInfo* createStreamPartitionOperatorInfo(SOperatorInfo* downstream, SStr
pOperator->exprSupp.pExprInfo = pExprInfo; pOperator->exprSupp.pExprInfo = pExprInfo;
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamHashPartition, NULL, pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamHashPartition, NULL,
destroyStreamPartitionOperatorInfo, optrDefaultBufFn, NULL); destroyStreamPartitionOperatorInfo, optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamOpReleaseState, streamOpReloadState);
initParDownStream(downstream, &pInfo->partitionSup, &pInfo->scalarSup); initParDownStream(downstream, &pInfo->partitionSup, &pInfo->scalarSup);
code = appendDownstream(pOperator, &downstream, 1); code = appendDownstream(pOperator, &downstream, 1);

View File

@ -38,11 +38,18 @@ SOperatorFpSet createOperatorFpSet(__optr_open_fn_t openFn, __optr_fn_t nextFn,
.closeFn = closeFn, .closeFn = closeFn,
.reqBufFn = reqBufFn, .reqBufFn = reqBufFn,
.getExplainFn = explain, .getExplainFn = explain,
.releaseStreamStateFn = NULL,
.reloadStreamStateFn = NULL,
}; };
return fpSet; return fpSet;
} }
void setOperatorStreamStateFn(SOperatorInfo* pOperator, __optr_state_fn_t relaseFn, __optr_state_fn_t reloadFn) {
pOperator->fpSet.releaseStreamStateFn = relaseFn;
pOperator->fpSet.reloadStreamStateFn = reloadFn;
}
int32_t optrDummyOpenFn(SOperatorInfo* pOperator) { int32_t optrDummyOpenFn(SOperatorInfo* pOperator) {
OPTR_SET_OPENED(pOperator); OPTR_SET_OPENED(pOperator);
pOperator->cost.openCost = 0; pOperator->cost.openCost = 0;
@ -485,13 +492,13 @@ SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SR
SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode; SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode;
pOptr = createSessionAggOperatorInfo(ops[0], pSessionNode, pTaskInfo); pOptr = createSessionAggOperatorInfo(ops[0], pSessionNode, pTaskInfo);
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION == type) { } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION == type) {
pOptr = createStreamSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo); pOptr = createStreamSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, pHandle);
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION == type) { } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION == type) {
int32_t children = 0; int32_t children = 0;
pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children, pHandle);
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION == type) { } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION == type) {
int32_t children = pHandle->numOfVgroups; int32_t children = pHandle->numOfVgroups;
pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children, pHandle);
} else if (QUERY_NODE_PHYSICAL_PLAN_PARTITION == type) { } else if (QUERY_NODE_PHYSICAL_PLAN_PARTITION == type) {
pOptr = createPartitionOperatorInfo(ops[0], (SPartitionPhysiNode*)pPhyNode, pTaskInfo); pOptr = createPartitionOperatorInfo(ops[0], (SPartitionPhysiNode*)pPhyNode, pTaskInfo);
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION == type) { } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION == type) {
@ -500,7 +507,7 @@ SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SR
SStateWinodwPhysiNode* pStateNode = (SStateWinodwPhysiNode*)pPhyNode; SStateWinodwPhysiNode* pStateNode = (SStateWinodwPhysiNode*)pPhyNode;
pOptr = createStatewindowOperatorInfo(ops[0], pStateNode, pTaskInfo); pOptr = createStatewindowOperatorInfo(ops[0], pStateNode, pTaskInfo);
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE == type) { } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE == type) {
pOptr = createStreamStateAggOperatorInfo(ops[0], pPhyNode, pTaskInfo); pOptr = createStreamStateAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, pHandle);
} else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN == type) { } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN == type) {
pOptr = createMergeJoinOperatorInfo(ops, size, (SSortMergeJoinPhysiNode*)pPhyNode, pTaskInfo); pOptr = createMergeJoinOperatorInfo(ops, size, (SSortMergeJoinPhysiNode*)pPhyNode, pTaskInfo);
} else if (QUERY_NODE_PHYSICAL_PLAN_FILL == type) { } else if (QUERY_NODE_PHYSICAL_PLAN_FILL == type) {

View File

@ -73,6 +73,20 @@ static void destroyIndefinitOperatorInfo(void* param) {
taosMemoryFreeClear(param); taosMemoryFreeClear(param);
} }
void streamOperatorReleaseState(SOperatorInfo* pOperator) {
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.releaseStreamStateFn) {
downstream->fpSet.releaseStreamStateFn(downstream);
}
}
void streamOperatorReloadState(SOperatorInfo* pOperator) {
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
}
SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SProjectPhysiNode* pProjPhyNode, SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SProjectPhysiNode* pProjPhyNode,
SExecTaskInfo* pTaskInfo) { SExecTaskInfo* pTaskInfo) {
int32_t code = TSDB_CODE_SUCCESS; int32_t code = TSDB_CODE_SUCCESS;
@ -136,6 +150,7 @@ SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SProjectPhys
pTaskInfo); pTaskInfo);
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doProjectOperation, NULL, destroyProjectOperatorInfo, pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doProjectOperation, NULL, destroyProjectOperatorInfo,
optrDefaultBufFn, NULL); optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamOperatorReleaseState, streamOperatorReloadState);
code = appendDownstream(pOperator, &downstream, 1); code = appendDownstream(pOperator, &downstream, 1);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {

View File

@ -44,6 +44,7 @@ int32_t scanDebug = 0;
#define SET_REVERSE_SCAN_FLAG(_info) ((_info)->scanFlag = REVERSE_SCAN) #define SET_REVERSE_SCAN_FLAG(_info) ((_info)->scanFlag = REVERSE_SCAN)
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC)) #define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
#define STREAM_SCAN_OP_NAME "StreamScanOperator" #define STREAM_SCAN_OP_NAME "StreamScanOperator"
#define STREAM_SCAN_OP_STATE_NAME "StreamScanFillHistoryState"
typedef struct STableMergeScanExecInfo { typedef struct STableMergeScanExecInfo {
SFileBlockLoadRecorder blockRecorder; SFileBlockLoadRecorder blockRecorder;
@ -489,12 +490,12 @@ int32_t addTagPseudoColumnData(SReadHandle* pHandle, const SExprInfo* pExpr, int
} }
int32_t code = 0; int32_t code = 0;
bool freeReader = false;
// backup the rows // backup the rows
int32_t backupRows = pBlock->info.rows; int32_t backupRows = pBlock->info.rows;
pBlock->info.rows = rows; pBlock->info.rows = rows;
bool freeReader = false;
STableCachedVal val = {0}; STableCachedVal val = {0};
SMetaReader mr = {0}; SMetaReader mr = {0};
@ -1558,13 +1559,13 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock
blockDataEnsureCapacity(pInfo->pRes, pBlock->info.rows); blockDataEnsureCapacity(pInfo->pRes, pBlock->info.rows);
pInfo->pRes->info.rows = pBlock->info.rows; pBlockInfo->rows = pBlock->info.rows;
pInfo->pRes->info.id.uid = pBlock->info.id.uid; pBlockInfo->id.uid = pBlock->info.id.uid;
pInfo->pRes->info.type = STREAM_NORMAL; pBlockInfo->type = STREAM_NORMAL;
pInfo->pRes->info.version = pBlock->info.version; pBlockInfo->version = pBlock->info.version;
STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info;
pInfo->pRes->info.id.groupId = getTableGroupId(pTableScanInfo->base.pTableListInfo, pBlock->info.id.uid); pBlockInfo->id.groupId = getTableGroupId(pTableScanInfo->base.pTableListInfo, pBlock->info.id.uid);
// todo extract method // todo extract method
for (int32_t i = 0; i < taosArrayGetSize(pInfo->matchInfo.pList); ++i) { for (int32_t i = 0; i < taosArrayGetSize(pInfo->matchInfo.pList); ++i) {
@ -1594,7 +1595,7 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock
// currently only the tbname pseudo column // currently only the tbname pseudo column
if (pInfo->numOfPseudoExpr > 0) { if (pInfo->numOfPseudoExpr > 0) {
int32_t code = addTagPseudoColumnData(&pInfo->readHandle, pInfo->pPseudoExpr, pInfo->numOfPseudoExpr, pInfo->pRes, int32_t code = addTagPseudoColumnData(&pInfo->readHandle, pInfo->pPseudoExpr, pInfo->numOfPseudoExpr, pInfo->pRes,
pInfo->pRes->info.rows, GET_TASKID(pTaskInfo), &pTableScanInfo->base.metaCache); pBlockInfo->rows, GET_TASKID(pTaskInfo), &pTableScanInfo->base.metaCache);
// ignore the table not exists error, since this table may have been dropped during the scan procedure. // ignore the table not exists error, since this table may have been dropped during the scan procedure.
if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_PAR_TABLE_NOT_EXIST) { if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_PAR_TABLE_NOT_EXIST) {
blockDataFreeRes((SSDataBlock*)pBlock); blockDataFreeRes((SSDataBlock*)pBlock);
@ -1611,7 +1612,6 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock
pInfo->pRes->info.dataLoad = 1; pInfo->pRes->info.dataLoad = 1;
blockDataUpdateTsWindow(pInfo->pRes, pInfo->primaryTsIndex); blockDataUpdateTsWindow(pInfo->pRes, pInfo->primaryTsIndex);
// blockDataFreeRes((SSDataBlock*)pBlock);
calBlockTbName(pInfo, pInfo->pRes); calBlockTbName(pInfo, pInfo->pRes);
return 0; return 0;
@ -1769,7 +1769,7 @@ void streamScanOperatorDecode(void* pBuff, int32_t len, SStreamScanInfo* pInfo)
return; return;
} }
void* pUpInfo = pInfo->stateStore.updateInfoInit(0, TSDB_TIME_PRECISION_MILLI, 0); void* pUpInfo = taosMemoryCalloc(1, sizeof(SUpdateInfo));
int32_t code = pInfo->stateStore.updateInfoDeserialize(pBuff, len, pUpInfo); int32_t code = pInfo->stateStore.updateInfoDeserialize(pBuff, len, pUpInfo);
if (code == TSDB_CODE_SUCCESS) { if (code == TSDB_CODE_SUCCESS) {
pInfo->pUpdateInfo = pUpInfo; pInfo->pUpdateInfo = pUpInfo;
@ -1783,25 +1783,28 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) {
SStorageAPI* pAPI = &pTaskInfo->storageAPI; SStorageAPI* pAPI = &pTaskInfo->storageAPI;
SStreamScanInfo* pInfo = pOperator->info; SStreamScanInfo* pInfo = pOperator->info;
SStreamTaskInfo* pStreamInfo = &pTaskInfo->streamInfo;
qDebug("stream scan started, %s", GET_TASKID(pTaskInfo)); qDebug("stream scan started, %s", id);
if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__PREPARE1 || if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__PREPARE1 || pStreamInfo->recoverStep == STREAM_RECOVER_STEP__PREPARE2) {
pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__PREPARE2) {
STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; STableScanInfo* pTSInfo = pInfo->pTableScanOp->info;
memcpy(&pTSInfo->base.cond, &pTaskInfo->streamInfo.tableCond, sizeof(SQueryTableDataCond)); memcpy(&pTSInfo->base.cond, &pStreamInfo->tableCond, sizeof(SQueryTableDataCond));
if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__PREPARE1) {
pTSInfo->base.cond.startVersion = 0; if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__PREPARE1) {
pTSInfo->base.cond.endVersion = pTaskInfo->streamInfo.fillHistoryVer1; pTSInfo->base.cond.startVersion = pStreamInfo->fillHistoryVer.minVer;
qDebug("stream recover step1, verRange:%" PRId64 " - %" PRId64, pTSInfo->base.cond.startVersion, pTSInfo->base.cond.endVersion = pStreamInfo->fillHistoryVer.maxVer;
pTSInfo->base.cond.endVersion);
pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__SCAN1; pTSInfo->base.cond.twindows = pStreamInfo->fillHistoryWindow;
qDebug("stream recover step1, verRange:%" PRId64 "-%" PRId64 " window:%"PRId64"-%"PRId64", %s", pTSInfo->base.cond.startVersion,
pTSInfo->base.cond.endVersion, pTSInfo->base.cond.twindows.skey, pTSInfo->base.cond.twindows.ekey, id);
pStreamInfo->recoverStep = STREAM_RECOVER_STEP__SCAN1;
} else { } else {
pTSInfo->base.cond.startVersion = pTaskInfo->streamInfo.fillHistoryVer1 + 1; pTSInfo->base.cond.startVersion = pStreamInfo->fillHistoryVer.minVer;
pTSInfo->base.cond.endVersion = pTaskInfo->streamInfo.fillHistoryVer2; pTSInfo->base.cond.endVersion = pStreamInfo->fillHistoryVer.maxVer;
qDebug("stream recover step2, verRange:%" PRId64 " - %" PRId64, pTSInfo->base.cond.startVersion, qDebug("stream recover step2, verRange:%" PRId64 " - %" PRId64", %s", pTSInfo->base.cond.startVersion,
pTSInfo->base.cond.endVersion); pTSInfo->base.cond.endVersion, id);
pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__SCAN2; pStreamInfo->recoverStep = STREAM_RECOVER_STEP__SCAN2;
} }
pAPI->tsdReader.tsdReaderClose(pTSInfo->base.dataReader); pAPI->tsdReader.tsdReaderClose(pTSInfo->base.dataReader);
@ -1811,11 +1814,11 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) {
pTSInfo->scanTimes = 0; pTSInfo->scanTimes = 0;
pTSInfo->currentGroupId = -1; pTSInfo->currentGroupId = -1;
pTaskInfo->streamInfo.recoverScanFinished = false; pStreamInfo->recoverScanFinished = false;
} }
if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__SCAN1 || if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1 ||
pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__SCAN2) { pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN2) {
if (pInfo->blockRecoverContiCnt > 100) { if (pInfo->blockRecoverContiCnt > 100) {
pInfo->blockRecoverTotCnt += pInfo->blockRecoverContiCnt; pInfo->blockRecoverTotCnt += pInfo->blockRecoverContiCnt;
pInfo->blockRecoverContiCnt = 0; pInfo->blockRecoverContiCnt = 0;
@ -1866,11 +1869,11 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) {
pInfo->blockRecoverContiCnt++; pInfo->blockRecoverContiCnt++;
calBlockTbName(pInfo, pInfo->pRecoverRes); calBlockTbName(pInfo, pInfo->pRecoverRes);
if (!pInfo->igCheckUpdate && pInfo->pUpdateInfo) { if (!pInfo->igCheckUpdate && pInfo->pUpdateInfo) {
if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__SCAN1) { if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1) {
TSKEY maxTs = pAPI->stateStore.updateInfoFillBlockData(pInfo->pUpdateInfo, pInfo->pRecoverRes, pInfo->primaryTsIndex); TSKEY maxTs = pAPI->stateStore.updateInfoFillBlockData(pInfo->pUpdateInfo, pInfo->pRecoverRes, pInfo->primaryTsIndex);
pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, maxTs); pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, maxTs);
} else { } else {
pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pTaskInfo->streamInfo.fillHistoryVer2); pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pStreamInfo->fillHistoryVer.maxVer);
doCheckUpdate(pInfo, pInfo->pRecoverRes->info.window.ekey, pInfo->pRecoverRes); doCheckUpdate(pInfo, pInfo->pRecoverRes->info.window.ekey, pInfo->pRecoverRes);
} }
} }
@ -1884,7 +1887,7 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) {
printDataBlock(pInfo->pRecoverRes, "scan recover"); printDataBlock(pInfo->pRecoverRes, "scan recover");
return pInfo->pRecoverRes; return pInfo->pRecoverRes;
} }
pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__NONE; pStreamInfo->recoverStep = STREAM_RECOVER_STEP__NONE;
STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; STableScanInfo* pTSInfo = pInfo->pTableScanOp->info;
pAPI->tsdReader.tsdReaderClose(pTSInfo->base.dataReader); pAPI->tsdReader.tsdReaderClose(pTSInfo->base.dataReader);
@ -1893,7 +1896,7 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) {
pTSInfo->base.cond.startVersion = -1; pTSInfo->base.cond.startVersion = -1;
pTSInfo->base.cond.endVersion = -1; pTSInfo->base.cond.endVersion = -1;
pTaskInfo->streamInfo.recoverScanFinished = true; pStreamInfo->recoverScanFinished = true;
return NULL; return NULL;
} }
@ -1912,7 +1915,7 @@ FETCH_NEXT_BLOCK:
SPackedData* pPacked = taosArrayGet(pInfo->pBlockLists, current); SPackedData* pPacked = taosArrayGet(pInfo->pBlockLists, current);
SSDataBlock* pBlock = pPacked->pDataBlock; SSDataBlock* pBlock = pPacked->pDataBlock;
if (pBlock->info.parTbName[0]) { if (pBlock->info.parTbName[0]) {
pAPI->stateStore.streamStatePutParName(pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, pBlock->info.parTbName); pAPI->stateStore.streamStatePutParName(pStreamInfo->pState, pBlock->info.id.groupId, pBlock->info.parTbName);
} }
// TODO move into scan // TODO move into scan
@ -2092,11 +2095,39 @@ FETCH_NEXT_BLOCK:
doCheckUpdate(pInfo, pBlockInfo->window.ekey, pBlock); doCheckUpdate(pInfo, pBlockInfo->window.ekey, pBlock);
doFilter(pBlock, pOperator->exprSupp.pFilterInfo, NULL); doFilter(pBlock, pOperator->exprSupp.pFilterInfo, NULL);
{ // do additional time window filter
STimeWindow* pWindow = &pStreamInfo->fillHistoryWindow;
if (pWindow->skey != INT64_MIN) {
qDebug("%s filter for additional history window, skey:%"PRId64, id, pWindow->skey);
bool* p = taosMemoryCalloc(pBlock->info.rows, sizeof(bool));
bool hasUnqualified = false;
SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, pInfo->primaryTsIndex);
for(int32_t i = 0; i < pBlock->info.rows; ++i) {
int64_t* ts = (int64_t*) colDataGetData(pCol, i);
p[i] = (*ts >= pWindow->skey);
if (!p[i]) {
hasUnqualified = true;
}
}
if (hasUnqualified) {
trimDataBlock(pBlock, pBlock->info.rows, p);
}
taosMemoryFree(p);
}
}
pBlock->info.dataLoad = 1; pBlock->info.dataLoad = 1;
blockDataUpdateTsWindow(pBlock, pInfo->primaryTsIndex); blockDataUpdateTsWindow(pBlock, pInfo->primaryTsIndex);
qDebug("%" PRId64 " rows in datablock, update res:%" PRId64 " %s", pBlockInfo->rows, qDebug("%s %" PRId64 " rows in datablock, update res:%" PRId64, id, pBlockInfo->rows,
pInfo->pUpdateDataRes->info.rows, id); pInfo->pUpdateDataRes->info.rows);
if (pBlockInfo->rows > 0 || pInfo->pUpdateDataRes->info.rows > 0) { if (pBlockInfo->rows > 0 || pInfo->pUpdateDataRes->info.rows > 0) {
break; break;
} }
@ -2294,6 +2325,57 @@ static void destroyStreamScanOperatorInfo(void* param) {
taosMemoryFree(pStreamScan); taosMemoryFree(pStreamScan);
} }
void streamScanReleaseState(SOperatorInfo* pOperator) {
SStreamScanInfo* pInfo = pOperator->info;
if (!pInfo->pState) {
return;
}
if (!pInfo->pUpdateInfo) {
return;
}
int32_t len = pInfo->stateStore.updateInfoSerialize(NULL, 0, pInfo->pUpdateInfo);
void* pBuff = taosMemoryCalloc(1, len);
pInfo->stateStore.updateInfoSerialize(pBuff, len, pInfo->pUpdateInfo);
pInfo->stateStore.streamStateSaveInfo(pInfo->pState, STREAM_SCAN_OP_STATE_NAME, strlen(STREAM_SCAN_OP_STATE_NAME), pBuff, len);
taosMemoryFree(pBuff);
}
void streamScanReloadState(SOperatorInfo* pOperator) {
SStreamScanInfo* pInfo = pOperator->info;
if (!pInfo->pState) {
return;
}
void* pBuff = NULL;
int32_t len = 0;
pInfo->stateStore.streamStateGetInfo(pInfo->pState, STREAM_SCAN_OP_STATE_NAME, strlen(STREAM_SCAN_OP_STATE_NAME), &pBuff, &len);
SUpdateInfo* pUpInfo = taosMemoryCalloc(1, sizeof(SUpdateInfo));
int32_t code = pInfo->stateStore.updateInfoDeserialize(pBuff, len, pUpInfo);
taosMemoryFree(pBuff);
if (code == TSDB_CODE_SUCCESS && pInfo->pUpdateInfo) {
if (pInfo->pUpdateInfo->minTS < 0) {
pInfo->stateStore.updateInfoDestroy(pInfo->pUpdateInfo);
pInfo->pUpdateInfo = pUpInfo;
} else {
pInfo->pUpdateInfo->minTS = TMAX(pInfo->pUpdateInfo->minTS, pUpInfo->minTS);
pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pUpInfo->maxDataVersion);
SHashObj* curMap = pInfo->pUpdateInfo->pMap;
void *pIte = taosHashIterate(curMap, NULL);
while (pIte != NULL) {
size_t keySize = 0;
int64_t* pUid = taosHashGetKey(pIte, &keySize);
taosHashPut(pUpInfo->pMap, pUid, sizeof(int64_t), pIte, sizeof(TSKEY));
pIte = taosHashIterate(curMap, pIte);
}
taosHashCleanup(curMap);
pInfo->pUpdateInfo->pMap = pUpInfo->pMap;
pUpInfo->pMap = NULL;
pInfo->stateStore.updateInfoDestroy(pUpInfo);
}
} else {
pInfo->stateStore.updateInfoDestroy(pUpInfo);
}
}
SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhysiNode* pTableScanNode, SNode* pTagCond, SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhysiNode* pTableScanNode, SNode* pTagCond,
STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo) { STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo) {
SArray* pColIds = NULL; SArray* pColIds = NULL;
@ -2475,6 +2557,7 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys
__optr_fn_t nextFn = (pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM) ? doStreamScan : doQueueScan; __optr_fn_t nextFn = (pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM) ? doStreamScan : doQueueScan;
pOperator->fpSet = pOperator->fpSet =
createOperatorFpSet(optrDummyOpenFn, nextFn, NULL, destroyStreamScanOperatorInfo, optrDefaultBufFn, NULL); createOperatorFpSet(optrDummyOpenFn, nextFn, NULL, destroyStreamScanOperatorInfo, optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamScanReleaseState, streamScanReloadState);
return pOperator; return pOperator;

View File

@ -966,20 +966,20 @@ static int32_t sysTableUserColsFillOneTableCols(const SSysTableScanInfo* pInfo,
// table name // table name
pColInfoData = taosArrayGet(dataBlock->pDataBlock, 0); pColInfoData = taosArrayGet(dataBlock->pDataBlock, 0);
colDataAppend(pColInfoData, numOfRows, tName, false); colDataSetVal(pColInfoData, numOfRows, tName, false);
// database name // database name
pColInfoData = taosArrayGet(dataBlock->pDataBlock, 1); pColInfoData = taosArrayGet(dataBlock->pDataBlock, 1);
colDataAppend(pColInfoData, numOfRows, dbname, false); colDataSetVal(pColInfoData, numOfRows, dbname, false);
pColInfoData = taosArrayGet(dataBlock->pDataBlock, 2); pColInfoData = taosArrayGet(dataBlock->pDataBlock, 2);
colDataAppend(pColInfoData, numOfRows, tableType, false); colDataSetVal(pColInfoData, numOfRows, tableType, false);
// col name // col name
char colName[TSDB_COL_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; char colName[TSDB_COL_NAME_LEN + VARSTR_HEADER_SIZE] = {0};
STR_TO_VARSTR(colName, schemaRow->pSchema[i].name); STR_TO_VARSTR(colName, schemaRow->pSchema[i].name);
pColInfoData = taosArrayGet(dataBlock->pDataBlock, 3); pColInfoData = taosArrayGet(dataBlock->pDataBlock, 3);
colDataAppend(pColInfoData, numOfRows, colName, false); colDataSetVal(pColInfoData, numOfRows, colName, false);
// col type // col type
int8_t colType = schemaRow->pSchema[i].type; int8_t colType = schemaRow->pSchema[i].type;
@ -994,10 +994,10 @@ static int32_t sysTableUserColsFillOneTableCols(const SSysTableScanInfo* pInfo,
(int32_t)((schemaRow->pSchema[i].bytes - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE)); (int32_t)((schemaRow->pSchema[i].bytes - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE));
} }
varDataSetLen(colTypeStr, colTypeLen); varDataSetLen(colTypeStr, colTypeLen);
colDataAppend(pColInfoData, numOfRows, (char*)colTypeStr, false); colDataSetVal(pColInfoData, numOfRows, (char*)colTypeStr, false);
pColInfoData = taosArrayGet(dataBlock->pDataBlock, 5); pColInfoData = taosArrayGet(dataBlock->pDataBlock, 5);
colDataAppend(pColInfoData, numOfRows, (const char*)&schemaRow->pSchema[i].bytes, false); colDataSetVal(pColInfoData, numOfRows, (const char*)&schemaRow->pSchema[i].bytes, false);
for (int32_t j = 6; j <= 8; ++j) { for (int32_t j = 6; j <= 8; ++j) {
pColInfoData = taosArrayGet(dataBlock->pDataBlock, j); pColInfoData = taosArrayGet(dataBlock->pDataBlock, j);

View File

@ -272,7 +272,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp
continue; continue;
} else if (isIsfilledPseudoColumn(pExprInfo)) { } else if (isIsfilledPseudoColumn(pExprInfo)) {
bool isFilled = true; bool isFilled = true;
colDataAppend(pDst, pResBlock->info.rows, (char*)&isFilled, false); colDataSetVal(pDst, pResBlock->info.rows, (char*)&isFilled, false);
continue; continue;
} else if (!isInterpFunc(pExprInfo)) { } else if (!isInterpFunc(pExprInfo)) {
if (isGroupKeyFunc(pExprInfo)) { if (isGroupKeyFunc(pExprInfo)) {

View File

@ -28,6 +28,9 @@
#define IS_FINAL_OP(op) ((op)->isFinal) #define IS_FINAL_OP(op) ((op)->isFinal)
#define DEAULT_DELETE_MARK (1000LL * 60LL * 60LL * 24LL * 365LL * 10LL); #define DEAULT_DELETE_MARK (1000LL * 60LL * 60LL * 24LL * 365LL * 10LL);
#define STREAM_INTERVAL_OP_STATE_NAME "StreamIntervalHistoryState"
#define STREAM_SESSION_OP_STATE_NAME "StreamSessionHistoryState"
#define STREAM_STATE_OP_STATE_NAME "StreamStateHistoryState"
typedef struct SStateWindowInfo { typedef struct SStateWindowInfo {
SResultWindowInfo winInfo; SResultWindowInfo winInfo;
@ -2726,6 +2729,38 @@ int32_t getMaxFunResSize(SExprSupp* pSup, int32_t numOfCols) {
return size; return size;
} }
void streamIntervalReleaseState(SOperatorInfo* pOperator) {
if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) {
SStreamIntervalOperatorInfo* pInfo = pOperator->info;
int32_t resSize = sizeof(TSKEY);
pInfo->statestore.streamStateSaveInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, strlen(STREAM_INTERVAL_OP_STATE_NAME), &pInfo->twAggSup.maxTs, resSize);
}
SStreamIntervalOperatorInfo* pInfo = pOperator->info;
SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI;
pAPI->stateStore.streamStateCommit(pInfo->pState);
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.releaseStreamStateFn) {
downstream->fpSet.releaseStreamStateFn(downstream);
}
}
void streamIntervalReloadState(SOperatorInfo* pOperator) {
if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) {
SStreamIntervalOperatorInfo* pInfo = pOperator->info;
int32_t size = 0;
void* pBuf = NULL;
int32_t code = pInfo->statestore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME,
strlen(STREAM_INTERVAL_OP_STATE_NAME), &pBuf, &size);
TSKEY ts = *(TSKEY*)pBuf;
taosMemoryFree(pBuf);
pInfo->statestore.streamStateReloadInfo(pInfo->pState, ts);
}
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
}
SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode,
SExecTaskInfo* pTaskInfo, int32_t numOfChild) { SExecTaskInfo* pTaskInfo, int32_t numOfChild) {
SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode;
@ -2823,7 +2858,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream,
pInfo->pUpdatedMap = NULL; pInfo->pUpdatedMap = NULL;
int32_t funResSize= getMaxFunResSize(&pOperator->exprSupp, numOfCols); int32_t funResSize= getMaxFunResSize(&pOperator->exprSupp, numOfCols);
pInfo->pState->pFileState = pAPI->stateStore.streamFileStateInit(tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, pInfo->pState->pFileState = pAPI->stateStore.streamFileStateInit(tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize,
compareTs, pInfo->pState, pInfo->twAggSup.deleteMark); compareTs, pInfo->pState, pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo));
pInfo->dataVersion = 0; pInfo->dataVersion = 0;
pInfo->statestore = pTaskInfo->storageAPI.stateStore; pInfo->statestore = pTaskInfo->storageAPI.stateStore;
pInfo->recvGetAll = false; pInfo->recvGetAll = false;
@ -2835,6 +2870,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream,
pOperator->fpSet = createOperatorFpSet(NULL, doStreamFinalIntervalAgg, NULL, destroyStreamFinalIntervalOperatorInfo, pOperator->fpSet = createOperatorFpSet(NULL, doStreamFinalIntervalAgg, NULL, destroyStreamFinalIntervalOperatorInfo,
optrDefaultBufFn, NULL); optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState);
if (pPhyNode->type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) { if (pPhyNode->type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) {
initIntervalDownStream(downstream, pPhyNode->type, pInfo); initIntervalDownStream(downstream, pPhyNode->type, pInfo);
} }
@ -2873,12 +2909,14 @@ void destroyStreamSessionAggOperatorInfo(void* param) {
} }
taosArrayDestroy(pInfo->pChildren); taosArrayDestroy(pInfo->pChildren);
} }
colDataDestroy(&pInfo->twAggSup.timeWindowData); colDataDestroy(&pInfo->twAggSup.timeWindowData);
blockDataDestroy(pInfo->pDelRes); blockDataDestroy(pInfo->pDelRes);
blockDataDestroy(pInfo->pWinBlock); blockDataDestroy(pInfo->pWinBlock);
blockDataDestroy(pInfo->pUpdateRes); blockDataDestroy(pInfo->pUpdateRes);
tSimpleHashCleanup(pInfo->pStDeleted); tSimpleHashCleanup(pInfo->pStDeleted);
taosArrayDestroy(pInfo->historyWins);
taosMemoryFreeClear(param); taosMemoryFreeClear(param);
} }
@ -2928,7 +2966,7 @@ void initDownStream(SOperatorInfo* downstream, SStreamAggSupporter* pAggSup, uin
} }
int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx, int32_t numOfOutput, int64_t gap, int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx, int32_t numOfOutput, int64_t gap,
SStreamState* pState, int32_t keySize, int16_t keyType, SStateStore* pStore) { SStreamState* pState, int32_t keySize, int16_t keyType, SStateStore* pStore, SReadHandle* pHandle, SStorageAPI* pApi) {
pSup->resultRowSize = keySize + getResultRowSize(pCtx, numOfOutput); pSup->resultRowSize = keySize + getResultRowSize(pCtx, numOfOutput);
pSup->pScanBlock = createSpecialDataBlock(STREAM_CLEAR); pSup->pScanBlock = createSpecialDataBlock(STREAM_CLEAR);
pSup->gap = gap; pSup->gap = gap;
@ -2970,6 +3008,16 @@ int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx,
pCtx[i].saveHandle.pBuf = pSup->pResultBuf; pCtx[i].saveHandle.pBuf = pSup->pResultBuf;
} }
if (pHandle) {
pSup->winRange = pHandle->winRange;
// temporary
if (pSup->winRange.ekey <= 0) {
pSup->winRange.ekey = INT64_MAX;
}
}
pSup->pSessionAPI = pApi;
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
@ -2997,6 +3045,13 @@ void getCurSessionWindow(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endT
bool isInvalidSessionWin(SResultWindowInfo* pWinInfo) { return pWinInfo->sessionWin.win.skey == 0; } bool isInvalidSessionWin(SResultWindowInfo* pWinInfo) { return pWinInfo->sessionWin.win.skey == 0; }
bool inWinRange(STimeWindow* range, STimeWindow* cur) {
if (cur->skey >= range->skey && cur->ekey <= range->ekey) {
return true;
}
return false;
}
void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId, void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId,
SResultWindowInfo* pCurWin) { SResultWindowInfo* pCurWin) {
pCurWin->sessionWin.groupId = groupId; pCurWin->sessionWin.groupId = groupId;
@ -3005,6 +3060,12 @@ void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endT
int32_t size = pAggSup->resultRowSize; int32_t size = pAggSup->resultRowSize;
int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin, int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin,
pAggSup->gap, &pCurWin->pOutputBuf, &size); pAggSup->gap, &pCurWin->pOutputBuf, &size);
if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->sessionWin.win)) {
code = TSDB_CODE_FAILED;
releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->pOutputBuf, &pAggSup->pSessionAPI->stateStore);
pCurWin->pOutputBuf = taosMemoryMalloc(size);
}
if (code == TSDB_CODE_SUCCESS) { if (code == TSDB_CODE_SUCCESS) {
pCurWin->isOutput = true; pCurWin->isOutput = true;
} else { } else {
@ -3151,7 +3212,8 @@ static void compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* pC
while (1) { while (1) {
SResultWindowInfo winInfo = {0}; SResultWindowInfo winInfo = {0};
SStreamStateCur* pCur = getNextSessionWinInfo(pAggSup, pStUpdated, pCurWin, &winInfo); SStreamStateCur* pCur = getNextSessionWinInfo(pAggSup, pStUpdated, pCurWin, &winInfo);
if (!IS_VALID_SESSION_WIN(winInfo) || !isInWindow(pCurWin, winInfo.sessionWin.win.skey, pAggSup->gap)) { if (!IS_VALID_SESSION_WIN(winInfo) || !isInWindow(pCurWin, winInfo.sessionWin.win.skey, pAggSup->gap) ||
!inWinRange(&pAggSup->winRange, &winInfo.sessionWin.win)) {
taosMemoryFree(winInfo.pOutputBuf); taosMemoryFree(winInfo.pOutputBuf);
pAPI->stateStore.streamStateFreeCur(pCur); pAPI->stateStore.streamStateFreeCur(pCur);
break; break;
@ -3375,8 +3437,12 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS
SResultWindowInfo childWin = {0}; SResultWindowInfo childWin = {0};
childWin.sessionWin = *pWinKey; childWin.sessionWin = *pWinKey;
int32_t code = getSessionWinBuf(pChAggSup, pCur, &childWin); int32_t code = getSessionWinBuf(pChAggSup, pCur, &childWin);
if (code == TSDB_CODE_SUCCESS && pWinKey->win.skey <= childWin.sessionWin.win.skey &&
childWin.sessionWin.win.ekey <= pWinKey->win.ekey) { if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &childWin.sessionWin.win)) {
continue;
}
if (code == TSDB_CODE_SUCCESS && inWinRange(&pWinKey->win, &childWin.sessionWin.win)) {
if (num == 0) { if (num == 0) {
setSessionOutputBuf(pAggSup, pWinKey->win.skey, pWinKey->win.ekey, pWinKey->groupId, &parentWin); setSessionOutputBuf(pAggSup, pWinKey->win.skey, pWinKey->win.ekey, pWinKey->groupId, &parentWin);
code = initSessionOutputBuf(&parentWin, &pResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset); code = initSessionOutputBuf(&parentWin, &pResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset);
@ -3475,6 +3541,29 @@ void doBuildSessionResult(SOperatorInfo* pOperator, void* pState, SGroupResInfo*
// clear the existed group id // clear the existed group id
pBlock->info.id.groupId = 0; pBlock->info.id.groupId = 0;
buildSessionResultDataBlock(pOperator, pState, pBlock, &pOperator->exprSupp, pGroupResInfo); buildSessionResultDataBlock(pOperator, pState, pBlock, &pOperator->exprSupp, pGroupResInfo);
if (pBlock->info.rows == 0) {
cleanupGroupResInfo(pGroupResInfo);
}
}
void getMaxTsWins(const SArray* pAllWins, SArray* pMaxWins) {
int32_t size = taosArrayGetSize(pAllWins);
if (size == 0) {
return;
}
SSessionKey* pSeKey = taosArrayGet(pAllWins, size - 1);
taosArrayPush(pMaxWins, pSeKey);
if (pSeKey->groupId == 0) {
return;
}
uint64_t preGpId = pSeKey->groupId;
for (int32_t i = size - 2; i >= 0; i--) {
pSeKey = taosArrayGet(pAllWins, i);
if (preGpId != pSeKey->groupId) {
taosArrayPush(pMaxWins, pSeKey);
preGpId = pSeKey->groupId;
}
}
} }
static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) {
@ -3554,7 +3643,7 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) {
// if chIndex + 1 - size > 0, add new child // if chIndex + 1 - size > 0, add new child
for (int32_t i = 0; i < chIndex + 1 - size; i++) { for (int32_t i = 0; i < chIndex + 1 - size; i++) {
SOperatorInfo* pChildOp = SOperatorInfo* pChildOp =
createStreamFinalSessionAggOperatorInfo(NULL, pInfo->pPhyNode, pOperator->pTaskInfo, 0); createStreamFinalSessionAggOperatorInfo(NULL, pInfo->pPhyNode, pOperator->pTaskInfo, 0, NULL);
if (!pChildOp) { if (!pChildOp) {
T_LONG_JMP(pOperator->pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); T_LONG_JMP(pOperator->pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY);
} }
@ -3576,6 +3665,9 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) {
removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated); removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated);
tSimpleHashCleanup(pInfo->pStUpdated); tSimpleHashCleanup(pInfo->pStUpdated);
pInfo->pStUpdated = NULL; pInfo->pStUpdated = NULL;
if(pInfo->isHistoryOp) {
getMaxTsWins(pInfo->pUpdated, pInfo->historyWins);
}
initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated);
pInfo->pUpdated = NULL; pInfo->pUpdated = NULL;
blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity);
@ -3602,8 +3694,51 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) {
return NULL; return NULL;
} }
void streamSessionReleaseState(SOperatorInfo* pOperator) {
if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION) {
SStreamSessionAggOperatorInfo* pInfo = pOperator->info;
int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey);
pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_STATE_NAME, strlen(STREAM_SESSION_OP_STATE_NAME), pInfo->historyWins->pData, resSize);
}
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.releaseStreamStateFn) {
downstream->fpSet.releaseStreamStateFn(downstream);
}
}
void resetWinRange(STimeWindow* winRange) {
winRange->skey = INT16_MIN;
winRange->skey = INT16_MAX;
}
void streamSessionReloadState(SOperatorInfo* pOperator) {
SStreamSessionAggOperatorInfo* pInfo = pOperator->info;
SStreamAggSupporter* pAggSup = &pInfo->streamAggSup;
resetWinRange(&pAggSup->winRange);
SResultWindowInfo winInfo = {0};
int32_t size = 0;
void* pBuf = NULL;
int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME,
strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size);
int32_t num = size / sizeof(SSessionKey);
SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf;
ASSERT(size == num * sizeof(SSessionKey));
for (int32_t i = 0; i < num; i++) {
SResultWindowInfo winInfo = {0};
setSessionOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].win.ekey, pSeKeyBuf[i].groupId, &winInfo);
compactSessionWindow(pOperator, &winInfo, pInfo->pStUpdated, pInfo->pStDeleted);
}
taosMemoryFree(pBuf);
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
}
SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode,
SExecTaskInfo* pTaskInfo) { SExecTaskInfo* pTaskInfo, SReadHandle* pHandle) {
SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode; SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode;
int32_t numOfCols = 0; int32_t numOfCols = 0;
int32_t code = TSDB_CODE_OUT_OF_MEMORY; int32_t code = TSDB_CODE_OUT_OF_MEMORY;
@ -3634,7 +3769,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh
} }
code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, pSessionNode->gap, code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, pSessionNode->gap,
pTaskInfo->streamInfo.pState, 0, 0, &pTaskInfo->storageAPI.stateStore); pTaskInfo->streamInfo.pState, 0, 0, &pTaskInfo->storageAPI.stateStore, pHandle, &pTaskInfo->storageAPI);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
goto _error; goto _error;
} }
@ -3666,11 +3801,19 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh
pInfo->pUpdated = NULL; pInfo->pUpdated = NULL;
pInfo->pStUpdated = NULL; pInfo->pStUpdated = NULL;
pInfo->dataVersion = 0; pInfo->dataVersion = 0;
pInfo->historyWins = taosArrayInit(4, sizeof(SSessionKey));
if (!pInfo->historyWins) {
goto _error;
}
if (pHandle) {
pInfo->isHistoryOp = pHandle->fillHistory;
}
setOperatorInfo(pOperator, "StreamSessionWindowAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION, true, setOperatorInfo(pOperator, "StreamSessionWindowAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION, true,
OP_NOT_OPENED, pInfo, pTaskInfo); OP_NOT_OPENED, pInfo, pTaskInfo);
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionAgg, NULL, destroyStreamSessionAggOperatorInfo, pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionAgg, NULL, destroyStreamSessionAggOperatorInfo,
optrDefaultBufFn, NULL); optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionReloadState);
if (downstream) { if (downstream) {
initDownStream(downstream, &pInfo->streamAggSup, pOperator->operatorType, pInfo->primaryTsIndex, &pInfo->twAggSup); initDownStream(downstream, &pInfo->streamAggSup, pOperator->operatorType, pInfo->primaryTsIndex, &pInfo->twAggSup);
@ -3778,7 +3921,6 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) {
removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated); removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated);
tSimpleHashCleanup(pInfo->pStUpdated); tSimpleHashCleanup(pInfo->pStUpdated);
pInfo->pStUpdated = NULL; pInfo->pStUpdated = NULL;
initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated);
pInfo->pUpdated = NULL; pInfo->pUpdated = NULL;
blockDataEnsureCapacity(pBInfo->pRes, pOperator->resultInfo.capacity); blockDataEnsureCapacity(pBInfo->pRes, pOperator->resultInfo.capacity);
@ -3809,9 +3951,9 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) {
} }
SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode,
SExecTaskInfo* pTaskInfo, int32_t numOfChild) { SExecTaskInfo* pTaskInfo, int32_t numOfChild, SReadHandle* pHandle) {
int32_t code = TSDB_CODE_OUT_OF_MEMORY; int32_t code = TSDB_CODE_OUT_OF_MEMORY;
SOperatorInfo* pOperator = createStreamSessionAggOperatorInfo(downstream, pPhyNode, pTaskInfo); SOperatorInfo* pOperator = createStreamSessionAggOperatorInfo(downstream, pPhyNode, pTaskInfo, pHandle);
if (pOperator == NULL) { if (pOperator == NULL) {
goto _error; goto _error;
} }
@ -3828,14 +3970,14 @@ SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL, pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL,
destroyStreamSessionAggOperatorInfo, optrDefaultBufFn, NULL); destroyStreamSessionAggOperatorInfo, optrDefaultBufFn, NULL);
} }
setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionReloadState);
setOperatorInfo(pOperator, name, pPhyNode->type, false, OP_NOT_OPENED, pInfo, pTaskInfo); setOperatorInfo(pOperator, name, pPhyNode->type, false, OP_NOT_OPENED, pInfo, pTaskInfo);
pOperator->operatorType = pPhyNode->type; pOperator->operatorType = pPhyNode->type;
if (numOfChild > 0) { if (numOfChild > 0) {
pInfo->pChildren = taosArrayInit(numOfChild, sizeof(void*)); pInfo->pChildren = taosArrayInit(numOfChild, sizeof(void*));
for (int32_t i = 0; i < numOfChild; i++) { for (int32_t i = 0; i < numOfChild; i++) {
SOperatorInfo* pChildOp = createStreamFinalSessionAggOperatorInfo(NULL, pPhyNode, pTaskInfo, 0); SOperatorInfo* pChildOp = createStreamFinalSessionAggOperatorInfo(NULL, pPhyNode, pTaskInfo, 0, NULL);
if (pChildOp == NULL) { if (pChildOp == NULL) {
goto _error; goto _error;
} }
@ -3876,6 +4018,7 @@ void destroyStreamStateOperatorInfo(void* param) {
} }
colDataDestroy(&pInfo->twAggSup.timeWindowData); colDataDestroy(&pInfo->twAggSup.timeWindowData);
blockDataDestroy(pInfo->pDelRes); blockDataDestroy(pInfo->pDelRes);
taosArrayDestroy(pInfo->historyWins);
tSimpleHashCleanup(pInfo->pSeDeleted); tSimpleHashCleanup(pInfo->pSeDeleted);
taosMemoryFreeClear(param); taosMemoryFreeClear(param);
} }
@ -3892,6 +4035,9 @@ bool isEqualStateKey(SStateWindowInfo* pWin, char* pKeyData) {
} }
bool compareStateKey(void* data, void* key) { bool compareStateKey(void* data, void* key) {
if (!data || !key) {
return true;
}
SStateKeys* stateKey = (SStateKeys*)key; SStateKeys* stateKey = (SStateKeys*)key;
stateKey->pData = (char*)key + sizeof(SStateKeys); stateKey->pData = (char*)key + sizeof(SStateKeys);
return compareVal(data, stateKey); return compareVal(data, stateKey);
@ -3913,9 +4059,15 @@ void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId,
pCurWin->pStateKey->pData = (char*)pCurWin->pStateKey + sizeof(SStateKeys); pCurWin->pStateKey->pData = (char*)pCurWin->pStateKey + sizeof(SStateKeys);
pCurWin->pStateKey->isNull = false; pCurWin->pStateKey->isNull = false;
if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->winInfo.sessionWin.win)) {
code = TSDB_CODE_FAILED;
releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->winInfo.pOutputBuf, &pAggSup->pSessionAPI->stateStore);
pCurWin->winInfo.pOutputBuf = taosMemoryMalloc(size);
}
if (code == TSDB_CODE_SUCCESS) { if (code == TSDB_CODE_SUCCESS) {
pCurWin->winInfo.isOutput = true; pCurWin->winInfo.isOutput = true;
} else { } else if (pKeyData) {
if (IS_VAR_DATA_TYPE(pAggSup->stateKeyType)) { if (IS_VAR_DATA_TYPE(pAggSup->stateKeyType)) {
varDataCopy(pCurWin->pStateKey->pData, pKeyData); varDataCopy(pCurWin->pStateKey->pData, pKeyData);
} else { } else {
@ -4113,6 +4265,10 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) {
tSimpleHashCleanup(pInfo->pSeUpdated); tSimpleHashCleanup(pInfo->pSeUpdated);
pInfo->pSeUpdated = NULL; pInfo->pSeUpdated = NULL;
if(pInfo->isHistoryOp) {
getMaxTsWins(pInfo->pUpdated, pInfo->historyWins);
}
initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated);
pInfo->pUpdated = NULL; pInfo->pUpdated = NULL;
blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity);
@ -4138,8 +4294,73 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) {
return NULL; return NULL;
} }
void streamStateReleaseState(SOperatorInfo* pOperator) {
SStreamStateAggOperatorInfo* pInfo = pOperator->info;
int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey);
pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_STATE_NAME, strlen(STREAM_STATE_OP_STATE_NAME), pInfo->historyWins->pData, resSize);
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.releaseStreamStateFn) {
downstream->fpSet.releaseStreamStateFn(downstream);
}
}
static void compactStateWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SResultWindowInfo* pNextWin,
SSHashObj* pStUpdated, SSHashObj* pStDeleted) {
SExprSupp* pSup = &pOperator->exprSupp;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI;
SStreamStateAggOperatorInfo* pInfo = pOperator->info;
SResultRow* pCurResult = NULL;
int32_t numOfOutput = pOperator->exprSupp.numOfExprs;
SStreamAggSupporter* pAggSup = &pInfo->streamAggSup;
initSessionOutputBuf(pCurWin, &pCurResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset);
SResultRow* pWinResult = NULL;
initSessionOutputBuf(pNextWin, &pWinResult, pAggSup->pDummyCtx, numOfOutput, pSup->rowEntryInfoOffset);
updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, true);
compactFunctions(pSup->pCtx, pAggSup->pDummyCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData);
tSimpleHashRemove(pStUpdated, &pNextWin->sessionWin, sizeof(SSessionKey));
if (pNextWin->isOutput && pStDeleted) {
saveDeleteRes(pStDeleted, pNextWin->sessionWin);
}
removeSessionResult(pStUpdated, pAggSup->pResultRows, pNextWin->sessionWin);
doDeleteSessionWindow(pAggSup, &pNextWin->sessionWin);
taosMemoryFree(pNextWin->pOutputBuf);
saveSessionOutputBuf(pAggSup, pCurWin);
}
void streamStateReloadState(SOperatorInfo* pOperator) {
SStreamSessionAggOperatorInfo* pInfo = pOperator->info;
SStreamAggSupporter* pAggSup = &pInfo->streamAggSup;
resetWinRange(&pAggSup->winRange);
SSessionKey seKey = {.win.skey = INT64_MIN, .win.ekey = INT64_MIN, .groupId = 0};
int32_t size = 0;
void* pBuf = NULL;
int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_STATE_OP_STATE_NAME,
strlen(STREAM_STATE_OP_STATE_NAME), &pBuf, &size);
int32_t num = size / sizeof(SSessionKey);
SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf;
ASSERT(size == num * sizeof(SSessionKey));
for (int32_t i = 0; i < num; i++) {
SStateWindowInfo curInfo = {0};
SStateWindowInfo nextInfo = {0};
setStateOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].groupId, NULL, &curInfo, &nextInfo);
if (compareStateKey(curInfo.pStateKey,nextInfo.pStateKey)) {
compactStateWindow(pOperator, &curInfo.winInfo, &nextInfo.winInfo, pInfo->pStUpdated, pInfo->pStDeleted);
}
}
taosMemoryFree(pBuf);
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
}
SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode,
SExecTaskInfo* pTaskInfo) { SExecTaskInfo* pTaskInfo, SReadHandle* pHandle) {
SStreamStateWinodwPhysiNode* pStateNode = (SStreamStateWinodwPhysiNode*)pPhyNode; SStreamStateWinodwPhysiNode* pStateNode = (SStreamStateWinodwPhysiNode*)pPhyNode;
int32_t tsSlotId = ((SColumnNode*)pStateNode->window.pTspk)->slotId; int32_t tsSlotId = ((SColumnNode*)pStateNode->window.pTspk)->slotId;
SColumnNode* pColNode = (SColumnNode*)((STargetNode*)pStateNode->pStateKey)->pExpr; SColumnNode* pColNode = (SColumnNode*)((STargetNode*)pStateNode->pStateKey)->pExpr;
@ -4183,7 +4404,7 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys
int32_t keySize = sizeof(SStateKeys) + pColNode->node.resType.bytes; int32_t keySize = sizeof(SStateKeys) + pColNode->node.resType.bytes;
int16_t type = pColNode->node.resType.type; int16_t type = pColNode->node.resType.type;
code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, 0, pTaskInfo->streamInfo.pState, keySize, code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, 0, pTaskInfo->streamInfo.pState, keySize,
type, &pTaskInfo->storageAPI.stateStore); type, &pTaskInfo->storageAPI.stateStore, pHandle, &pTaskInfo->storageAPI);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
goto _error; goto _error;
} }
@ -4199,11 +4420,19 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys
pInfo->pUpdated = NULL; pInfo->pUpdated = NULL;
pInfo->pSeUpdated = NULL; pInfo->pSeUpdated = NULL;
pInfo->dataVersion = 0; pInfo->dataVersion = 0;
pInfo->historyWins = taosArrayInit(4, sizeof(SSessionKey));
if (!pInfo->historyWins) {
goto _error;
}
if (pHandle) {
pInfo->isHistoryOp = pHandle->fillHistory;
}
setOperatorInfo(pOperator, "StreamStateAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE, true, OP_NOT_OPENED, setOperatorInfo(pOperator, "StreamStateAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE, true, OP_NOT_OPENED,
pInfo, pTaskInfo); pInfo, pTaskInfo);
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamStateAgg, NULL, destroyStreamStateOperatorInfo, pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamStateAgg, NULL, destroyStreamStateOperatorInfo,
optrDefaultBufFn, NULL); optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamStateReleaseState, streamStateReloadState);
initDownStream(downstream, &pInfo->streamAggSup, pOperator->operatorType, pInfo->primaryTsIndex, &pInfo->twAggSup); initDownStream(downstream, &pInfo->streamAggSup, pOperator->operatorType, pInfo->primaryTsIndex, &pInfo->twAggSup);
code = appendDownstream(pOperator, &downstream, 1); code = appendDownstream(pOperator, &downstream, 1);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
@ -5021,13 +5250,16 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys
pInfo->pUpdated = NULL; pInfo->pUpdated = NULL;
pInfo->pUpdatedMap = NULL; pInfo->pUpdatedMap = NULL;
int32_t funResSize= getMaxFunResSize(pSup, numOfCols); int32_t funResSize= getMaxFunResSize(pSup, numOfCols);
pInfo->pState->pFileState = pTaskInfo->storageAPI.stateStore.streamFileStateInit(tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize,
compareTs, pInfo->pState, pInfo->twAggSup.deleteMark); pInfo->pState->pFileState = pTaskInfo->storageAPI.stateStore.streamFileStateInit(
tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, compareTs, pInfo->pState,
pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo));
setOperatorInfo(pOperator, "StreamIntervalOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL, true, OP_NOT_OPENED, setOperatorInfo(pOperator, "StreamIntervalOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL, true, OP_NOT_OPENED,
pInfo, pTaskInfo); pInfo, pTaskInfo);
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamIntervalAgg, NULL, pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamIntervalAgg, NULL,
destroyStreamFinalIntervalOperatorInfo, optrDefaultBufFn, NULL); destroyStreamFinalIntervalOperatorInfo, optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState);
pInfo->statestore = pTaskInfo->storageAPI.stateStore; pInfo->statestore = pTaskInfo->storageAPI.stateStore;
pInfo->recvGetAll = false; pInfo->recvGetAll = false;

View File

@ -1063,7 +1063,7 @@ static STupleHandle* tsortPQSortNextTuple(SSortHandle* pHandle) {
if (!pData) { if (!pData) {
colDataSetNULL(bdGetColumnInfoData(pHandle->pDataBlock, i), 0); colDataSetNULL(bdGetColumnInfoData(pHandle->pDataBlock, i), 0);
} else { } else {
colDataAppend(bdGetColumnInfoData(pHandle->pDataBlock, i), 0, pData, false); colDataSetVal(bdGetColumnInfoData(pHandle->pDataBlock, i), 0, pData, false);
} }
} }
pHandle->pDataBlock->info.rows++; pHandle->pDataBlock->info.rows++;

View File

@ -1712,7 +1712,7 @@ int32_t percentileFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId); SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId);
varDataSetLen(buf, len); varDataSetLen(buf, len);
colDataAppend(pCol, pBlock->info.rows, buf, false); colDataSetVal(pCol, pBlock->info.rows, buf, false);
tMemBucketDestroy(pMemBucket); tMemBucketDestroy(pMemBucket);
return pResInfo->numOfRes; return pResInfo->numOfRes;

View File

@ -145,7 +145,7 @@ int32_t executeMakePointFunc(SColumnInfoData *pInputData[], int32_t iLeft, int32
goto _exit; goto _exit;
} }
colDataAppend(pOutputData, TMAX(iLeft, iRight), output, (output == NULL)); colDataSetVal(pOutputData, TMAX(iLeft, iRight), output, (output == NULL));
_exit: _exit:
if (output) { if (output) {
@ -165,7 +165,7 @@ int32_t executeGeomFromTextFunc(SColumnInfoData *pInputData, int32_t i, SColumnI
goto _exit; goto _exit;
} }
colDataAppend(pOutputData, i, output, (output == NULL)); colDataSetVal(pOutputData, i, output, (output == NULL));
_exit: _exit:
if (output) { if (output) {
@ -185,7 +185,7 @@ int32_t executeAsTextFunc(SColumnInfoData *pInputData, int32_t i, SColumnInfoDat
goto _exit; goto _exit;
} }
colDataAppend(pOutputData, i, output, (output == NULL)); colDataSetVal(pOutputData, i, output, (output == NULL));
_exit: _exit:
if (output) { if (output) {
@ -213,7 +213,7 @@ int32_t executeRelationFunc(const GEOSGeometry *geom1, const GEOSPreparedGeometr
} }
} }
colDataAppend(pOutputData, i, &res, (res==-1)); colDataSetVal(pOutputData, i, &res, (res==-1));
return code; return code;
} }

View File

@ -84,7 +84,7 @@ void setScalarParam(SScalarParam *sclParam, int32_t type, void *valueArray, TDRo
} }
else { else {
const char *val = (const char *)valueArray + (i * bytes); const char *val = (const char *)valueArray + (i * bytes);
colDataAppend(sclParam->columnData, i, val, false); colDataSetVal(sclParam->columnData, i, val, false);
} }
} }
} }

View File

@ -12,10 +12,17 @@ IF(NOT TD_DARWIN)
"${SOURCE_LIST}/../../../parser/test/mockCatalogService.cpp" "${SOURCE_LIST}/../../../parser/test/mockCatalogService.cpp"
) )
TARGET_LINK_LIBRARIES( IF (TD_GRANT)
plannerTest TARGET_LINK_LIBRARIES(
PUBLIC os util common nodes planner parser catalog transport gtest function qcom plannerTest
) PUBLIC os util common nodes planner parser catalog transport gtest function qcom grant
)
ELSE ()
TARGET_LINK_LIBRARIES(
plannerTest
PUBLIC os util common nodes planner parser catalog transport gtest function qcom
)
ENDIF()
TARGET_INCLUDE_DIRECTORIES( TARGET_INCLUDE_DIRECTORIES(
plannerTest plannerTest

View File

@ -7,10 +7,18 @@ IF(NOT TD_DARWIN)
AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST) AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST)
ADD_EXECUTABLE(schedulerTest ${SOURCE_LIST}) ADD_EXECUTABLE(schedulerTest ${SOURCE_LIST})
TARGET_LINK_LIBRARIES(
schedulerTest IF (TD_GRANT)
PUBLIC os util common catalog transport gtest qcom taos_static planner scheduler TARGET_LINK_LIBRARIES(
) schedulerTest
PUBLIC os util common catalog transport gtest qcom taos_static planner scheduler grant
)
ELSE ()
TARGET_LINK_LIBRARIES(
schedulerTest
PUBLIC os util common catalog transport gtest qcom taos_static planner scheduler
)
ENDIF()
TARGET_INCLUDE_DIRECTORIES( TARGET_INCLUDE_DIRECTORIES(
schedulerTest schedulerTest

View File

@ -42,10 +42,11 @@ typedef struct {
TdThreadMutex cfMutex; TdThreadMutex cfMutex;
SHashObj* cfInst; SHashObj* cfInst;
int64_t defaultCfInit; int64_t defaultCfInit;
} SBackendHandle; } SBackendWrapper;
void* streamBackendInit(const char* path); void* streamBackendInit(const char* path);
void streamBackendCleanup(void* arg); void streamBackendCleanup(void* arg);
void streamBackendHandleCleanup(void* arg);
SListNode* streamBackendAddCompare(void* backend, void* arg); SListNode* streamBackendAddCompare(void* backend, void* arg);
void streamBackendDelCompare(void* backend, void* arg); void streamBackendDelCompare(void* backend, void* arg);

View File

@ -31,8 +31,9 @@ typedef struct {
void* timer; void* timer;
} SStreamGlobalEnv; } SStreamGlobalEnv;
static SStreamGlobalEnv streamEnv; extern SStreamGlobalEnv streamEnv;
void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration);
int32_t streamDispatchStreamBlock(SStreamTask* pTask); int32_t streamDispatchStreamBlock(SStreamTask* pTask);
SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg); SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg);
@ -41,20 +42,20 @@ SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamT
void destroyStreamDataBlock(SStreamDataBlock* pBlock); void destroyStreamDataBlock(SStreamDataBlock* pBlock);
int32_t streamRetrieveReqToData(const SStreamRetrieveReq* pReq, SStreamDataBlock* pData); int32_t streamRetrieveReqToData(const SStreamRetrieveReq* pReq, SStreamDataBlock* pData);
int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* data);
int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock); int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock);
int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq); int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq);
int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData);
int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet);
int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId, int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId,
SEpSet* pEpSet); SEpSet* pEpSet);
SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem); SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem);
extern int32_t streamBackendId; extern int32_t streamBackendId;
extern int32_t streamBackendCfWrapperId;
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -16,11 +16,13 @@
#include "streamInc.h" #include "streamInc.h"
#include "ttimer.h" #include "ttimer.h"
#define STREAM_TASK_INPUT_QUEUEU_CAPACITY 20480 #define STREAM_TASK_INPUT_QUEUE_CAPACITY 20480
#define STREAM_TASK_INPUT_QUEUEU_CAPACITY_IN_SIZE (50) #define STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE (30)
#define ONE_MB_F (1048576.0) #define ONE_MB_F (1048576.0)
#define QUEUE_MEM_SIZE_IN_MB(_q) (taosQueueMemorySize(_q) / ONE_MB_F) #define QUEUE_MEM_SIZE_IN_MB(_q) (taosQueueMemorySize(_q) / ONE_MB_F)
SStreamGlobalEnv streamEnv;
int32_t streamInit() { int32_t streamInit() {
int8_t old; int8_t old;
while (1) { while (1) {
@ -36,6 +38,7 @@ int32_t streamInit() {
} }
atomic_store_8(&streamEnv.inited, 1); atomic_store_8(&streamEnv.inited, 1);
} }
return 0; return 0;
} }
@ -52,17 +55,30 @@ void streamCleanUp() {
} }
} }
char* createStreamTaskIdStr(int64_t streamId, int32_t taskId) {
char buf[128] = {0};
sprintf(buf, "0x%" PRIx64 "-0x%x", streamId, taskId);
return taosStrdup(buf);
}
void streamSchedByTimer(void* param, void* tmrId) { void streamSchedByTimer(void* param, void* tmrId) {
SStreamTask* pTask = (void*)param; SStreamTask* pTask = (void*)param;
int8_t status = atomic_load_8(&pTask->triggerStatus);
qDebug("s-task:%s in scheduler timer, trigger status:%d", pTask->id.idStr, status);
if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) { if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) {
streamMetaReleaseTask(NULL, pTask); streamMetaReleaseTask(NULL, pTask);
qDebug("s-task:%s jump out of schedTimer", pTask->id.idStr);
return; return;
} }
if (atomic_load_8(&pTask->triggerStatus) == TASK_TRIGGER_STATUS__ACTIVE) { if (status == TASK_TRIGGER_STATUS__ACTIVE) {
SStreamTrigger* trigger = taosAllocateQitem(sizeof(SStreamTrigger), DEF_QITEM, 0); SStreamTrigger* trigger = taosAllocateQitem(sizeof(SStreamTrigger), DEF_QITEM, 0);
if (trigger == NULL) return; if (trigger == NULL) {
return;
}
trigger->type = STREAM_INPUT__GET_RES; trigger->type = STREAM_INPUT__GET_RES;
trigger->pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); trigger->pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock));
if (trigger->pBlock == NULL) { if (trigger->pBlock == NULL) {
@ -74,23 +90,28 @@ void streamSchedByTimer(void* param, void* tmrId) {
atomic_store_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE); atomic_store_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE);
if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)trigger) < 0) { if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)trigger) < 0) {
taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->timer); taosFreeQitem(trigger);
taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->schedTimer);
return; return;
} }
streamSchedExec(pTask); streamSchedExec(pTask);
} }
taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->timer); taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->schedTimer);
} }
int32_t streamSetupTrigger(SStreamTask* pTask) { int32_t streamSetupScheduleTrigger(SStreamTask* pTask) {
if (pTask->triggerParam != 0) { if (pTask->triggerParam != 0) {
int32_t ref = atomic_add_fetch_32(&pTask->refCnt, 1); int32_t ref = atomic_add_fetch_32(&pTask->refCnt, 1);
ASSERT(ref == 2); ASSERT(ref == 2 && pTask->schedTimer == NULL);
pTask->timer = taosTmrStart(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer);
qDebug("s-task:%s setup scheduler trigger, delay:%"PRId64" ms", pTask->id.idStr, pTask->triggerParam);
pTask->schedTimer = taosTmrStart(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer);
pTask->triggerStatus = TASK_TRIGGER_STATUS__INACTIVE; pTask->triggerStatus = TASK_TRIGGER_STATUS__INACTIVE;
} }
return 0; return 0;
} }
@ -103,16 +124,20 @@ int32_t streamSchedExec(SStreamTask* pTask) {
if (pRunReq == NULL) { if (pRunReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
qError("failed to create msg to aunch s-task:%s, reason out of memory", pTask->id.idStr);
return -1; return -1;
} }
pRunReq->head.vgId = pTask->nodeId; pRunReq->head.vgId = pTask->info.nodeId;
pRunReq->streamId = pTask->id.streamId; pRunReq->streamId = pTask->id.streamId;
pRunReq->taskId = pTask->id.taskId; pRunReq->taskId = pTask->id.taskId;
qDebug("trigger to run s-task:%s", pTask->id.idStr);
SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &msg); tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &msg);
qDebug("trigger to run s-task:%s", pTask->id.idStr); } else {
qDebug("s-task:%s not launch task since sched status:%d", pTask->id.idStr, pTask->status.schedStatus);
} }
return 0; return 0;
@ -142,7 +167,7 @@ int32_t streamTaskEnqueueBlocks(SStreamTask* pTask, const SStreamDispatchReq* pR
pDispatchRsp->streamId = htobe64(pReq->streamId); pDispatchRsp->streamId = htobe64(pReq->streamId);
pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId); pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId);
pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId); pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId);
pDispatchRsp->downstreamNodeId = htonl(pTask->nodeId); pDispatchRsp->downstreamNodeId = htonl(pTask->info.nodeId);
pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId); pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId);
pRsp->pCont = buf; pRsp->pCont = buf;
@ -158,21 +183,18 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq,
// enqueue // enqueue
if (pData != NULL) { if (pData != NULL) {
qDebug("s-task:%s (child %d) recv retrieve req from task:0x%x, reqId %" PRId64, pTask->id.idStr, pTask->selfChildId, qDebug("s-task:%s (child %d) recv retrieve req from task:0x%x(vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, pTask->info.selfChildId,
pReq->srcTaskId, pReq->reqId); pReq->srcTaskId, pReq->srcNodeId, pReq->reqId);
pData->type = STREAM_INPUT__DATA_RETRIEVE; pData->type = STREAM_INPUT__DATA_RETRIEVE;
pData->srcVgId = 0; pData->srcVgId = 0;
// decode
/*pData->blocks = pReq->data;*/
/*pBlock->sourceVer = pReq->sourceVer;*/
streamRetrieveReqToData(pReq, pData); streamRetrieveReqToData(pReq, pData);
if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pData) == 0) { if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pData) == 0) {
status = TASK_INPUT_STATUS__NORMAL; status = TASK_INPUT_STATUS__NORMAL;
} else { } else {
status = TASK_INPUT_STATUS__FAILED; status = TASK_INPUT_STATUS__FAILED;
} }
} else { } else { // todo handle oom
/*streamTaskInputFail(pTask);*/ /*streamTaskInputFail(pTask);*/
/*status = TASK_INPUT_STATUS__FAILED;*/ /*status = TASK_INPUT_STATUS__FAILED;*/
} }
@ -187,6 +209,7 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq,
pRsp->pCont = buf; pRsp->pCont = buf;
pRsp->contLen = sizeof(SMsgHead) + sizeof(SStreamRetrieveRsp); pRsp->contLen = sizeof(SMsgHead) + sizeof(SStreamRetrieveRsp);
tmsgSendRsp(pRsp); tmsgSendRsp(pRsp);
return status == TASK_INPUT_STATUS__NORMAL ? 0 : -1; return status == TASK_INPUT_STATUS__NORMAL ? 0 : -1;
} }
@ -231,9 +254,26 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S
return 0; return 0;
} }
// todo record the idle time for dispatch data
int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code) { int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code) {
if (code != TSDB_CODE_SUCCESS) {
// dispatch message failed: network error, or node not available.
// in case of the input queue is full, the code will be TSDB_CODE_SUCCESS, the and pRsp>inputStatus will be set
// flag. here we need to retry dispatch this message to downstream task immediately. handle the case the failure
// happened too fast. todo handle the shuffle dispatch failure
qError("s-task:%s failed to dispatch msg to task:0x%x, code:%s, retry cnt:%d", pTask->id.idStr,
pRsp->downstreamTaskId, tstrerror(code), ++pTask->msgInfo.retryCount);
int32_t ret = streamDispatchAllBlocks(pTask, pTask->msgInfo.pData);
if (ret != TSDB_CODE_SUCCESS) {
}
return TSDB_CODE_SUCCESS;
}
qDebug("s-task:%s receive dispatch rsp, output status:%d code:%d", pTask->id.idStr, pRsp->inputStatus, code); qDebug("s-task:%s receive dispatch rsp, output status:%d code:%d", pTask->id.idStr, pRsp->inputStatus, code);
// there are other dispatch message not response yet
if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
int32_t leftRsp = atomic_sub_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); int32_t leftRsp = atomic_sub_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1);
qDebug("s-task:%s is shuffle, left waiting rsp %d", pTask->id.idStr, leftRsp); qDebug("s-task:%s is shuffle, left waiting rsp %d", pTask->id.idStr, leftRsp);
@ -242,23 +282,39 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i
} }
} }
int8_t old = atomic_exchange_8(&pTask->outputStatus, pRsp->inputStatus); pTask->msgInfo.retryCount = 0;
ASSERT(old == TASK_OUTPUT_STATUS__WAIT); ASSERT(pTask->outputStatus == TASK_OUTPUT_STATUS__WAIT);
// the input queue of the (down stream) task that receive the output data is full, so the TASK_INPUT_STATUS_BLOCKED is rsp qDebug("s-task:%s output status is set to:%d", pTask->id.idStr, pTask->outputStatus);
// todo we need to send EMPTY PACKAGE to detect if the input queue is available for output of upstream task, every 50 ms.
// the input queue of the (down stream) task that receive the output data is full,
// so the TASK_INPUT_STATUS_BLOCKED is rsp
// todo blocking the output status
if (pRsp->inputStatus == TASK_INPUT_STATUS__BLOCKED) { if (pRsp->inputStatus == TASK_INPUT_STATUS__BLOCKED) {
// TODO: init recover timer pTask->msgInfo.blockingTs = taosGetTimestampMs(); // record the blocking start time
qError("s-task:%s inputQ of downstream task:0x%x is full, need to block output", pTask->id.idStr, pRsp->downstreamTaskId);
int32_t waitDuration = 300; // 300 ms
qError("s-task:%s inputQ of downstream task:0x%x is full, time:%" PRId64 "wait for %dms and retry dispatch data",
pTask->id.idStr, pRsp->downstreamTaskId, pTask->msgInfo.blockingTs, waitDuration);
streamRetryDispatchStreamBlock(pTask, waitDuration);
} else { // pipeline send data in output queue
// this message has been sent successfully, let's try next one.
destroyStreamDataBlock(pTask->msgInfo.pData);
pTask->msgInfo.pData = NULL;
if (pTask->msgInfo.blockingTs != 0) {
int64_t el = taosGetTimestampMs() - pTask->msgInfo.blockingTs;
qDebug("s-task:%s resume to normal from inputQ blocking, idle time:%"PRId64"ms", pTask->id.idStr, el);
pTask->msgInfo.blockingTs = 0;
}
// now ready for next data output
atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL);
qError("s-task:%s ignore error, and reset task output status:%d", pTask->id.idStr, pTask->outputStatus);
return 0; // otherwise, continue dispatch the first block to down stream task in pipeline
streamDispatchStreamBlock(pTask);
} }
// otherwise, continue dispatch the first block to down stream task in pipeline
streamDispatchStreamBlock(pTask);
return 0; return 0;
} }
@ -267,25 +323,23 @@ int32_t streamProcessRunReq(SStreamTask* pTask) {
return -1; return -1;
} }
/*if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {*/ /*if (pTask->dispatchType == TASK_OUTPUT__FIXED_DISPATCH || pTask->dispatchType == TASK_OUTPUT__SHUFFLE_DISPATCH) {*/
/*streamDispatchStreamBlock(pTask);*/ /*streamDispatchStreamBlock(pTask);*/
/*}*/ /*}*/
return 0; return 0;
} }
int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pRsp) { int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pRsp) {
qDebug("s-task:%s receive retrieve req from node %d taskId:0x%x", pTask->id.idStr, pReq->srcNodeId, pReq->srcTaskId);
streamTaskEnqueueRetrieve(pTask, pReq, pRsp); streamTaskEnqueueRetrieve(pTask, pReq, pRsp);
ASSERT(pTask->info.taskLevel != TASK_LEVEL__SINK);
ASSERT(pTask->taskLevel != TASK_LEVEL__SINK);
streamSchedExec(pTask); streamSchedExec(pTask);
return 0; return 0;
} }
bool tInputQueueIsFull(const SStreamTask* pTask) { bool tInputQueueIsFull(const SStreamTask* pTask) {
bool isFull = taosQueueItemSize((pTask->inputQueue->queue)) >= STREAM_TASK_INPUT_QUEUEU_CAPACITY; bool isFull = taosQueueItemSize((pTask->inputQueue->queue)) >= STREAM_TASK_INPUT_QUEUE_CAPACITY;
double size = QUEUE_MEM_SIZE_IN_MB(pTask->inputQueue->queue); double size = QUEUE_MEM_SIZE_IN_MB(pTask->inputQueue->queue);
return (isFull || size >= STREAM_TASK_INPUT_QUEUEU_CAPACITY_IN_SIZE); return (isFull || size >= STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE);
} }
int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) {
@ -295,9 +349,9 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) {
if (type == STREAM_INPUT__DATA_SUBMIT) { if (type == STREAM_INPUT__DATA_SUBMIT) {
SStreamDataSubmit* px = (SStreamDataSubmit*)pItem; SStreamDataSubmit* px = (SStreamDataSubmit*)pItem;
if ((pTask->taskLevel == TASK_LEVEL__SOURCE) && tInputQueueIsFull(pTask)) { if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && tInputQueueIsFull(pTask)) {
qError("s-task:%s input queue is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data", qError("s-task:%s input queue is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data",
pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY, STREAM_TASK_INPUT_QUEUEU_CAPACITY_IN_SIZE, total, pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total,
size); size);
streamDataSubmitDestroy(px); streamDataSubmitDestroy(px);
taosFreeQitem(pItem); taosFreeQitem(pItem);
@ -315,9 +369,9 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) {
px->submit.msgLen, px->submit.ver, total, size + px->submit.msgLen/1048576.0); px->submit.msgLen, px->submit.ver, total, size + px->submit.msgLen/1048576.0);
} else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE ||
type == STREAM_INPUT__REF_DATA_BLOCK) { type == STREAM_INPUT__REF_DATA_BLOCK) {
if ((pTask->taskLevel == TASK_LEVEL__SOURCE) && (tInputQueueIsFull(pTask))) { if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && (tInputQueueIsFull(pTask))) {
qError("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort", qError("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort",
pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY, STREAM_TASK_INPUT_QUEUEU_CAPACITY_IN_SIZE, total, pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total,
size); size);
destroyStreamDataBlock((SStreamDataBlock*) pItem); destroyStreamDataBlock((SStreamDataBlock*) pItem);
return -1; return -1;
@ -346,19 +400,21 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) {
static void* streamQueueCurItem(SStreamQueue* queue) { return queue->qItem; } static void* streamQueueCurItem(SStreamQueue* queue) { return queue->qItem; }
void* streamQueueNextItem(SStreamQueue* queue) { void* streamQueueNextItem(SStreamQueue* pQueue) {
int8_t dequeueFlag = atomic_exchange_8(&queue->status, STREAM_QUEUE__PROCESSING); int8_t flag = atomic_exchange_8(&pQueue->status, STREAM_QUEUE__PROCESSING);
if (dequeueFlag == STREAM_QUEUE__FAILED) {
ASSERT(queue->qItem != NULL); if (flag == STREAM_QUEUE__FAILED) {
return streamQueueCurItem(queue); ASSERT(pQueue->qItem != NULL);
return streamQueueCurItem(pQueue);
} else { } else {
queue->qItem = NULL; pQueue->qItem = NULL;
taosGetQitem(queue->qall, &queue->qItem); taosGetQitem(pQueue->qall, &pQueue->qItem);
if (queue->qItem == NULL) { if (pQueue->qItem == NULL) {
taosReadAllQitems(queue->queue, queue->qall); taosReadAllQitems(pQueue->queue, pQueue->qall);
taosGetQitem(queue->qall, &queue->qItem); taosGetQitem(pQueue->qall, &pQueue->qItem);
} }
return streamQueueCurItem(queue);
return streamQueueCurItem(pQueue);
} }
} }

View File

@ -40,16 +40,8 @@ typedef struct {
rocksdb_comparator_t** pCompares; rocksdb_comparator_t** pCompares;
} RocksdbCfInst; } RocksdbCfInst;
uint32_t nextPow2(uint32_t x) { uint32_t nextPow2(uint32_t x);
x = x - 1; int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf);
x = x | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
return x + 1;
}
int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf);
void destroyRocksdbCfInst(RocksdbCfInst* inst); void destroyRocksdbCfInst(RocksdbCfInst* inst);
@ -71,7 +63,22 @@ typedef int (*BackendCmpFunc)(void* state, const char* aBuf, size_t aLen, const
typedef void (*DestroyFunc)(void* state); typedef void (*DestroyFunc)(void* state);
typedef int32_t (*EncodeValueFunc)(void* value, int32_t vlen, int64_t ttl, char** dest); typedef int32_t (*EncodeValueFunc)(void* value, int32_t vlen, int64_t ttl, char** dest);
typedef int32_t (*DecodeValueFunc)(void* value, int32_t vlen, int64_t* ttl, char** dest); typedef int32_t (*DecodeValueFunc)(void* value, int32_t vlen, int64_t* ttl, char** dest);
typedef struct {
const char* key;
int32_t len;
int idx;
BackendCmpFunc cmpFunc;
EncodeFunc enFunc;
DecodeFunc deFunc;
ToStringFunc toStrFunc;
CompareName cmpName;
DestroyFunc detroyFunc;
EncodeValueFunc enValueFunc;
DecodeValueFunc deValueFunc;
} SCfInit;
#define GEN_COLUMN_FAMILY_NAME(name, idstr, SUFFIX) sprintf(name, "%s_%s", idstr, (SUFFIX));
const char* compareDefaultName(void* name); const char* compareDefaultName(void* name);
const char* compareStateName(void* name); const char* compareStateName(void* name);
const char* compareWinKeyName(void* name); const char* compareWinKeyName(void* name);
@ -80,11 +87,67 @@ const char* compareFuncKeyName(void* name);
const char* compareParKeyName(void* name); const char* compareParKeyName(void* name);
const char* comparePartagKeyName(void* name); const char* comparePartagKeyName(void* name);
int defaultKeyComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen);
int defaultKeyEncode(void* k, char* buf);
int defaultKeyDecode(void* k, char* buf);
int defaultKeyToString(void* k, char* buf);
int stateKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen);
int stateKeyEncode(void* k, char* buf);
int stateKeyDecode(void* k, char* buf);
int stateKeyToString(void* k, char* buf);
int stateSessionKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen);
int stateSessionKeyEncode(void* ses, char* buf);
int stateSessionKeyDecode(void* ses, char* buf);
int stateSessionKeyToString(void* k, char* buf);
int winKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen);
int winKeyEncode(void* k, char* buf);
int winKeyDecode(void* k, char* buf);
int winKeyToString(void* k, char* buf);
int tupleKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen);
int tupleKeyEncode(void* k, char* buf);
int tupleKeyDecode(void* k, char* buf);
int tupleKeyToString(void* k, char* buf);
int parKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen);
int parKeyEncode(void* k, char* buf);
int parKeyDecode(void* k, char* buf);
int parKeyToString(void* k, char* buf);
int stremaValueEncode(void* k, char* buf);
int streamValueDecode(void* k, char* buf);
int32_t streamValueToString(void* k, char* buf);
int32_t streaValueIsStale(void* k, int64_t ts);
void destroyFunc(void* arg);
int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest);
int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest);
SCfInit ginitDict[] = {
{"default", 7, 0, defaultKeyComp, defaultKeyEncode, defaultKeyDecode, defaultKeyToString, compareDefaultName,
destroyFunc, encodeValueFunc, decodeValueFunc},
{"state", 5, 1, stateKeyDBComp, stateKeyEncode, stateKeyDecode, stateKeyToString, compareStateName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"fill", 4, 2, winKeyDBComp, winKeyEncode, winKeyDecode, winKeyToString, compareWinKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"sess", 4, 3, stateSessionKeyDBComp, stateSessionKeyEncode, stateSessionKeyDecode, stateSessionKeyToString,
compareSessionKeyName, destroyFunc, encodeValueFunc, decodeValueFunc},
{"func", 4, 4, tupleKeyDBComp, tupleKeyEncode, tupleKeyDecode, tupleKeyToString, compareFuncKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"parname", 7, 5, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, compareParKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"partag", 6, 6, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, comparePartagKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
};
void* streamBackendInit(const char* path) { void* streamBackendInit(const char* path) {
uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20; uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20;
qDebug("start to init stream backend at %s", path); qDebug("start to init stream backend at %s", path);
SBackendHandle* pHandle = taosMemoryCalloc(1, sizeof(SBackendHandle)); SBackendWrapper* pHandle = taosMemoryCalloc(1, sizeof(SBackendWrapper));
pHandle->list = tdListNew(sizeof(SCfComparator)); pHandle->list = tdListNew(sizeof(SCfComparator));
taosThreadMutexInit(&pHandle->mutex, NULL); taosThreadMutexInit(&pHandle->mutex, NULL);
taosThreadMutexInit(&pHandle->cfMutex, NULL); taosThreadMutexInit(&pHandle->cfMutex, NULL);
@ -154,8 +217,8 @@ _EXIT:
return NULL; return NULL;
} }
void streamBackendCleanup(void* arg) { void streamBackendCleanup(void* arg) {
SBackendHandle* pHandle = (SBackendHandle*)arg; SBackendWrapper* pHandle = (SBackendWrapper*)arg;
RocksdbCfInst** pIter = (RocksdbCfInst**)taosHashIterate(pHandle->cfInst, NULL); RocksdbCfInst** pIter = (RocksdbCfInst**)taosHashIterate(pHandle->cfInst, NULL);
while (pIter != NULL) { while (pIter != NULL) {
RocksdbCfInst* inst = *pIter; RocksdbCfInst* inst = *pIter;
destroyRocksdbCfInst(inst); destroyRocksdbCfInst(inst);
@ -194,17 +257,79 @@ void streamBackendCleanup(void* arg) {
qDebug("destroy stream backend backend:%p", pHandle); qDebug("destroy stream backend backend:%p", pHandle);
return; return;
} }
void streamBackendHandleCleanup(void* arg) {
SBackendCfWrapper* wrapper = arg;
bool remove = wrapper->remove;
qDebug("start to do-close backendwrapper %p, %s", wrapper, wrapper->idstr);
if (wrapper->rocksdb == NULL) {
return;
}
int cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]);
char* err = NULL;
if (remove) {
for (int i = 0; i < cfLen; i++) {
if (wrapper->pHandle[i] != NULL)
rocksdb_drop_column_family(wrapper->rocksdb, ((rocksdb_column_family_handle_t**)wrapper->pHandle)[i], &err);
if (err != NULL) {
// qError("failed to create cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err);
taosMemoryFreeClear(err);
}
}
} else {
rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create();
for (int i = 0; i < cfLen; i++) {
if (wrapper->pHandle[i] != NULL) rocksdb_flush_cf(wrapper->rocksdb, flushOpt, wrapper->pHandle[i], &err);
if (err != NULL) {
qError("failed to create cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err);
taosMemoryFreeClear(err);
}
}
rocksdb_flushoptions_destroy(flushOpt);
}
for (int i = 0; i < cfLen; i++) {
if (wrapper->pHandle[i] != NULL) {
rocksdb_column_family_handle_destroy(wrapper->pHandle[i]);
}
}
taosMemoryFreeClear(wrapper->pHandle);
for (int i = 0; i < cfLen; i++) {
rocksdb_options_destroy(wrapper->cfOpts[i]);
rocksdb_block_based_options_destroy(((RocksdbCfParam*)wrapper->param)[i].tableOpt);
}
if (remove) {
streamBackendDelCompare(wrapper->pBackend, wrapper->pComparNode);
}
rocksdb_writeoptions_destroy(wrapper->writeOpts);
wrapper->writeOpts = NULL;
rocksdb_readoptions_destroy(wrapper->readOpts);
wrapper->readOpts = NULL;
taosMemoryFreeClear(wrapper->cfOpts);
taosMemoryFreeClear(wrapper->param);
taosThreadRwlockDestroy(&wrapper->rwLock);
wrapper->rocksdb = NULL;
taosReleaseRef(streamBackendId, wrapper->backendId);
qDebug("end to do-close backendwrapper %p, %s", wrapper, wrapper->idstr);
taosMemoryFree(wrapper);
return;
}
SListNode* streamBackendAddCompare(void* backend, void* arg) { SListNode* streamBackendAddCompare(void* backend, void* arg) {
SBackendHandle* pHandle = (SBackendHandle*)backend; SBackendWrapper* pHandle = (SBackendWrapper*)backend;
SListNode* node = NULL; SListNode* node = NULL;
taosThreadMutexLock(&pHandle->mutex); taosThreadMutexLock(&pHandle->mutex);
node = tdListAdd(pHandle->list, arg); node = tdListAdd(pHandle->list, arg);
taosThreadMutexUnlock(&pHandle->mutex); taosThreadMutexUnlock(&pHandle->mutex);
return node; return node;
} }
void streamBackendDelCompare(void* backend, void* arg) { void streamBackendDelCompare(void* backend, void* arg) {
SBackendHandle* pHandle = (SBackendHandle*)backend; SBackendWrapper* pHandle = (SBackendWrapper*)backend;
SListNode* node = NULL; SListNode* node = NULL;
taosThreadMutexLock(&pHandle->mutex); taosThreadMutexLock(&pHandle->mutex);
node = tdListPopNode(pHandle->list, arg); node = tdListPopNode(pHandle->list, arg);
taosThreadMutexUnlock(&pHandle->mutex); taosThreadMutexUnlock(&pHandle->mutex);
@ -542,23 +667,6 @@ void destroyFunc(void* arg) {
return; return;
} }
typedef struct {
const char* key;
int32_t len;
int idx;
BackendCmpFunc cmpFunc;
EncodeFunc enFunc;
DecodeFunc deFunc;
ToStringFunc toStrFunc;
CompareName cmpName;
DestroyFunc detroyFunc;
EncodeValueFunc enValueFunc;
DecodeValueFunc deValueFunc;
} SCfInit;
#define GEN_COLUMN_FAMILY_NAME(name, idstr, SUFFIX) sprintf(name, "%s_%s", idstr, (SUFFIX));
int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest) { int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest) {
SStreamValue key = {.unixTimestamp = ttl, .len = vlen, .data = (char*)(value)}; SStreamValue key = {.unixTimestamp = ttl, .len = vlen, .data = (char*)(value)};
int32_t len = 0; int32_t len = 0;
@ -613,22 +721,6 @@ int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest) {
} }
return key.len; return key.len;
} }
SCfInit ginitDict[] = {
{"default", 7, 0, defaultKeyComp, defaultKeyEncode, defaultKeyDecode, defaultKeyToString, compareDefaultName,
destroyFunc, encodeValueFunc, decodeValueFunc},
{"state", 5, 1, stateKeyDBComp, stateKeyEncode, stateKeyDecode, stateKeyToString, compareStateName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"fill", 4, 2, winKeyDBComp, winKeyEncode, winKeyDecode, winKeyToString, compareWinKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"sess", 4, 3, stateSessionKeyDBComp, stateSessionKeyEncode, stateSessionKeyDecode, stateSessionKeyToString,
compareSessionKeyName, destroyFunc, encodeValueFunc, decodeValueFunc},
{"func", 4, 4, tupleKeyDBComp, tupleKeyEncode, tupleKeyDecode, tupleKeyToString, compareFuncKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"parname", 7, 5, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, compareParKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"partag", 6, 6, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, comparePartagKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
};
const char* compareDefaultName(void* arg) { const char* compareDefaultName(void* arg) {
(void)arg; (void)arg;
@ -697,11 +789,11 @@ void destroyRocksdbCfInst(RocksdbCfInst* inst) {
} }
int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf) { int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf) {
SBackendHandle* handle = backend; SBackendWrapper* handle = backend;
char* err = NULL; char* err = NULL;
int64_t streamId; int64_t streamId;
int32_t taskId, dummy = 0; int32_t taskId, dummy = 0;
char suffix[64] = {0}; char suffix[64] = {0};
rocksdb_options_t** cfOpts = taosMemoryCalloc(nCf, sizeof(rocksdb_options_t*)); rocksdb_options_t** cfOpts = taosMemoryCalloc(nCf, sizeof(rocksdb_options_t*));
RocksdbCfParam* params = taosMemoryCalloc(nCf, sizeof(RocksdbCfParam*)); RocksdbCfParam* params = taosMemoryCalloc(nCf, sizeof(RocksdbCfParam*));
@ -821,23 +913,30 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t
int streamStateOpenBackend(void* backend, SStreamState* pState) { int streamStateOpenBackend(void* backend, SStreamState* pState) {
qInfo("start to open state %p on backend %p 0x%" PRIx64 "-%d", pState, backend, pState->streamId, pState->taskId); qInfo("start to open state %p on backend %p 0x%" PRIx64 "-%d", pState, backend, pState->streamId, pState->taskId);
taosAcquireRef(streamBackendId, pState->streamBackendRid); taosAcquireRef(streamBackendId, pState->streamBackendRid);
SBackendHandle* handle = backend; SBackendWrapper* handle = backend;
SBackendCfWrapper* pBackendCfWrapper = taosMemoryCalloc(1, sizeof(SBackendCfWrapper));
sprintf(pState->pTdbState->idstr, "0x%" PRIx64 "-%d", pState->streamId, pState->taskId);
taosThreadMutexLock(&handle->cfMutex); taosThreadMutexLock(&handle->cfMutex);
RocksdbCfInst** ppInst = taosHashGet(handle->cfInst, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1); RocksdbCfInst** ppInst = taosHashGet(handle->cfInst, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1);
if (ppInst != NULL && *ppInst != NULL) { if (ppInst != NULL && *ppInst != NULL) {
RocksdbCfInst* inst = *ppInst; RocksdbCfInst* inst = *ppInst;
pState->pTdbState->rocksdb = inst->db; pBackendCfWrapper->rocksdb = inst->db;
pState->pTdbState->pHandle = (void**)inst->pHandle; pBackendCfWrapper->pHandle = (void**)inst->pHandle;
pState->pTdbState->writeOpts = inst->wOpt; pBackendCfWrapper->writeOpts = inst->wOpt;
pState->pTdbState->readOpts = inst->rOpt; pBackendCfWrapper->readOpts = inst->rOpt;
pState->pTdbState->cfOpts = (void**)(inst->cfOpt); pBackendCfWrapper->cfOpts = (void**)(inst->cfOpt);
pState->pTdbState->dbOpt = handle->dbOpt; pBackendCfWrapper->dbOpt = handle->dbOpt;
pState->pTdbState->param = inst->param; pBackendCfWrapper->param = inst->param;
pState->pTdbState->pBackend = handle; pBackendCfWrapper->pBackend = handle;
pState->pTdbState->pComparNode = inst->pCompareNode; pBackendCfWrapper->pComparNode = inst->pCompareNode;
taosThreadMutexUnlock(&handle->cfMutex); taosThreadMutexUnlock(&handle->cfMutex);
pBackendCfWrapper->backendId = pState->streamBackendRid;
memcpy(pBackendCfWrapper->idstr, pState->pTdbState->idstr, sizeof(pState->pTdbState->idstr));
int64_t id = taosAddRef(streamBackendCfWrapperId, pBackendCfWrapper);
pState->pTdbState->backendCfWrapperId = id;
pState->pTdbState->pBackendCfWrapper = pBackendCfWrapper;
qInfo("succ to open state %p on backendWrapper, %p, %s", pState, pBackendCfWrapper, pBackendCfWrapper->idstr);
return 0; return 0;
} }
taosThreadMutexUnlock(&handle->cfMutex); taosThreadMutexUnlock(&handle->cfMutex);
@ -870,27 +969,33 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) {
pCompare[i] = compare; pCompare[i] = compare;
} }
rocksdb_column_family_handle_t** cfHandle = taosMemoryCalloc(cfLen, sizeof(rocksdb_column_family_handle_t*)); rocksdb_column_family_handle_t** cfHandle = taosMemoryCalloc(cfLen, sizeof(rocksdb_column_family_handle_t*));
pState->pTdbState->rocksdb = handle->db; pBackendCfWrapper->rocksdb = handle->db;
pState->pTdbState->pHandle = (void**)cfHandle; pBackendCfWrapper->pHandle = (void**)cfHandle;
pState->pTdbState->writeOpts = rocksdb_writeoptions_create(); pBackendCfWrapper->writeOpts = rocksdb_writeoptions_create();
pState->pTdbState->readOpts = rocksdb_readoptions_create(); pBackendCfWrapper->readOpts = rocksdb_readoptions_create();
pState->pTdbState->cfOpts = (void**)cfOpt; pBackendCfWrapper->cfOpts = (void**)cfOpt;
pState->pTdbState->dbOpt = handle->dbOpt; pBackendCfWrapper->dbOpt = handle->dbOpt;
pState->pTdbState->param = param; pBackendCfWrapper->param = param;
pState->pTdbState->pBackend = handle; pBackendCfWrapper->pBackend = handle;
pBackendCfWrapper->backendId = pState->streamBackendRid;
taosThreadRwlockInit(&pState->pTdbState->rwLock, NULL); taosThreadRwlockInit(&pBackendCfWrapper->rwLock, NULL);
SCfComparator compare = {.comp = pCompare, .numOfComp = cfLen}; SCfComparator compare = {.comp = pCompare, .numOfComp = cfLen};
pState->pTdbState->pComparNode = streamBackendAddCompare(handle, &compare); pBackendCfWrapper->pComparNode = streamBackendAddCompare(handle, &compare);
rocksdb_writeoptions_disable_WAL(pState->pTdbState->writeOpts, 1); rocksdb_writeoptions_disable_WAL(pBackendCfWrapper->writeOpts, 1);
qInfo("succ to open state %p on backend, %p, 0x%" PRIx64 "-%d", pState, handle, pState->streamId, pState->taskId); memcpy(pBackendCfWrapper->idstr, pState->pTdbState->idstr, sizeof(pState->pTdbState->idstr));
int64_t id = taosAddRef(streamBackendCfWrapperId, pBackendCfWrapper);
pState->pTdbState->backendCfWrapperId = id;
pState->pTdbState->pBackendCfWrapper = pBackendCfWrapper;
qInfo("succ to open state %p on backendWrapper %p %s", pState, pBackendCfWrapper, pBackendCfWrapper->idstr);
return 0; return 0;
} }
void streamStateCloseBackend(SStreamState* pState, bool remove) { void streamStateCloseBackend(SStreamState* pState, bool remove) {
SBackendHandle* pHandle = pState->pTdbState->pBackend; SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SBackendWrapper* pHandle = wrapper->pBackend;
taosThreadMutexLock(&pHandle->cfMutex); taosThreadMutexLock(&pHandle->cfMutex);
RocksdbCfInst** ppInst = taosHashGet(pHandle->cfInst, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1); RocksdbCfInst** ppInst = taosHashGet(pHandle->cfInst, wrapper->idstr, strlen(pState->pTdbState->idstr) + 1);
if (ppInst != NULL && *ppInst != NULL) { if (ppInst != NULL && *ppInst != NULL) {
RocksdbCfInst* inst = *ppInst; RocksdbCfInst* inst = *ppInst;
taosMemoryFree(inst); taosMemoryFree(inst);
@ -899,63 +1004,10 @@ void streamStateCloseBackend(SStreamState* pState, bool remove) {
taosThreadMutexUnlock(&pHandle->cfMutex); taosThreadMutexUnlock(&pHandle->cfMutex);
char* status[] = {"close", "drop"}; char* status[] = {"close", "drop"};
qInfo("start to close %s state %p on backend %p 0x%" PRIx64 "-%d", status[remove == false ? 0 : 1], pState, pHandle, qInfo("start to close %s state %p on backendWrapper %p %s", status[remove == false ? 0 : 1], pState, wrapper,
pState->streamId, pState->taskId); wrapper->idstr);
if (pState->pTdbState->rocksdb == NULL) { wrapper->remove |= remove; // update by other pState
return; taosReleaseRef(streamBackendCfWrapperId, pState->pTdbState->backendCfWrapperId);
}
int cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]);
char* err = NULL;
if (remove) {
for (int i = 0; i < cfLen; i++) {
if (pState->pTdbState->pHandle[i] != NULL)
rocksdb_drop_column_family(pState->pTdbState->rocksdb,
((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[i], &err);
if (err != NULL) {
qError("failed to create cf:%s_%s, reason:%s", pState->pTdbState->idstr, ginitDict[i].key, err);
taosMemoryFreeClear(err);
}
}
} else {
rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create();
for (int i = 0; i < cfLen; i++) {
if (pState->pTdbState->pHandle[i] != NULL)
rocksdb_flush_cf(pState->pTdbState->rocksdb, flushOpt, pState->pTdbState->pHandle[i], &err);
if (err != NULL) {
qError("failed to create cf:%s_%s, reason:%s", pState->pTdbState->idstr, ginitDict[i].key, err);
taosMemoryFreeClear(err);
}
}
rocksdb_flushoptions_destroy(flushOpt);
}
for (int i = 0; i < cfLen; i++) {
if (pState->pTdbState->pHandle[i] != NULL) {
rocksdb_column_family_handle_destroy(pState->pTdbState->pHandle[i]);
}
}
taosMemoryFreeClear(pState->pTdbState->pHandle);
for (int i = 0; i < cfLen; i++) {
rocksdb_options_destroy(pState->pTdbState->cfOpts[i]);
rocksdb_block_based_options_destroy(((RocksdbCfParam*)pState->pTdbState->param)[i].tableOpt);
}
if (remove) {
streamBackendDelCompare(pState->pTdbState->pBackend, pState->pTdbState->pComparNode);
}
rocksdb_writeoptions_destroy(pState->pTdbState->writeOpts);
pState->pTdbState->writeOpts = NULL;
rocksdb_readoptions_destroy(pState->pTdbState->readOpts);
pState->pTdbState->readOpts = NULL;
taosMemoryFreeClear(pState->pTdbState->cfOpts);
taosMemoryFreeClear(pState->pTdbState->param);
taosThreadRwlockDestroy(&pState->pTdbState->rwLock);
pState->pTdbState->rocksdb = NULL;
taosReleaseRef(streamBackendId, pState->streamBackendRid);
} }
void streamStateDestroyCompar(void* arg) { void streamStateDestroyCompar(void* arg) {
SCfComparator* comp = (SCfComparator*)arg; SCfComparator* comp = (SCfComparator*)arg;
@ -975,26 +1027,26 @@ int streamStateGetCfIdx(SStreamState* pState, const char* funcName) {
} }
} }
if (pState != NULL && idx != -1) { if (pState != NULL && idx != -1) {
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
rocksdb_column_family_handle_t* cf = NULL; rocksdb_column_family_handle_t* cf = NULL;
taosThreadRwlockRdlock(&pState->pTdbState->rwLock); taosThreadRwlockRdlock(&wrapper->rwLock);
cf = pState->pTdbState->pHandle[idx]; cf = wrapper->pHandle[idx];
taosThreadRwlockUnlock(&pState->pTdbState->rwLock); taosThreadRwlockUnlock(&wrapper->rwLock);
if (cf == NULL) { if (cf == NULL) {
char buf[128] = {0}; char buf[128] = {0};
GEN_COLUMN_FAMILY_NAME(buf, pState->pTdbState->idstr, ginitDict[idx].key); GEN_COLUMN_FAMILY_NAME(buf, wrapper->idstr, ginitDict[idx].key);
char* err = NULL; char* err = NULL;
taosThreadRwlockWrlock(&pState->pTdbState->rwLock); taosThreadRwlockWrlock(&wrapper->rwLock);
cf = rocksdb_create_column_family(pState->pTdbState->rocksdb, pState->pTdbState->cfOpts[idx], buf, &err); cf = rocksdb_create_column_family(wrapper->rocksdb, wrapper->cfOpts[idx], buf, &err);
if (err != NULL) { if (err != NULL) {
idx = -1; idx = -1;
qError("failed to to open cf, %p 0x%" PRIx64 "-%d_%s, reason:%s", pState, pState->streamId, pState->taskId, qError("failed to to open cf, %p %s_%s, reason:%s", pState, wrapper->idstr, funcName, err);
funcName, err);
taosMemoryFree(err); taosMemoryFree(err);
} else { } else {
pState->pTdbState->pHandle[idx] = cf; wrapper->pHandle[idx] = cf;
} }
taosThreadRwlockUnlock(&pState->pTdbState->rwLock); taosThreadRwlockUnlock(&wrapper->rwLock);
} }
} }
@ -1014,8 +1066,9 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa
rocksdb_readoptions_t** readOpt) { rocksdb_readoptions_t** readOpt) {
int idx = streamStateGetCfIdx(pState, cfName); int idx = streamStateGetCfIdx(pState, cfName);
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
if (snapshot != NULL) { if (snapshot != NULL) {
*snapshot = (rocksdb_snapshot_t*)rocksdb_create_snapshot(pState->pTdbState->rocksdb); *snapshot = (rocksdb_snapshot_t*)rocksdb_create_snapshot(wrapper->rocksdb);
} }
rocksdb_readoptions_t* rOpt = rocksdb_readoptions_create(); rocksdb_readoptions_t* rOpt = rocksdb_readoptions_create();
*readOpt = rOpt; *readOpt = rOpt;
@ -1023,8 +1076,7 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa
rocksdb_readoptions_set_snapshot(rOpt, *snapshot); rocksdb_readoptions_set_snapshot(rOpt, *snapshot);
rocksdb_readoptions_set_fill_cache(rOpt, 0); rocksdb_readoptions_set_fill_cache(rOpt, 0);
return rocksdb_create_iterator_cf(pState->pTdbState->rocksdb, rOpt, return rocksdb_create_iterator_cf(wrapper->rocksdb, rOpt, ((rocksdb_column_family_handle_t**)wrapper->pHandle)[idx]);
((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[idx]);
} }
#define STREAM_STATE_PUT_ROCKSDB(pState, funcname, key, value, vLen) \ #define STREAM_STATE_PUT_ROCKSDB(pState, funcname, key, value, vLen) \
@ -1038,15 +1090,15 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa
code = -1; \ code = -1; \
break; \ break; \
} \ } \
char toString[128] = {0}; \ SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \
char toString[128] = {0}; \
if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ int32_t klen = ginitDict[i].enFunc((void*)key, buf); \
rocksdb_column_family_handle_t* pHandle = \ rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \
((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[ginitDict[i].idx]; \ rocksdb_t* db = wrapper->rocksdb; \
rocksdb_t* db = pState->pTdbState->rocksdb; \ rocksdb_writeoptions_t* opts = wrapper->writeOpts; \
rocksdb_writeoptions_t* opts = pState->pTdbState->writeOpts; \ char* ttlV = NULL; \
char* ttlV = NULL; \ int32_t ttlVLen = ginitDict[i].enValueFunc((char*)value, vLen, 0, &ttlV); \
int32_t ttlVLen = ginitDict[i].enValueFunc((char*)value, vLen, 0, &ttlV); \
rocksdb_put_cf(db, opts, pHandle, (const char*)buf, klen, (const char*)ttlV, (size_t)ttlVLen, &err); \ rocksdb_put_cf(db, opts, pHandle, (const char*)buf, klen, (const char*)ttlV, (size_t)ttlVLen, &err); \
if (err != NULL) { \ if (err != NULL) { \
taosMemoryFree(err); \ taosMemoryFree(err); \
@ -1058,81 +1110,76 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa
taosMemoryFree(ttlV); \ taosMemoryFree(ttlV); \
} while (0); } while (0);
#define STREAM_STATE_GET_ROCKSDB(pState, funcname, key, pVal, vLen) \ #define STREAM_STATE_GET_ROCKSDB(pState, funcname, key, pVal, vLen) \
do { \ do { \
code = 0; \ code = 0; \
char buf[128] = {0}; \ char buf[128] = {0}; \
char* err = NULL; \ char* err = NULL; \
int i = streamStateGetCfIdx(pState, funcname); \ int i = streamStateGetCfIdx(pState, funcname); \
if (i < 0) { \ if (i < 0) { \
qWarn("streamState failed to get cf name: %s", funcname); \ qWarn("streamState failed to get cf name: %s", funcname); \
code = -1; \ code = -1; \
break; \ break; \
} \ } \
char toString[128] = {0}; \ SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \
if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ char toString[128] = {0}; \
int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
rocksdb_column_family_handle_t* pHandle = \ int32_t klen = ginitDict[i].enFunc((void*)key, buf); \
((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[ginitDict[i].idx]; \ rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \
rocksdb_t* db = pState->pTdbState->rocksdb; \ rocksdb_t* db = wrapper->rocksdb; \
rocksdb_readoptions_t* opts = pState->pTdbState->readOpts; \ rocksdb_readoptions_t* opts = wrapper->readOpts; \
size_t len = 0; \ size_t len = 0; \
char* val = rocksdb_get_cf(db, opts, pHandle, (const char*)buf, klen, (size_t*)&len, &err); \ char* val = rocksdb_get_cf(db, opts, pHandle, (const char*)buf, klen, (size_t*)&len, &err); \
if (val == NULL || len == 0) { \ if (val == NULL || len == 0) { \
if (err == NULL) { \ if (err == NULL) { \
qTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, pState->pTdbState->idstr, \ qTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, wrapper->idstr, funcname); \
funcname); \ } else { \
} else { \ qError("streamState str: %s failed to read from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \
qError("streamState str: %s failed to read from %s_%s, err: %s", toString, pState->pTdbState->idstr, funcname, \ taosMemoryFreeClear(err); \
err); \ } \
taosMemoryFreeClear(err); \ code = -1; \
} \ } else { \
code = -1; \ char* p = NULL; \
} else { \ int32_t tlen = ginitDict[i].deValueFunc(val, len, NULL, (char**)pVal); \
char* p = NULL; \ if (tlen <= 0) { \
int32_t tlen = ginitDict[i].deValueFunc(val, len, NULL, (char**)pVal); \ qError("streamState str: %s failed to read from %s_%s, err: already ttl ", toString, wrapper->idstr, \
if (tlen <= 0) { \ funcname); \
qError("streamState str: %s failed to read from %s_%s, err: already ttl ", toString, pState->pTdbState->idstr, \ code = -1; \
funcname); \ } else { \
code = -1; \ qTrace("streamState str: %s succ to read from %s_%s, valLen:%d", toString, wrapper->idstr, funcname, tlen); \
} else { \ } \
qTrace("streamState str: %s succ to read from %s_%s, valLen:%d", toString, pState->pTdbState->idstr, funcname, \ taosMemoryFree(val); \
tlen); \ if (vLen != NULL) *vLen = tlen; \
} \ } \
taosMemoryFree(val); \ if (code == 0) qDebug("streamState str: %s succ to read from %s_%s", toString, wrapper->idstr, funcname); \
if (vLen != NULL) *vLen = tlen; \
} \
if (code == 0) \
qDebug("streamState str: %s succ to read from %s_%s", toString, pState->pTdbState->idstr, funcname); \
} while (0); } while (0);
#define STREAM_STATE_DEL_ROCKSDB(pState, funcname, key) \ #define STREAM_STATE_DEL_ROCKSDB(pState, funcname, key) \
do { \ do { \
code = 0; \ code = 0; \
char buf[128] = {0}; \ char buf[128] = {0}; \
char* err = NULL; \ char* err = NULL; \
int i = streamStateGetCfIdx(pState, funcname); \ int i = streamStateGetCfIdx(pState, funcname); \
if (i < 0) { \ if (i < 0) { \
qWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \ qWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \
code = -1; \ code = -1; \
break; \ break; \
} \ } \
char toString[128] = {0}; \ SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \
if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ char toString[128] = {0}; \
int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
rocksdb_column_family_handle_t* pHandle = \ int32_t klen = ginitDict[i].enFunc((void*)key, buf); \
((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[ginitDict[i].idx]; \ rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \
rocksdb_t* db = pState->pTdbState->rocksdb; \ rocksdb_t* db = wrapper->rocksdb; \
rocksdb_writeoptions_t* opts = pState->pTdbState->writeOpts; \ rocksdb_writeoptions_t* opts = wrapper->writeOpts; \
rocksdb_delete_cf(db, opts, pHandle, (const char*)buf, klen, &err); \ rocksdb_delete_cf(db, opts, pHandle, (const char*)buf, klen, &err); \
if (err != NULL) { \ if (err != NULL) { \
qError("streamState str: %s failed to del from %s_%s, err: %s", toString, pState->pTdbState->idstr, funcname, \ qError("streamState str: %s failed to del from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \
err); \ taosMemoryFree(err); \
taosMemoryFree(err); \ code = -1; \
code = -1; \ } else { \
} else { \ qTrace("streamState str: %s succ to del from %s_%s", toString, wrapper->idstr, funcname); \
qTrace("streamState str: %s succ to del from %s_%s", toString, pState->pTdbState->idstr, funcname); \ } \
} \
} while (0); } while (0);
// state cf // state cf
@ -1158,18 +1205,19 @@ int32_t streamStateDel_rocksdb(SStreamState* pState, const SWinKey* key) {
int32_t streamStateClear_rocksdb(SStreamState* pState) { int32_t streamStateClear_rocksdb(SStreamState* pState) {
qDebug("streamStateClear_rocksdb"); qDebug("streamStateClear_rocksdb");
char sKeyStr[128] = {0}; SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
char eKeyStr[128] = {0}; char sKeyStr[128] = {0};
SStateKey sKey = {.key = {.ts = 0, .groupId = 0}, .opNum = pState->number}; char eKeyStr[128] = {0};
SStateKey eKey = {.key = {.ts = INT64_MAX, .groupId = UINT64_MAX}, .opNum = pState->number}; SStateKey sKey = {.key = {.ts = 0, .groupId = 0}, .opNum = pState->number};
SStateKey eKey = {.key = {.ts = INT64_MAX, .groupId = UINT64_MAX}, .opNum = pState->number};
int sLen = stateKeyEncode(&sKey, sKeyStr); int sLen = stateKeyEncode(&sKey, sKeyStr);
int eLen = stateKeyEncode(&eKey, eKeyStr); int eLen = stateKeyEncode(&eKey, eKeyStr);
if (pState->pTdbState->pHandle[1] != NULL) { if (wrapper->pHandle[1] != NULL) {
char* err = NULL; char* err = NULL;
rocksdb_delete_range_cf(pState->pTdbState->rocksdb, pState->pTdbState->writeOpts, pState->pTdbState->pHandle[1], rocksdb_delete_range_cf(wrapper->rocksdb, wrapper->writeOpts, wrapper->pHandle[1], sKeyStr, sLen, eKeyStr, eLen,
sKeyStr, sLen, eKeyStr, eLen, &err); &err);
if (err != NULL) { if (err != NULL) {
char toStringStart[128] = {0}; char toStringStart[128] = {0};
char toStringEnd[128] = {0}; char toStringEnd[128] = {0};
@ -1179,7 +1227,7 @@ int32_t streamStateClear_rocksdb(SStreamState* pState) {
qWarn("failed to delete range cf(state) start: %s, end:%s, reason:%s", toStringStart, toStringEnd, err); qWarn("failed to delete range cf(state) start: %s, end:%s, reason:%s", toStringStart, toStringEnd, err);
taosMemoryFree(err); taosMemoryFree(err);
} else { } else {
rocksdb_compact_range_cf(pState->pTdbState->rocksdb, pState->pTdbState->pHandle[1], sKeyStr, sLen, eKeyStr, eLen); rocksdb_compact_range_cf(wrapper->rocksdb, wrapper->pHandle[1], sKeyStr, sLen, eKeyStr, eLen);
} }
} }
@ -1273,8 +1321,9 @@ SStreamStateCur* streamStateSeekKeyNext_rocksdb(SStreamState* pState, const SWin
if (pCur == NULL) { if (pCur == NULL) {
return NULL; return NULL;
} }
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
pCur->number = pState->number; pCur->number = pState->number;
pCur->db = pState->pTdbState->rocksdb; pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt); (rocksdb_readoptions_t**)&pCur->readOpt);
@ -1307,15 +1356,16 @@ SStreamStateCur* streamStateSeekKeyNext_rocksdb(SStreamState* pState, const SWin
SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinKey* key) { SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateGetCur_rocksdb"); qDebug("streamStateGetCur_rocksdb");
int32_t code = 0; int32_t code = 0;
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
const SStateKey maxStateKey = {.key = {.groupId = UINT64_MAX, .ts = INT64_MAX}, .opNum = INT64_MAX}; const SStateKey maxStateKey = {.key = {.groupId = UINT64_MAX, .ts = INT64_MAX}, .opNum = INT64_MAX};
STREAM_STATE_PUT_ROCKSDB(pState, "state", &maxStateKey, "", 0); STREAM_STATE_PUT_ROCKSDB(pState, "state", &maxStateKey, "", 0);
char buf[128] = {0}; char buf[128] = {0};
int32_t klen = stateKeyEncode((void*)&maxStateKey, buf); int32_t klen = stateKeyEncode((void*)&maxStateKey, buf);
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) return NULL; if (pCur == NULL) return NULL;
pCur->db = pState->pTdbState->rocksdb; pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt); (rocksdb_readoptions_t**)&pCur->readOpt);
rocksdb_iter_seek(pCur->iter, buf, (size_t)klen); rocksdb_iter_seek(pCur->iter, buf, (size_t)klen);
@ -1335,10 +1385,11 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinK
SStreamStateCur* streamStateGetCur_rocksdb(SStreamState* pState, const SWinKey* key) { SStreamStateCur* streamStateGetCur_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateGetCur_rocksdb"); qDebug("streamStateGetCur_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) return NULL; if (pCur == NULL) return NULL;
pCur->db = pState->pTdbState->rocksdb; pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt); (rocksdb_readoptions_t**)&pCur->readOpt);
@ -1426,12 +1477,14 @@ int32_t streamStateSessionDel_rocksdb(SStreamState* pState, const SSessionKey* k
} }
SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pState, const SSessionKey* key) { SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pState, const SSessionKey* key) {
qDebug("streamStateSessionSeekKeyCurrentPrev_rocksdb"); qDebug("streamStateSessionSeekKeyCurrentPrev_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) { if (pCur == NULL) {
return NULL; return NULL;
} }
pCur->number = pState->number; pCur->number = pState->number;
pCur->db = pState->pTdbState->rocksdb; pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt); (rocksdb_readoptions_t**)&pCur->readOpt);
@ -1467,11 +1520,12 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pSta
} }
SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pState, SSessionKey* key) { SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pState, SSessionKey* key) {
qDebug("streamStateSessionSeekKeyCurrentNext_rocksdb"); qDebug("streamStateSessionSeekKeyCurrentNext_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) { if (pCur == NULL) {
return NULL; return NULL;
} }
pCur->db = pState->pTdbState->rocksdb; pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt); (rocksdb_readoptions_t**)&pCur->readOpt);
pCur->number = pState->number; pCur->number = pState->number;
@ -1504,11 +1558,12 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pSta
SStreamStateCur* streamStateSessionSeekKeyNext_rocksdb(SStreamState* pState, const SSessionKey* key) { SStreamStateCur* streamStateSessionSeekKeyNext_rocksdb(SStreamState* pState, const SSessionKey* key) {
qDebug("streamStateSessionSeekKeyNext_rocksdb"); qDebug("streamStateSessionSeekKeyNext_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) { if (pCur == NULL) {
return NULL; return NULL;
} }
pCur->db = pState->pTdbState->rocksdb; pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt); (rocksdb_readoptions_t**)&pCur->readOpt);
pCur->number = pState->number; pCur->number = pState->number;
@ -1597,11 +1652,12 @@ int32_t streamStateFillDel_rocksdb(SStreamState* pState, const SWinKey* key) {
SStreamStateCur* streamStateFillGetCur_rocksdb(SStreamState* pState, const SWinKey* key) { SStreamStateCur* streamStateFillGetCur_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateFillGetCur_rocksdb"); qDebug("streamStateFillGetCur_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
if (pCur == NULL) return NULL; if (pCur == NULL) return NULL;
pCur->db = pState->pTdbState->rocksdb; pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt); (rocksdb_readoptions_t**)&pCur->readOpt);
@ -1656,12 +1712,13 @@ int32_t streamStateFillGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey,
SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const SWinKey* key) { SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateFillSeekKeyNext_rocksdb"); qDebug("streamStateFillSeekKeyNext_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (!pCur) { if (!pCur) {
return NULL; return NULL;
} }
pCur->db = pState->pTdbState->rocksdb; pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt); (rocksdb_readoptions_t**)&pCur->readOpt);
@ -1692,12 +1749,13 @@ SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const
} }
SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const SWinKey* key) { SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateFillSeekKeyPrev_rocksdb"); qDebug("streamStateFillSeekKeyPrev_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) { if (pCur == NULL) {
return NULL; return NULL;
} }
pCur->db = pState->pTdbState->rocksdb; pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt); (rocksdb_readoptions_t**)&pCur->readOpt);
@ -1728,12 +1786,13 @@ SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const
} }
int32_t streamStateSessionGetKeyByRange_rocksdb(SStreamState* pState, const SSessionKey* key, SSessionKey* curKey) { int32_t streamStateSessionGetKeyByRange_rocksdb(SStreamState* pState, const SSessionKey* key, SSessionKey* curKey) {
qDebug("streamStateSessionGetKeyByRange_rocksdb"); qDebug("streamStateSessionGetKeyByRange_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) { if (pCur == NULL) {
return -1; return -1;
} }
pCur->number = pState->number; pCur->number = pState->number;
pCur->db = pState->pTdbState->rocksdb; pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt); (rocksdb_readoptions_t**)&pCur->readOpt);
@ -1964,6 +2023,7 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co
int code = 0; int code = 0;
char* err = NULL; char* err = NULL;
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
rocksdb_snapshot_t* snapshot = NULL; rocksdb_snapshot_t* snapshot = NULL;
rocksdb_readoptions_t* readopts = NULL; rocksdb_readoptions_t* readopts = NULL;
rocksdb_iterator_t* pIter = streamStateIterCreate(pState, "default", &snapshot, &readopts); rocksdb_iterator_t* pIter = streamStateIterCreate(pState, "default", &snapshot, &readopts);
@ -1996,15 +2056,16 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co
} }
rocksdb_iter_next(pIter); rocksdb_iter_next(pIter);
} }
rocksdb_release_snapshot(pState->pTdbState->rocksdb, snapshot); rocksdb_release_snapshot(wrapper->rocksdb, snapshot);
rocksdb_readoptions_destroy(readopts); rocksdb_readoptions_destroy(readopts);
rocksdb_iter_destroy(pIter); rocksdb_iter_destroy(pIter);
return code; return code;
} }
void* streamDefaultIterCreate_rocksdb(SStreamState* pState) { void* streamDefaultIterCreate_rocksdb(SStreamState* pState) {
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
pCur->db = pState->pTdbState->rocksdb; pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "default", (rocksdb_snapshot_t**)&pCur->snapshot, pCur->iter = streamStateIterCreate(pState, "default", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt); (rocksdb_readoptions_t**)&pCur->readOpt);
return pCur; return pCur;
@ -2051,7 +2112,8 @@ void streamStateClearBatch(void* pBatch) { rocksdb_writebatch_clear((rocksdb_
void streamStateDestroyBatch(void* pBatch) { rocksdb_writebatch_destroy((rocksdb_writebatch_t*)pBatch); } void streamStateDestroyBatch(void* pBatch) { rocksdb_writebatch_destroy((rocksdb_writebatch_t*)pBatch); }
int32_t streamStatePutBatch(SStreamState* pState, const char* cfName, rocksdb_writebatch_t* pBatch, void* key, int32_t streamStatePutBatch(SStreamState* pState, const char* cfName, rocksdb_writebatch_t* pBatch, void* key,
void* val, int32_t vlen, int64_t ttl) { void* val, int32_t vlen, int64_t ttl) {
int i = streamStateGetCfIdx(pState, cfName); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
int i = streamStateGetCfIdx(pState, cfName);
if (i < 0) { if (i < 0) {
qError("streamState failed to put to cf name:%s", cfName); qError("streamState failed to put to cf name:%s", cfName);
@ -2062,7 +2124,7 @@ int32_t streamStatePutBatch(SStreamState* pState, const char* cfName, rocksdb_wr
char* ttlV = NULL; char* ttlV = NULL;
int32_t ttlVLen = ginitDict[i].enValueFunc(val, vlen, ttl, &ttlV); int32_t ttlVLen = ginitDict[i].enValueFunc(val, vlen, ttl, &ttlV);
rocksdb_column_family_handle_t* pCf = pState->pTdbState->pHandle[ginitDict[i].idx]; rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[i].idx];
rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen); rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen);
taosMemoryFree(ttlV); taosMemoryFree(ttlV);
return 0; return 0;
@ -2074,7 +2136,9 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb
char* ttlV = tmpBuf; char* ttlV = tmpBuf;
int32_t ttlVLen = ginitDict[cfIdx].enValueFunc(val, vlen, ttl, &ttlV); int32_t ttlVLen = ginitDict[cfIdx].enValueFunc(val, vlen, ttl, &ttlV);
rocksdb_column_family_handle_t* pCf = pState->pTdbState->pHandle[ginitDict[cfIdx].idx]; SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[cfIdx].idx];
rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen); rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen);
if (tmpBuf == NULL) { if (tmpBuf == NULL) {
@ -2083,8 +2147,9 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb
return 0; return 0;
} }
int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) { int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) {
char* err = NULL; char* err = NULL;
rocksdb_write(pState->pTdbState->rocksdb, pState->pTdbState->writeOpts, (rocksdb_writebatch_t*)pBatch, &err); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
rocksdb_write(wrapper->rocksdb, wrapper->writeOpts, (rocksdb_writebatch_t*)pBatch, &err);
if (err != NULL) { if (err != NULL) {
qError("streamState failed to write batch, err:%s", err); qError("streamState failed to write batch, err:%s", err);
taosMemoryFree(err); taosMemoryFree(err);
@ -2092,3 +2157,13 @@ int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) {
} }
return 0; return 0;
} }
uint32_t nextPow2(uint32_t x) {
x = x - 1;
x = x | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
return x + 1;
}

View File

@ -123,7 +123,7 @@ int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pR
static int32_t streamAlignCheckpoint(SStreamTask* pTask, int64_t checkpointId, int32_t childId) { static int32_t streamAlignCheckpoint(SStreamTask* pTask, int64_t checkpointId, int32_t childId) {
if (pTask->checkpointingId == 0) { if (pTask->checkpointingId == 0) {
pTask->checkpointingId = checkpointId; pTask->checkpointingId = checkpointId;
pTask->checkpointAlignCnt = taosArrayGetSize(pTask->childEpInfo); pTask->checkpointAlignCnt = taosArrayGetSize(pTask->pUpstreamEpInfoList);
} }
ASSERT(pTask->checkpointingId == checkpointId); ASSERT(pTask->checkpointingId == checkpointId);
@ -165,7 +165,7 @@ int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStre
int64_t checkpointId = pReq->checkpointId; int64_t checkpointId = pReq->checkpointId;
int32_t childId = pReq->childId; int32_t childId = pReq->childId;
if (taosArrayGetSize(pTask->childEpInfo) > 0) { if (taosArrayGetSize(pTask->pUpstreamEpInfoList) > 0) {
code = streamAlignCheckpoint(pTask, checkpointId, childId); code = streamAlignCheckpoint(pTask, checkpointId, childId);
if (code > 0) { if (code > 0) {
return 0; return 0;

View File

@ -64,11 +64,11 @@ SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamT
if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { if (pItem->type == STREAM_INPUT__DATA_SUBMIT) {
SStreamDataSubmit* pSubmit = (SStreamDataSubmit*)pItem; SStreamDataSubmit* pSubmit = (SStreamDataSubmit*)pItem;
pStreamBlocks->childId = pTask->selfChildId; pStreamBlocks->childId = pTask->info.selfChildId;
pStreamBlocks->sourceVer = pSubmit->ver; pStreamBlocks->sourceVer = pSubmit->ver;
} else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) { } else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) {
SStreamMergedSubmit* pMerged = (SStreamMergedSubmit*)pItem; SStreamMergedSubmit* pMerged = (SStreamMergedSubmit*)pItem;
pStreamBlocks->childId = pTask->selfChildId; pStreamBlocks->childId = pTask->info.selfChildId;
pStreamBlocks->sourceVer = pMerged->ver; pStreamBlocks->sourceVer = pMerged->ver;
} }
@ -164,26 +164,6 @@ int32_t streamMergeSubmit(SStreamMergedSubmit* pMerged, SStreamDataSubmit* pSubm
return 0; return 0;
} }
static FORCE_INLINE void streamDataSubmitRefInc(SStreamDataSubmit* pDataSubmit) {
atomic_add_fetch_32(pDataSubmit->dataRef, 1);
}
SStreamDataSubmit* streamSubmitBlockClone(SStreamDataSubmit* pSubmit) {
int32_t len = 0;
if (pSubmit->type == STREAM_INPUT__DATA_SUBMIT) {
len = pSubmit->submit.msgLen;
}
SStreamDataSubmit* pSubmitClone = taosAllocateQitem(sizeof(SStreamDataSubmit), DEF_QITEM, len);
if (pSubmitClone == NULL) {
return NULL;
}
streamDataSubmitRefInc(pSubmit);
memcpy(pSubmitClone, pSubmit, sizeof(SStreamDataSubmit));
return pSubmitClone;
}
SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem) { SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem) {
if (dst->type == STREAM_INPUT__DATA_BLOCK && pElem->type == STREAM_INPUT__DATA_BLOCK) { if (dst->type == STREAM_INPUT__DATA_BLOCK && pElem->type == STREAM_INPUT__DATA_BLOCK) {
SStreamDataBlock* pBlock = (SStreamDataBlock*)dst; SStreamDataBlock* pBlock = (SStreamDataBlock*)dst;

View File

@ -13,16 +13,19 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "ttimer.h"
#include "streamInc.h" #include "streamInc.h"
#define MAX_BLOCK_NAME_NUM 1024 #define MAX_BLOCK_NAME_NUM 1024
#define DISPATCH_RETRY_INTERVAL_MS 300
#define MAX_CONTINUE_RETRY_COUNT 5
typedef struct SBlockName { typedef struct SBlockName {
uint32_t hashValue; uint32_t hashValue;
char parTbName[TSDB_TABLE_NAME_LEN]; char parTbName[TSDB_TABLE_NAME_LEN];
} SBlockName; } SBlockName;
int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) { static int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) {
if (tStartEncode(pEncoder) < 0) return -1; if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1;
@ -44,6 +47,37 @@ int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* p
return pEncoder->pos; return pEncoder->pos;
} }
static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq) {
int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock);
void* buf = taosMemoryCalloc(1, dataStrLen);
if (buf == NULL) return -1;
SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)buf;
pRetrieve->useconds = 0;
pRetrieve->precision = TSDB_DEFAULT_PRECISION;
pRetrieve->compressed = 0;
pRetrieve->completed = 1;
pRetrieve->streamBlockType = pBlock->info.type;
pRetrieve->numOfRows = htobe64((int64_t)pBlock->info.rows);
pRetrieve->skey = htobe64(pBlock->info.window.skey);
pRetrieve->ekey = htobe64(pBlock->info.window.ekey);
pRetrieve->version = htobe64(pBlock->info.version);
pRetrieve->watermark = htobe64(pBlock->info.watermark);
memcpy(pRetrieve->parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN);
int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock);
pRetrieve->numOfCols = htonl(numOfCols);
int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols);
actualLen += sizeof(SRetrieveTableRsp);
ASSERT(actualLen <= dataStrLen);
taosArrayPush(pReq->dataLen, &actualLen);
taosArrayPush(pReq->data, &buf);
pReq->totalLen += dataStrLen;
return 0;
}
int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) { int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) {
if (tStartDecode(pDecoder) < 0) return -1; if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1;
@ -72,6 +106,27 @@ int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) {
return 0; return 0;
} }
int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, int32_t numOfBlocks,
int64_t dstTaskId) {
pReq->streamId = pTask->id.streamId;
pReq->dataSrcVgId = vgId;
pReq->upstreamTaskId = pTask->id.taskId;
pReq->upstreamChildId = pTask->info.selfChildId;
pReq->upstreamNodeId = pTask->info.nodeId;
pReq->blockNum = numOfBlocks;
pReq->taskId = dstTaskId;
pReq->data = taosArrayInit(numOfBlocks, POINTER_BYTES);
pReq->dataLen = taosArrayInit(numOfBlocks, sizeof(int32_t));
if (pReq->data == NULL || pReq->dataLen == NULL) {
taosArrayDestroyP(pReq->data, taosMemoryFree);
taosArrayDestroy(pReq->dataLen);
return TSDB_CODE_OUT_OF_MEMORY;
}
return TSDB_CODE_SUCCESS;
}
void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq) { void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq) {
taosArrayDestroyP(pReq->data, taosMemoryFree); taosArrayDestroyP(pReq->data, taosMemoryFree);
taosArrayDestroy(pReq->dataLen); taosArrayDestroy(pReq->dataLen);
@ -132,17 +187,17 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock)
SStreamRetrieveReq req = { SStreamRetrieveReq req = {
.streamId = pTask->id.streamId, .streamId = pTask->id.streamId,
.srcNodeId = pTask->nodeId, .srcNodeId = pTask->info.nodeId,
.srcTaskId = pTask->id.taskId, .srcTaskId = pTask->id.taskId,
.pRetrieve = pRetrieve, .pRetrieve = pRetrieve,
.retrieveLen = dataStrLen, .retrieveLen = dataStrLen,
}; };
int32_t sz = taosArrayGetSize(pTask->childEpInfo); int32_t sz = taosArrayGetSize(pTask->pUpstreamEpInfoList);
ASSERT(sz > 0); ASSERT(sz > 0);
for (int32_t i = 0; i < sz; i++) { for (int32_t i = 0; i < sz; i++) {
req.reqId = tGenIdPI64(); req.reqId = tGenIdPI64();
SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->childEpInfo, i); SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i);
req.dstNodeId = pEpInfo->nodeId; req.dstNodeId = pEpInfo->nodeId;
req.dstTaskId = pEpInfo->taskId; req.dstTaskId = pEpInfo->taskId;
int32_t len; int32_t len;
@ -171,8 +226,8 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock)
} }
buf = NULL; buf = NULL;
qDebug("s-task:%s (child %d) send retrieve req to task %d at node %d, reqId:0x%" PRIx64, pTask->id.idStr, qDebug("s-task:%s (child %d) send retrieve req to task:0x%x (vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr,
pTask->selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req.reqId); pTask->info.selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req.reqId);
} }
code = 0; code = 0;
@ -182,44 +237,13 @@ CLEAR:
return code; return code;
} }
static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq) {
int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock);
void* buf = taosMemoryCalloc(1, dataStrLen);
if (buf == NULL) return -1;
SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)buf;
pRetrieve->useconds = 0;
pRetrieve->precision = TSDB_DEFAULT_PRECISION;
pRetrieve->compressed = 0;
pRetrieve->completed = 1;
pRetrieve->streamBlockType = pBlock->info.type;
pRetrieve->numOfRows = htobe64((int64_t)pBlock->info.rows);
pRetrieve->skey = htobe64(pBlock->info.window.skey);
pRetrieve->ekey = htobe64(pBlock->info.window.ekey);
pRetrieve->version = htobe64(pBlock->info.version);
pRetrieve->watermark = htobe64(pBlock->info.watermark);
memcpy(pRetrieve->parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN);
int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock);
pRetrieve->numOfCols = htonl(numOfCols);
int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols);
actualLen += sizeof(SRetrieveTableRsp);
ASSERT(actualLen <= dataStrLen);
taosArrayPush(pReq->dataLen, &actualLen);
taosArrayPush(pReq->data, &buf);
pReq->totalLen += dataStrLen;
return 0;
}
int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet) { int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet) {
void* buf = NULL; void* buf = NULL;
int32_t code = -1; int32_t code = -1;
SRpcMsg msg = {0}; SRpcMsg msg = {0};
int32_t tlen; int32_t tlen;
tEncodeSize(tEncodeSStreamTaskCheckReq, pReq, tlen, code); tEncodeSize(tEncodeStreamTaskCheckReq, pReq, tlen, code);
if (code < 0) { if (code < 0) {
return -1; return -1;
} }
@ -234,7 +258,7 @@ int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pR
SEncoder encoder; SEncoder encoder;
tEncoderInit(&encoder, abuf, tlen); tEncoderInit(&encoder, abuf, tlen);
if ((code = tEncodeSStreamTaskCheckReq(&encoder, pReq)) < 0) { if ((code = tEncodeStreamTaskCheckReq(&encoder, pReq)) < 0) {
rpcFreeCont(buf); rpcFreeCont(buf);
return code; return code;
} }
@ -245,21 +269,21 @@ int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pR
msg.pCont = buf; msg.pCont = buf;
msg.msgType = TDMT_STREAM_TASK_CHECK; msg.msgType = TDMT_STREAM_TASK_CHECK;
qDebug("s-task:%s dispatch check msg to downstream s-task:%" PRIx64 ":%d node %d: check msg", pTask->id.idStr, qDebug("s-task:%s (level:%d) dispatch check msg to s-task:%" PRIx64 ":0x%x (vgId:%d)", pTask->id.idStr,
pReq->streamId, pReq->downstreamTaskId, nodeId); pTask->info.taskLevel, pReq->streamId, pReq->downstreamTaskId, nodeId);
tmsgSendReq(pEpSet, &msg); tmsgSendReq(pEpSet, &msg);
return 0; return 0;
} }
int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId, int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId,
SEpSet* pEpSet) { SEpSet* pEpSet) {
void* buf = NULL; void* buf = NULL;
int32_t code = -1; int32_t code = -1;
SRpcMsg msg = {0}; SRpcMsg msg = {0};
int32_t tlen; int32_t tlen;
tEncodeSize(tEncodeSStreamRecoverFinishReq, pReq, tlen, code); tEncodeSize(tEncodeStreamRecoverFinishReq, pReq, tlen, code);
if (code < 0) { if (code < 0) {
return -1; return -1;
} }
@ -275,7 +299,7 @@ int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecov
SEncoder encoder; SEncoder encoder;
tEncoderInit(&encoder, abuf, tlen); tEncoderInit(&encoder, abuf, tlen);
if ((code = tEncodeSStreamRecoverFinishReq(&encoder, pReq)) < 0) { if ((code = tEncodeStreamRecoverFinishReq(&encoder, pReq)) < 0) {
if (buf) { if (buf) {
rpcFreeCont(buf); rpcFreeCont(buf);
} }
@ -286,17 +310,18 @@ int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecov
msg.contLen = tlen + sizeof(SMsgHead); msg.contLen = tlen + sizeof(SMsgHead);
msg.pCont = buf; msg.pCont = buf;
msg.msgType = TDMT_STREAM_RECOVER_FINISH; msg.msgType = TDMT_STREAM_SCAN_HISTORY_FINISH;
msg.info.noResp = 1; msg.info.noResp = 1;
tmsgSendReq(pEpSet, &msg); tmsgSendReq(pEpSet, &msg);
qDebug("s-task:%s dispatch recover finish msg to downstream taskId:0x%x node %d: recover finish msg", pTask->id.idStr,
pReq->taskId, vgId);
const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus);
qDebug("s-task:%s status:%s dispatch scan-history-data finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pStatus,
pReq->taskId, vgId);
return 0; return 0;
} }
int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet) { static int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet) {
void* buf = NULL; void* buf = NULL;
int32_t code = -1; int32_t code = -1;
SRpcMsg msg = {0}; SRpcMsg msg = {0};
@ -304,7 +329,10 @@ int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, in
// serialize // serialize
int32_t tlen; int32_t tlen;
tEncodeSize(tEncodeStreamDispatchReq, pReq, tlen, code); tEncodeSize(tEncodeStreamDispatchReq, pReq, tlen, code);
if (code < 0) goto FAIL; if (code < 0) {
goto FAIL;
}
code = -1; code = -1;
buf = rpcMallocCont(sizeof(SMsgHead) + tlen); buf = rpcMallocCont(sizeof(SMsgHead) + tlen);
if (buf == NULL) { if (buf == NULL) {
@ -323,16 +351,16 @@ int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, in
msg.contLen = tlen + sizeof(SMsgHead); msg.contLen = tlen + sizeof(SMsgHead);
msg.pCont = buf; msg.pCont = buf;
msg.msgType = pTask->dispatchMsgType; msg.msgType = pTask->msgInfo.msgType;
qDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg", pTask->id.idStr, pReq->taskId, vgId); qDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg", pTask->id.idStr, pReq->taskId, vgId);
tmsgSendReq(pEpSet, &msg); return tmsgSendReq(pEpSet, &msg);
code = 0;
return 0;
FAIL: FAIL:
if (buf) rpcFreeCont(buf); if (buf) {
rpcFreeCont(buf);
}
return code; return code;
} }
@ -365,8 +393,6 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S
snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName);
} }
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
/*uint32_t hashValue = MurmurHash3_32(ctbName, strlen(ctbName));*/ /*uint32_t hashValue = MurmurHash3_32(ctbName, strlen(ctbName));*/
SUseDbRsp* pDbInfo = &pTask->shuffleDispatcher.dbInfo; SUseDbRsp* pDbInfo = &pTask->shuffleDispatcher.dbInfo;
hashValue = hashValue =
@ -386,13 +412,16 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S
for (j = 0; j < vgSz; j++) { for (j = 0; j < vgSz; j++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, j); SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, j);
ASSERT(pVgInfo->vgId > 0); ASSERT(pVgInfo->vgId > 0);
if (hashValue >= pVgInfo->hashBegin && hashValue <= pVgInfo->hashEnd) { if (hashValue >= pVgInfo->hashBegin && hashValue <= pVgInfo->hashEnd) {
if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) { if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) {
return -1; return -1;
} }
if (pReqs[j].blockNum == 0) { if (pReqs[j].blockNum == 0) {
atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1);
} }
pReqs[j].blockNum++; pReqs[j].blockNum++;
found = true; found = true;
break; break;
@ -404,25 +433,17 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S
int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData) { int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData) {
int32_t code = 0; int32_t code = 0;
int32_t numOfBlocks = taosArrayGetSize(pData->blocks); int32_t numOfBlocks = taosArrayGetSize(pData->blocks);
ASSERT(numOfBlocks != 0); ASSERT(numOfBlocks != 0);
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
SStreamDispatchReq req = { SStreamDispatchReq req = {0};
.streamId = pTask->id.streamId,
.dataSrcVgId = pData->srcVgId,
.upstreamTaskId = pTask->id.taskId,
.upstreamChildId = pTask->selfChildId,
.upstreamNodeId = pTask->nodeId,
.blockNum = numOfBlocks,
};
req.data = taosArrayInit(numOfBlocks, sizeof(void*)); int32_t downstreamTaskId = pTask->fixedEpDispatcher.taskId;
req.dataLen = taosArrayInit(numOfBlocks, sizeof(int32_t)); code = tInitStreamDispatchReq(&req, pTask, pData->srcVgId, numOfBlocks, downstreamTaskId);
if (req.data == NULL || req.dataLen == NULL) { if (code != TSDB_CODE_SUCCESS) {
taosArrayDestroyP(req.data, taosMemoryFree); return code;
taosArrayDestroy(req.dataLen);
return TSDB_CODE_OUT_OF_MEMORY;
} }
for (int32_t i = 0; i < numOfBlocks; i++) { for (int32_t i = 0; i < numOfBlocks; i++) {
@ -438,12 +459,9 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
int32_t vgId = pTask->fixedEpDispatcher.nodeId; int32_t vgId = pTask->fixedEpDispatcher.nodeId;
SEpSet* pEpSet = &pTask->fixedEpDispatcher.epSet; SEpSet* pEpSet = &pTask->fixedEpDispatcher.epSet;
int32_t downstreamTaskId = pTask->fixedEpDispatcher.taskId;
req.taskId = downstreamTaskId; qDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d)", pTask->id.idStr,
pTask->info.selfChildId, numOfBlocks, downstreamTaskId, vgId);
qDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to down stream s-task:0x%x in vgId:%d", pTask->id.idStr,
pTask->selfChildId, numOfBlocks, downstreamTaskId, vgId);
code = doSendDispatchMsg(pTask, &req, vgId, pEpSet); code = doSendDispatchMsg(pTask, &req, vgId, pEpSet);
taosArrayDestroyP(req.data, taosMemoryFree); taosArrayDestroyP(req.data, taosMemoryFree);
@ -453,8 +471,9 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
int32_t rspCnt = atomic_load_32(&pTask->shuffleDispatcher.waitingRspCnt); int32_t rspCnt = atomic_load_32(&pTask->shuffleDispatcher.waitingRspCnt);
ASSERT(rspCnt == 0); ASSERT(rspCnt == 0);
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t vgSz = taosArrayGetSize(vgInfo); int32_t vgSz = taosArrayGetSize(vgInfo);
SStreamDispatchReq* pReqs = taosMemoryCalloc(vgSz, sizeof(SStreamDispatchReq)); SStreamDispatchReq* pReqs = taosMemoryCalloc(vgSz, sizeof(SStreamDispatchReq));
if (pReqs == NULL) { if (pReqs == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
@ -462,20 +481,11 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
} }
for (int32_t i = 0; i < vgSz; i++) { for (int32_t i = 0; i < vgSz; i++) {
pReqs[i].streamId = pTask->id.streamId; SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
pReqs[i].dataSrcVgId = pData->srcVgId; code = tInitStreamDispatchReq(&pReqs[i], pTask, pData->srcVgId, 0, pVgInfo->taskId);
pReqs[i].upstreamTaskId = pTask->id.taskId; if (code != TSDB_CODE_SUCCESS) {
pReqs[i].upstreamChildId = pTask->selfChildId;
pReqs[i].upstreamNodeId = pTask->nodeId;
pReqs[i].blockNum = 0;
pReqs[i].data = taosArrayInit(0, sizeof(void*));
pReqs[i].dataLen = taosArrayInit(0, sizeof(int32_t));
if (pReqs[i].data == NULL || pReqs[i].dataLen == NULL) {
goto FAIL_SHUFFLE_DISPATCH; goto FAIL_SHUFFLE_DISPATCH;
} }
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
pReqs[i].taskId = pVgInfo->taskId;
} }
for (int32_t i = 0; i < numOfBlocks; i++) { for (int32_t i = 0; i < numOfBlocks; i++) {
@ -483,15 +493,18 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
// TODO: do not use broadcast // TODO: do not use broadcast
if (pDataBlock->info.type == STREAM_DELETE_RESULT) { if (pDataBlock->info.type == STREAM_DELETE_RESULT) {
for (int32_t j = 0; j < vgSz; j++) { for (int32_t j = 0; j < vgSz; j++) {
if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) { if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) {
goto FAIL_SHUFFLE_DISPATCH; goto FAIL_SHUFFLE_DISPATCH;
} }
if (pReqs[j].blockNum == 0) { if (pReqs[j].blockNum == 0) {
atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1);
} }
pReqs[j].blockNum++; pReqs[j].blockNum++;
} }
continue; continue;
} }
@ -500,16 +513,17 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
} }
} }
qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to %d vgroups", pTask->id.idStr, pTask->selfChildId, qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to %d vgroups", pTask->id.idStr, pTask->info.selfChildId,
numOfBlocks, vgSz); numOfBlocks, vgSz);
for (int32_t i = 0; i < vgSz; i++) { for (int32_t i = 0; i < vgSz; i++) {
if (pReqs[i].blockNum > 0) { if (pReqs[i].blockNum > 0) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, pTask->selfChildId, qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, pTask->info.selfChildId,
pReqs[i].blockNum, pVgInfo->vgId); pReqs[i].blockNum, pVgInfo->vgId);
if (doSendDispatchMsg(pTask, &pReqs[i], pVgInfo->vgId, &pVgInfo->epSet) < 0) { code = doSendDispatchMsg(pTask, &pReqs[i], pVgInfo->vgId, &pVgInfo->epSet);
if (code < 0) {
goto FAIL_SHUFFLE_DISPATCH; goto FAIL_SHUFFLE_DISPATCH;
} }
} }
@ -522,46 +536,87 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
taosArrayDestroyP(pReqs[i].data, taosMemoryFree); taosArrayDestroyP(pReqs[i].data, taosMemoryFree);
taosArrayDestroy(pReqs[i].dataLen); taosArrayDestroy(pReqs[i].dataLen);
} }
taosMemoryFree(pReqs); taosMemoryFree(pReqs);
} }
return code; return code;
} }
static void doRetryDispatchData(void* param, void* tmrId) {
SStreamTask* pTask = param;
ASSERT(pTask->outputStatus == TASK_OUTPUT_STATUS__WAIT);
int32_t code = streamDispatchAllBlocks(pTask, pTask->msgInfo.pData);
if (code != TSDB_CODE_SUCCESS) {
qDebug("s-task:%s reset the waitRspCnt to be 0 before launch retry dispatch", pTask->id.idStr);
atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0);
streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS);
}
}
void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration) {
qError("s-task:%s dispatch data in %"PRId64"ms", pTask->id.idStr, waitDuration);
taosTmrReset(doRetryDispatchData, waitDuration, pTask, streamEnv.timer, &pTask->launchTaskTimer);
}
int32_t streamDispatchStreamBlock(SStreamTask* pTask) { int32_t streamDispatchStreamBlock(SStreamTask* pTask) {
ASSERT(pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH); ASSERT((pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH));
int32_t numOfElems = taosQueueItemSize(pTask->outputQueue->queue); int32_t numOfElems = taosQueueItemSize(pTask->outputQueue->queue);
if (numOfElems > 0) { if (numOfElems > 0) {
qDebug("s-task:%s try to dispatch intermediate result block to downstream, elem in outputQ:%d", pTask->id.idStr, qDebug("s-task:%s try to dispatch intermediate result block to downstream, elem in outputQ:%d", pTask->id.idStr,
numOfElems); numOfElems);
} }
// to make sure only one dispatch is running
int8_t old = int8_t old =
atomic_val_compare_exchange_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT); atomic_val_compare_exchange_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT);
if (old != TASK_OUTPUT_STATUS__NORMAL) { if (old != TASK_OUTPUT_STATUS__NORMAL) {
qDebug("s-task:%s task wait for dispatch rsp, not dispatch now, output status:%d", pTask->id.idStr, old); qDebug("s-task:%s wait for dispatch rsp, not dispatch now, output status:%d", pTask->id.idStr, old);
return 0; return 0;
} }
ASSERT(pTask->msgInfo.pData == NULL);
qDebug("s-task:%s start to dispatch msg, set output status:%d", pTask->id.idStr, pTask->outputStatus); qDebug("s-task:%s start to dispatch msg, set output status:%d", pTask->id.idStr, pTask->outputStatus);
SStreamDataBlock* pDispatchedBlock = streamQueueNextItem(pTask->outputQueue); SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputQueue);
if (pDispatchedBlock == NULL) { if (pBlock == NULL) {
atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL);
qDebug("s-task:%s stop dispatching since no output in output queue, output status:%d", pTask->id.idStr, qDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", pTask->id.idStr, pTask->outputStatus);
pTask->outputStatus);
return 0; return 0;
} }
ASSERT(pDispatchedBlock->type == STREAM_INPUT__DATA_BLOCK); pTask->msgInfo.pData = pBlock;
ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK);
int32_t code = streamDispatchAllBlocks(pTask, pDispatchedBlock); int32_t retryCount = 0;
if (code != TSDB_CODE_SUCCESS) {
streamQueueProcessFail(pTask->outputQueue); while (1) {
atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); int32_t code = streamDispatchAllBlocks(pTask, pBlock);
qDebug("s-task:%s failed to dispatch msg to downstream, output status:%d", pTask->id.idStr, pTask->outputStatus); if (code == TSDB_CODE_SUCCESS) {
break;
}
qDebug("s-task:%s failed to dispatch msg to downstream, code:%s, output status:%d, retry cnt:%d", pTask->id.idStr,
tstrerror(terrno), pTask->outputStatus, retryCount);
// todo deal with only partially success dispatch case
atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0);
if (terrno == TSDB_CODE_APP_IS_STOPPING) { // in case of this error, do not retry anymore
destroyStreamDataBlock(pTask->msgInfo.pData);
pTask->msgInfo.pData = NULL;
return code;
}
if (++retryCount > MAX_CONTINUE_RETRY_COUNT) { // add to timer to retry
qDebug("s-task:%s failed to dispatch msg to downstream for %d times, code:%s, add timer to retry in %dms", pTask->id.idStr,
retryCount, tstrerror(terrno), DISPATCH_RETRY_INTERVAL_MS);
streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS);
break;
}
} }
// this block can be freed only when it has been pushed to down stream. // this block can not be deleted until it has been sent to downstream task successfully.
destroyStreamDataBlock(pDispatchedBlock); return TSDB_CODE_SUCCESS;
return code;
} }

View File

@ -108,10 +108,11 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i
assignOneDataBlock(&block, taosArrayGet(pRetrieveBlock->blocks, 0)); assignOneDataBlock(&block, taosArrayGet(pRetrieveBlock->blocks, 0));
block.info.type = STREAM_PULL_OVER; block.info.type = STREAM_PULL_OVER;
block.info.childId = pTask->selfChildId; block.info.childId = pTask->info.selfChildId;
taosArrayPush(pRes, &block); taosArrayPush(pRes, &block);
numOfBlocks += 1; numOfBlocks += 1;
qDebug("s-task:%s(child %d) processed retrieve, reqId:0x%" PRIx64, pTask->id.idStr, pTask->selfChildId,
qDebug("s-task:%s(child %d) retrieve process completed, reqId:0x%" PRIx64" dump results", pTask->id.idStr, pTask->info.selfChildId,
pRetrieveBlock->reqId); pRetrieveBlock->reqId);
} }
@ -127,7 +128,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i
SSDataBlock block = {0}; SSDataBlock block = {0};
assignOneDataBlock(&block, output); assignOneDataBlock(&block, output);
block.info.childId = pTask->selfChildId; block.info.childId = pTask->info.selfChildId;
size += blockDataGetSize(output) + sizeof(SSDataBlock) + sizeof(SColumnInfoData) * blockDataGetNumOfCols(&block); size += blockDataGetSize(output) + sizeof(SSDataBlock) + sizeof(SColumnInfoData) * blockDataGetNumOfCols(&block);
numOfBlocks += 1; numOfBlocks += 1;
@ -135,7 +136,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i
taosArrayPush(pRes, &block); taosArrayPush(pRes, &block);
qDebug("s-task:%s (child %d) executed and get %d result blocks, size:%.2fMiB", pTask->id.idStr, qDebug("s-task:%s (child %d) executed and get %d result blocks, size:%.2fMiB", pTask->id.idStr,
pTask->selfChildId, numOfBlocks, size / 1048576.0); pTask->info.selfChildId, numOfBlocks, size / 1048576.0);
// current output should be dispatched to down stream nodes // current output should be dispatched to down stream nodes
if (numOfBlocks >= MAX_STREAM_RESULT_DUMP_THRESHOLD) { if (numOfBlocks >= MAX_STREAM_RESULT_DUMP_THRESHOLD) {
@ -164,7 +165,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i
int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) {
int32_t code = 0; int32_t code = 0;
ASSERT(pTask->taskLevel == TASK_LEVEL__SOURCE); ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE);
void* exec = pTask->exec.pExecutor; void* exec = pTask->exec.pExecutor;
qSetStreamOpOpen(exec); qSetStreamOpOpen(exec);
@ -179,7 +180,7 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) {
int32_t batchCnt = 0; int32_t batchCnt = 0;
while (1) { while (1) {
if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) { if (streamTaskShouldStop(&pTask->status)) {
taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes);
return 0; return 0;
} }
@ -189,18 +190,37 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) {
if (qExecTask(exec, &output, &ts) < 0) { if (qExecTask(exec, &output, &ts) < 0) {
continue; continue;
} }
if (output == NULL) { if (output == NULL) {
if (qStreamRecoverScanFinished(exec)) { if (qStreamRecoverScanFinished(exec)) {
finished = true; finished = true;
} else { } else {
qSetStreamOpOpen(exec); qSetStreamOpOpen(exec);
if (streamTaskShouldPause(&pTask->status)) {
SStreamDataBlock* qRes = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0);
if (qRes == NULL) {
taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes);
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
qRes->type = STREAM_INPUT__DATA_BLOCK;
qRes->blocks = pRes;
code = streamTaskOutputResultBlock(pTask, qRes);
if (code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY) {
taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes);
taosFreeQitem(qRes);
return code;
}
return 0;
}
} }
break; break;
} }
SSDataBlock block = {0}; SSDataBlock block = {0};
assignOneDataBlock(&block, output); assignOneDataBlock(&block, output);
block.info.childId = pTask->selfChildId; block.info.childId = pTask->info.selfChildId;
taosArrayPush(pRes, &block); taosArrayPush(pRes, &block);
batchCnt++; batchCnt++;
@ -237,11 +257,6 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) {
taosFreeQitem(qRes); taosFreeQitem(qRes);
return code; return code;
} }
//
// if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
// qDebug("s-task:%s scan exec dispatch blocks:%d", pTask->id.idStr, batchCnt);
// streamDispatchStreamBlock(pTask);
// }
if (finished) { if (finished) {
break; break;
@ -275,7 +290,7 @@ int32_t streamBatchExec(SStreamTask* pTask, int32_t batchLimit) {
return -1; return -1;
} }
if (pTask->taskLevel == TASK_LEVEL__SINK) { if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
ASSERT(((SStreamQueueItem*)pItem)->type == STREAM_INPUT__DATA_BLOCK); ASSERT(((SStreamQueueItem*)pItem)->type == STREAM_INPUT__DATA_BLOCK);
streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pItem); streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pItem);
} }
@ -317,6 +332,75 @@ int32_t updateCheckPointInfo(SStreamTask* pTask) {
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
static void waitForTaskIdle(SStreamTask* pTask, SStreamTask* pStreamTask) {
// wait for the stream task to be idle
int64_t st = taosGetTimestampMs();
while (!streamTaskIsIdle(pStreamTask)) {
qDebug("s-task:%s level:%d wait for stream task:%s to be idle, check again in 100ms", pTask->id.idStr,
pTask->info.taskLevel, pStreamTask->id.idStr);
taosMsleep(100);
}
double el = (taosGetTimestampMs() - st) / 1000.0;
if (el > 0) {
qDebug("s-task:%s wait for stream task:%s for %.2fs to handle all data in inputQ", pTask->id.idStr,
pStreamTask->id.idStr, el);
}
}
static int32_t streamTransferStateToStreamTask(SStreamTask* pTask) {
SStreamTask* pStreamTask = streamMetaAcquireTask(pTask->pMeta, pTask->streamTaskId.taskId);
qDebug("s-task:%s scan history task end, update stream task:%s info, transfer exec state", pTask->id.idStr, pStreamTask->id.idStr);
// todo handle stream task is dropped here
ASSERT(pStreamTask != NULL && pStreamTask->historyTaskId.taskId == pTask->id.taskId);
STimeWindow* pTimeWindow = &pStreamTask->dataRange.window;
// here we need to wait for the stream task handle all data in the input queue.
if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) {
ASSERT(pStreamTask->status.taskStatus == TASK_STATUS__HALT);
} else {
ASSERT(pStreamTask->status.taskStatus == TASK_STATUS__NORMAL);
pStreamTask->status.taskStatus = TASK_STATUS__HALT;
}
// wait for the stream task to be idle
waitForTaskIdle(pTask, pStreamTask);
if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) {
// update the scan data range for source task.
qDebug("s-task:%s level:%d stream task window %" PRId64 " - %" PRId64 " update to %" PRId64 " - %" PRId64
", status:%s, sched-status:%d",
pStreamTask->id.idStr, TASK_LEVEL__SOURCE, pTimeWindow->skey, pTimeWindow->ekey, INT64_MIN,
pTimeWindow->ekey, streamGetTaskStatusStr(TASK_STATUS__NORMAL), pStreamTask->status.schedStatus);
// todo transfer state
} else {
// for sink tasks, they are continue to execute, no need to be halt.
// the process should be stopped for a while, during the term of transfer task state.
// OR wait for the inputQ && outputQ of agg tasks are all consumed, and then start the state transfer
qDebug("s-task:%s no need to update time window, for non-source task", pStreamTask->id.idStr);
// todo transfer state
}
// expand the query time window for stream scanner
pTimeWindow->skey = INT64_MIN;
qResetStreamInfoTimeWindow(pStreamTask->exec.pExecutor);
// transfer the ownership of executor state
streamTaskReleaseState(pTask);
streamTaskReloadState(pStreamTask);
streamSetStatusNormal(pStreamTask);
streamSchedExec(pStreamTask);
streamMetaReleaseTask(pTask->pMeta, pStreamTask);
return TSDB_CODE_SUCCESS;
}
/** /**
* todo: the batch of blocks should be tuned dynamic, according to the total elapsed time of each batch of blocks, the * todo: the batch of blocks should be tuned dynamic, according to the total elapsed time of each batch of blocks, the
* appropriate batch of blocks should be handled in 5 to 10 sec. * appropriate batch of blocks should be handled in 5 to 10 sec.
@ -331,20 +415,23 @@ int32_t streamExecForAll(SStreamTask* pTask) {
SStreamQueueItem* pInput = NULL; SStreamQueueItem* pInput = NULL;
// merge multiple input data if possible in the input queue. // merge multiple input data if possible in the input queue.
qDebug("s-task:%s start to extract data block from inputQ", id); qDebug("s-task:%s start to extract data block from inputQ, status:%s", id, streamGetTaskStatusStr(pTask->status.taskStatus));
while (1) { while (1) {
if (streamTaskShouldPause(&pTask->status)) { // downstream task's input queue is blocked, stop immediately
if (streamTaskShouldPause(&pTask->status) || (pTask->outputStatus == TASK_OUTPUT_STATUS__BLOCKED) ||
streamTaskShouldStop(&pTask->status)) {
if (batchSize > 1) { if (batchSize > 1) {
break; break;
} else { } else {
qDebug("123 %s", pTask->id.idStr);
return 0; return 0;
} }
} }
SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue);
if (qItem == NULL) { if (qItem == NULL) {
if (pTask->taskLevel == TASK_LEVEL__SOURCE && batchSize < MIN_STREAM_EXEC_BATCH_NUM && times < 5) { if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && batchSize < MIN_STREAM_EXEC_BATCH_NUM && times < 5) {
times++; times++;
taosMsleep(10); taosMsleep(10);
qDebug("===stream===try again batchSize:%d", batchSize); qDebug("===stream===try again batchSize:%d", batchSize);
@ -358,7 +445,7 @@ int32_t streamExecForAll(SStreamTask* pTask) {
if (pInput == NULL) { if (pInput == NULL) {
pInput = qItem; pInput = qItem;
streamQueueProcessSuccess(pTask->inputQueue); streamQueueProcessSuccess(pTask->inputQueue);
if (pTask->taskLevel == TASK_LEVEL__SINK) { if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
break; break;
} }
} else { } else {
@ -389,28 +476,20 @@ int32_t streamExecForAll(SStreamTask* pTask) {
} }
if (pInput == NULL) { if (pInput == NULL) {
if (pTask->info.fillHistory && pTask->status.transferState) {
int32_t code = streamTransferStateToStreamTask(pTask);
}
break; break;
} }
if (pTask->taskLevel == TASK_LEVEL__SINK) { if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
ASSERT(pInput->type == STREAM_INPUT__DATA_BLOCK); ASSERT(pInput->type == STREAM_INPUT__DATA_BLOCK);
qDebug("s-task:%s sink task start to sink %d blocks", id, batchSize); qDebug("s-task:%s sink task start to sink %d blocks", id, batchSize);
streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pInput); streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pInput);
continue; continue;
} }
// wait for the task to be ready to go
while (pTask->taskLevel == TASK_LEVEL__SOURCE) {
int8_t status = atomic_load_8(&pTask->status.taskStatus);
if (status != TASK_STATUS__NORMAL && status != TASK_STATUS__PAUSE) {
qError("stream task wait for the end of fill history, s-task:%s, status:%d", id,
atomic_load_8(&pTask->status.taskStatus));
taosMsleep(100);
} else {
break;
}
}
int64_t st = taosGetTimestampMs(); int64_t st = taosGetTimestampMs();
qDebug("s-task:%s start to process batch of blocks, num:%d", id, batchSize); qDebug("s-task:%s start to process batch of blocks, num:%d", id, batchSize);
@ -423,7 +502,7 @@ int32_t streamExecForAll(SStreamTask* pTask) {
const SStreamTrigger* pTrigger = (const SStreamTrigger*)pInput; const SStreamTrigger* pTrigger = (const SStreamTrigger*)pInput;
qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK); qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK);
} else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { } else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) {
ASSERT(pTask->taskLevel == TASK_LEVEL__SOURCE); ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE);
const SStreamDataSubmit* pSubmit = (const SStreamDataSubmit*)pInput; const SStreamDataSubmit* pSubmit = (const SStreamDataSubmit*)pInput;
qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT); qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT);
qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, id, pSubmit, qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, id, pSubmit,
@ -455,14 +534,34 @@ int32_t streamExecForAll(SStreamTask* pTask) {
streamTaskExecImpl(pTask, pInput, &resSize, &totalBlocks); streamTaskExecImpl(pTask, pInput, &resSize, &totalBlocks);
double el = (taosGetTimestampMs() - st) / 1000.0; double el = (taosGetTimestampMs() - st) / 1000.0;
qDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", qDebug("s-task:%s batch of (%d)input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d",
id, el, resSize / 1048576.0, totalBlocks); id, batchSize, el, resSize / 1048576.0, totalBlocks);
streamFreeQitem(pInput); streamFreeQitem(pInput);
} }
return 0; return 0;
} }
bool streamTaskIsIdle(const SStreamTask* pTask) {
int32_t numOfItems = taosQueueItemSize(pTask->inputQueue->queue);
if (numOfItems > 0) {
return false;
}
numOfItems = taosQallItemSize(pTask->inputQueue->qall);
if (numOfItems > 0) {
return false;
}
// blocked by downstream task
if (pTask->outputStatus == TASK_OUTPUT_STATUS__BLOCKED) {
return false;
}
return (pTask->status.schedStatus == TASK_SCHED_STATUS__INACTIVE);
}
int32_t streamTryExec(SStreamTask* pTask) { int32_t streamTryExec(SStreamTask* pTask) {
// this function may be executed by multi-threads, so status check is required. // this function may be executed by multi-threads, so status check is required.
int8_t schedStatus = int8_t schedStatus =
@ -477,7 +576,8 @@ int32_t streamTryExec(SStreamTask* pTask) {
// todo the task should be commit here // todo the task should be commit here
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
qDebug("s-task:%s exec completed", pTask->id.idStr); qDebug("s-task:%s exec completed, status:%s, sched-status:%d", pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus),
pTask->status.schedStatus);
if (!taosQueueEmpty(pTask->inputQueue->queue) && (!streamTaskShouldStop(&pTask->status)) && if (!taosQueueEmpty(pTask->inputQueue->queue) && (!streamTaskShouldStop(&pTask->status)) &&
(!streamTaskShouldPause(&pTask->status))) { (!streamTaskShouldPause(&pTask->status))) {
@ -487,3 +587,25 @@ int32_t streamTryExec(SStreamTask* pTask) {
return 0; return 0;
} }
int32_t streamTaskReleaseState(SStreamTask* pTask) {
qDebug("s-task:%s release exec state", pTask->id.idStr);
void* pExecutor = pTask->exec.pExecutor;
if (pExecutor != NULL) {
int32_t code = qStreamOperatorReleaseState(pExecutor);
return code;
} else {
return TSDB_CODE_SUCCESS;
}
}
int32_t streamTaskReloadState(SStreamTask* pTask) {
qDebug("s-task:%s reload exec state", pTask->id.idStr);
void* pExecutor = pTask->exec.pExecutor;
if (pExecutor != NULL) {
int32_t code = qStreamOperatorReloadState(pExecutor);
return code;
} else {
return TSDB_CODE_SUCCESS;
}
}

View File

@ -21,10 +21,18 @@
static TdThreadOnce streamMetaModuleInit = PTHREAD_ONCE_INIT; static TdThreadOnce streamMetaModuleInit = PTHREAD_ONCE_INIT;
int32_t streamBackendId = 0; int32_t streamBackendId = 0;
static void streamMetaEnvInit() { streamBackendId = taosOpenRef(20, streamBackendCleanup); } int32_t streamBackendCfWrapperId = 0;
static void streamMetaEnvInit() {
streamBackendId = taosOpenRef(64, streamBackendCleanup);
streamBackendCfWrapperId = taosOpenRef(64, streamBackendHandleCleanup);
}
void streamMetaInit() { taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); } void streamMetaInit() { taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); }
void streamMetaCleanup() { taosCloseRef(streamBackendId); } void streamMetaCleanup() {
taosCloseRef(streamBackendId);
taosCloseRef(streamBackendCfWrapperId);
}
SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId) { SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId) {
int32_t code = -1; int32_t code = -1;
@ -93,10 +101,14 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
goto _err; goto _err;
} }
pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend);
pMeta->pTaskBackendUnique =
taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK);
taosMemoryFree(streamPath); taosMemoryFree(streamPath);
taosInitRWLatch(&pMeta->lock); taosInitRWLatch(&pMeta->lock);
taosThreadMutexInit(&pMeta->backendMutex, NULL);
return pMeta; return pMeta;
_err: _err:
@ -127,9 +139,14 @@ void streamMetaClose(SStreamMeta* pMeta) {
} }
SStreamTask* pTask = *(SStreamTask**)pIter; SStreamTask* pTask = *(SStreamTask**)pIter;
if (pTask->timer) { if (pTask->schedTimer) {
taosTmrStop(pTask->timer); taosTmrStop(pTask->schedTimer);
pTask->timer = NULL; pTask->schedTimer = NULL;
}
if (pTask->launchTaskTimer) {
taosTmrStop(pTask->launchTaskTimer);
pTask->launchTaskTimer = NULL;
} }
tFreeStreamTask(pTask); tFreeStreamTask(pTask);
@ -139,6 +156,8 @@ void streamMetaClose(SStreamMeta* pMeta) {
taosRemoveRef(streamBackendId, pMeta->streamBackendRid); taosRemoveRef(streamBackendId, pMeta->streamBackendRid);
pMeta->pTaskList = taosArrayDestroy(pMeta->pTaskList); pMeta->pTaskList = taosArrayDestroy(pMeta->pTaskList);
taosMemoryFree(pMeta->path); taosMemoryFree(pMeta->path);
taosThreadMutexDestroy(&pMeta->backendMutex);
taosHashCleanup(pMeta->pTaskBackendUnique);
taosMemoryFree(pMeta); taosMemoryFree(pMeta);
} }
@ -266,18 +285,53 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) {
} }
void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) {
taosWLockLatch(&pMeta->lock); SStreamTask* pTask = NULL;
// pre-delete operation
taosWLockLatch(&pMeta->lock);
SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t));
if (ppTask) { if (ppTask) {
SStreamTask* pTask = *ppTask; pTask = *ppTask;
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING);
} else {
qDebug("vgId:%d failed to find the task:0x%x, it may be dropped already", pMeta->vgId, taskId);
taosWUnLockLatch(&pMeta->lock);
return;
}
taosWUnLockLatch(&pMeta->lock);
qDebug("s-task:0x%x set task status:%s", taskId, streamGetTaskStatusStr(TASK_STATUS__DROPPING));
while(1) {
taosRLockLatch(&pMeta->lock);
ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t));
if (ppTask) {
if ((*ppTask)->status.timerActive == 0) {
taosRUnLockLatch(&pMeta->lock);
break;
}
taosMsleep(10);
qDebug("s-task:%s wait for quit from timer", (*ppTask)->id.idStr);
taosRUnLockLatch(&pMeta->lock);
} else {
taosRUnLockLatch(&pMeta->lock);
break;
}
}
// let's do delete of stream task
taosWLockLatch(&pMeta->lock);
ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t));
if (ppTask) {
taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t)); taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t));
tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), pMeta->txn); tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), pMeta->txn);
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING); atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING);
int32_t num = taosArrayGetSize(pMeta->pTaskList); ASSERT(pTask->status.timerActive == 0);
int32_t num = taosArrayGetSize(pMeta->pTaskList);
qDebug("s-task:%s set the drop task flag, remain running s-task:%d", pTask->id.idStr, num - 1); qDebug("s-task:%s set the drop task flag, remain running s-task:%d", pTask->id.idStr, num - 1);
for (int32_t i = 0; i < num; ++i) { for (int32_t i = 0; i < num; ++i) {
int32_t* pTaskId = taosArrayGet(pMeta->pTaskList, i); int32_t* pTaskId = taosArrayGet(pMeta->pTaskList, i);
@ -380,6 +434,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) {
taosMemoryFree(pTask); taosMemoryFree(pTask);
continue; continue;
} }
if (taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(pTask->id.taskId), &pTask, sizeof(void*)) < 0) { if (taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(pTask->id.taskId), &pTask, sizeof(void*)) < 0) {
tdbFree(pKey); tdbFree(pKey);
tdbFree(pVal); tdbFree(pVal);
@ -388,10 +443,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) {
return -1; return -1;
} }
if (pTask->fillHistory) { ASSERT(pTask->status.downstreamReady == 0);
ASSERT(pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM);
streamTaskCheckDownstream(pTask, ver);
}
} }
tdbFree(pKey); tdbFree(pKey);

View File

@ -14,93 +14,131 @@
*/ */
#include "streamInc.h" #include "streamInc.h"
#include "ttimer.h"
#include "wal.h"
int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) { int32_t streamStartRecoverTask(SStreamTask* pTask, int8_t igUntreated) {
qDebug("s-task:%s at node %d launch recover", pTask->id.idStr, pTask->nodeId); SStreamScanHistoryReq req;
streamBuildSourceRecover1Req(pTask, &req, igUntreated);
int32_t len = sizeof(SStreamScanHistoryReq);
if (pTask->taskLevel == TASK_LEVEL__SOURCE) { void* serializedReq = rpcMallocCont(len);
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__RECOVER_PREPARE); if (serializedReq == NULL) {
qDebug("s-task:%s set task status:%d and start to recover", pTask->id.idStr, pTask->status.taskStatus); return -1;
}
streamSetParamForRecover(pTask); memcpy(serializedReq, &req, len);
streamSourceRecoverPrepareStep1(pTask, version);
SStreamRecoverStep1Req req; SRpcMsg rpcMsg = {.contLen = len, .pCont = serializedReq, .msgType = TDMT_VND_STREAM_SCAN_HISTORY};
streamBuildSourceRecover1Req(pTask, &req); if (tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &rpcMsg) < 0) {
int32_t len = sizeof(SStreamRecoverStep1Req); /*ASSERT(0);*/
void* serializedReq = rpcMallocCont(len);
if (serializedReq == NULL) {
return -1;
}
memcpy(serializedReq, &req, len);
SRpcMsg rpcMsg = { .contLen = len, .pCont = serializedReq, .msgType = TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE };
if (tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &rpcMsg) < 0) {
/*ASSERT(0);*/
}
} else if (pTask->taskLevel == TASK_LEVEL__AGG) {
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL);
streamSetParamForRecover(pTask);
streamAggRecoverPrepare(pTask);
} else if (pTask->taskLevel == TASK_LEVEL__SINK) {
// sink nodes has no specified operation for fill history
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL);
} }
return 0; return 0;
} }
// checkstatus const char* streamGetTaskStatusStr(int32_t status) {
int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { switch(status) {
qDebug("s-task:%s in fill history stage, ver:%"PRId64, pTask->id.idStr, version); case TASK_STATUS__NORMAL: return "normal";
case TASK_STATUS__SCAN_HISTORY: return "scan-history";
case TASK_STATUS__HALT: return "halt";
case TASK_STATUS__PAUSE: return "paused";
default:return "";
}
}
static int32_t doLaunchScanHistoryTask(SStreamTask* pTask) {
SVersionRange* pRange = &pTask->dataRange.range;
qDebug("s-task:%s vgId:%d status:%s, start scan-history-data task, verRange:%" PRId64 " - %" PRId64, pTask->id.idStr,
pTask->info.nodeId, streamGetTaskStatusStr(pTask->status.taskStatus), pRange->minVer, pRange->maxVer);
streamSetParamForScanHistoryData(pTask);
streamSetParamForStreamScannerStep1(pTask, pRange, &pTask->dataRange.window);
int32_t code = streamStartRecoverTask(pTask, 0);
return code;
}
int32_t streamTaskLaunchScanHistory(SStreamTask* pTask) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) {
return doLaunchScanHistoryTask(pTask);
} else {
ASSERT(pTask->status.taskStatus == TASK_STATUS__NORMAL);
qDebug("s-task:%s no need to scan-history-data, status:%s, sched-status:%d, ver:%" PRId64, pTask->id.idStr,
streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus,
walReaderGetCurrentVer(pTask->exec.pWalReader));
}
} else if (pTask->info.taskLevel == TASK_LEVEL__AGG) {
streamSetStatusNormal(pTask);
streamSetParamForScanHistoryData(pTask);
streamAggRecoverPrepare(pTask);
} else if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
streamSetStatusNormal(pTask);
qDebug("s-task:%s sink task convert to normal immediately", pTask->id.idStr);
}
return 0;
}
// check status
int32_t streamTaskCheckDownstreamTasks(SStreamTask* pTask) {
SHistDataRange* pRange = &pTask->dataRange;
STimeWindow* pWindow = &pRange->window;
SStreamTaskCheckReq req = { SStreamTaskCheckReq req = {
.streamId = pTask->id.streamId, .streamId = pTask->id.streamId,
.upstreamTaskId = pTask->id.taskId, .upstreamTaskId = pTask->id.taskId,
.upstreamNodeId = pTask->nodeId, .upstreamNodeId = pTask->info.nodeId,
.childId = pTask->selfChildId, .childId = pTask->info.selfChildId,
}; };
// serialize // serialize
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
req.reqId = tGenIdPI64(); req.reqId = tGenIdPI64();
req.downstreamNodeId = pTask->fixedEpDispatcher.nodeId; req.downstreamNodeId = pTask->fixedEpDispatcher.nodeId;
req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; req.downstreamTaskId = pTask->fixedEpDispatcher.taskId;
pTask->checkReqId = req.reqId; pTask->checkReqId = req.reqId;
qDebug("s-task:%s at node %d check downstream task:0x%x at node %d", pTask->id.idStr, pTask->nodeId, req.downstreamTaskId, qDebug("s-task:%s check single downstream task:0x%x(vgId:%d) ver:%" PRId64 "-%" PRId64 " window:%" PRId64
req.downstreamNodeId); "-%" PRId64 ", req:0x%" PRIx64,
pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId, pRange->range.minVer, pRange->range.maxVer,
pWindow->skey, pWindow->ekey, req.reqId);
streamDispatchCheckMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); streamDispatchCheckMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet);
} else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t numOfVgs = taosArrayGetSize(vgInfo); int32_t numOfVgs = taosArrayGetSize(vgInfo);
pTask->recoverTryingDownstream = numOfVgs; pTask->notReadyTasks = numOfVgs;
pTask->checkReqIds = taosArrayInit(numOfVgs, sizeof(int64_t)); pTask->checkReqIds = taosArrayInit(numOfVgs, sizeof(int64_t));
qDebug("s-task:%s check %d downstream tasks, ver:%" PRId64 "-%" PRId64 " window:%" PRId64 "-%" PRId64,
pTask->id.idStr, numOfVgs, pRange->range.minVer, pRange->range.maxVer, pWindow->skey, pWindow->ekey);
for (int32_t i = 0; i < numOfVgs; i++) { for (int32_t i = 0; i < numOfVgs; i++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
req.reqId = tGenIdPI64(); req.reqId = tGenIdPI64();
taosArrayPush(pTask->checkReqIds, &req.reqId); taosArrayPush(pTask->checkReqIds, &req.reqId);
req.downstreamNodeId = pVgInfo->vgId; req.downstreamNodeId = pVgInfo->vgId;
req.downstreamTaskId = pVgInfo->taskId; req.downstreamTaskId = pVgInfo->taskId;
qDebug("s-task:%s at node %d check downstream task:0x%x at node %d (shuffle)", pTask->id.idStr, pTask->nodeId, qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (shuffle), idx:%d", pTask->id.idStr, pTask->info.nodeId,
req.downstreamTaskId, req.downstreamNodeId); req.downstreamTaskId, req.downstreamNodeId, i);
streamDispatchCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); streamDispatchCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
} }
} else { } else {
qDebug("s-task:%s at node %d direct launch recover since no downstream", pTask->id.idStr, pTask->nodeId); pTask->status.downstreamReady = 1;
streamTaskLaunchRecover(pTask, version); qDebug("s-task:%s (vgId:%d) no downstream tasks, set downstream checked, try to launch scan-history-data, status:%s",
pTask->id.idStr, pTask->info.nodeId, streamGetTaskStatusStr(pTask->status.taskStatus));
streamTaskLaunchScanHistory(pTask);
} }
return 0; return 0;
} }
int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) { int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) {
SStreamTaskCheckReq req = { SStreamTaskCheckReq req = {
.reqId = pRsp->reqId, .reqId = pRsp->reqId,
.streamId = pRsp->streamId, .streamId = pRsp->streamId,
@ -111,7 +149,7 @@ int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp
.childId = pRsp->childId, .childId = pRsp->childId,
}; };
qDebug("s-task:%s at node %d check downstream task:0x%x at node %d (recheck)", pTask->id.idStr, pTask->nodeId, qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (recheck)", pTask->id.idStr, pTask->info.nodeId,
req.downstreamTaskId, req.downstreamNodeId); req.downstreamTaskId, req.downstreamNodeId);
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
@ -135,11 +173,9 @@ int32_t streamTaskCheckStatus(SStreamTask* pTask) {
return atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL? 1:0; return atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL? 1:0;
} }
int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp, int64_t version) { int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) {
ASSERT(pTask->id.taskId == pRsp->upstreamTaskId); ASSERT(pTask->id.taskId == pRsp->upstreamTaskId);
const char* id = pTask->id.idStr;
qDebug("s-task:%s at node %d recv check rsp from task:0x%x at node %d: status %d", pTask->id.idStr,
pRsp->upstreamNodeId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->status);
if (pRsp->status == 1) { if (pRsp->status == 1) {
if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
@ -158,146 +194,386 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp*
return -1; return -1;
} }
int32_t left = atomic_sub_fetch_32(&pTask->recoverTryingDownstream, 1); int32_t left = atomic_sub_fetch_32(&pTask->notReadyTasks, 1);
ASSERT(left >= 0); ASSERT(left >= 0);
if (left == 0) { if (left == 0) {
taosArrayDestroy(pTask->checkReqIds); taosArrayDestroy(pTask->checkReqIds);
pTask->checkReqIds = NULL; pTask->checkReqIds = NULL;
qDebug("s-task:%s all %d downstream tasks are ready, now enter into recover stage", pTask->id.idStr, numOfReqs); if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) {
streamTaskLaunchRecover(pTask, version); qDebug("s-task:%s all %d downstream tasks are ready, now enter into scan-history-data stage, status:%s", id, numOfReqs,
streamGetTaskStatusStr(pTask->status.taskStatus));
streamTaskLaunchScanHistory(pTask);
} else {
ASSERT(pTask->status.taskStatus == TASK_STATUS__NORMAL);
qDebug("s-task:%s fixed downstream task is ready, now ready for data from wal, status:%s", id,
streamGetTaskStatusStr(pTask->status.taskStatus));
}
} else {
int32_t total = taosArrayGetSize(pTask->shuffleDispatcher.dbInfo.pVgroupInfos);
qDebug("s-task:%s (vgId:%d) recv check rsp from task:0x%x (vgId:%d) status:%d, total:%d not ready:%d", id,
pRsp->upstreamNodeId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->status, total, left);
} }
} else if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { } else if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
if (pRsp->reqId != pTask->checkReqId) { if (pRsp->reqId != pTask->checkReqId) {
return -1; return -1;
} }
streamTaskLaunchRecover(pTask, version); // set the downstream tasks have been checked flag
ASSERT(pTask->status.downstreamReady == 0);
pTask->status.downstreamReady = 1;
ASSERT(pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY || pTask->status.taskStatus == TASK_STATUS__NORMAL);
if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) {
qDebug("s-task:%s fixed downstream task is ready, now enter into scan-history-data stage, status:%s", id,
streamGetTaskStatusStr(pTask->status.taskStatus));
streamTaskLaunchScanHistory(pTask);
} else {
qDebug("s-task:%s fixed downstream task is ready, ready for data from inputQ, status:%s", id,
streamGetTaskStatusStr(pTask->status.taskStatus));
}
} else { } else {
ASSERT(0); ASSERT(0);
} }
} else { // not ready, wait for 100ms and retry } else { // not ready, wait for 100ms and retry
qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, wait for 100ms and retry", pTask->id.idStr, qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, wait for 100ms and retry", id, pRsp->downstreamTaskId,
pRsp->downstreamTaskId, pRsp->downstreamNodeId); pRsp->downstreamNodeId);
taosMsleep(100); taosMsleep(100);
streamRecheckOneDownstream(pTask, pRsp); streamRecheckDownstream(pTask, pRsp);
} }
return 0; return 0;
} }
// common // common
int32_t streamSetParamForRecover(SStreamTask* pTask) { int32_t streamSetParamForScanHistoryData(SStreamTask* pTask) {
void* exec = pTask->exec.pExecutor; qDebug("s-task:%s set operator option for scan-history-data", pTask->id.idStr);
return qStreamSetParamForRecover(exec); return qSetStreamOperatorOptionForScanHistory(pTask->exec.pExecutor);
} }
int32_t streamRestoreParam(SStreamTask* pTask) { int32_t streamRestoreParam(SStreamTask* pTask) {
void* exec = pTask->exec.pExecutor; qDebug("s-task:%s restore operator param after scan-history-data", pTask->id.idStr);
return qStreamRestoreParam(exec); return qRestoreStreamOperatorOption(pTask->exec.pExecutor);
} }
int32_t streamSetStatusNormal(SStreamTask* pTask) { int32_t streamSetStatusNormal(SStreamTask* pTask) {
qDebug("s-task:%s set task status to be normal, prev:%s", pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus));
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL);
return 0; return 0;
} }
// source // source
int32_t streamSourceRecoverPrepareStep1(SStreamTask* pTask, int64_t ver) { int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange *pVerRange, STimeWindow* pWindow) {
void* exec = pTask->exec.pExecutor; return qStreamSourceScanParamForHistoryScanStep1(pTask->exec.pExecutor, pVerRange, pWindow);
return qStreamSourceRecoverStep1(exec, ver);
} }
int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamRecoverStep1Req* pReq) { int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange *pVerRange, STimeWindow* pWindow) {
pReq->msgHead.vgId = pTask->nodeId; return qStreamSourceScanParamForHistoryScanStep2(pTask->exec.pExecutor, pVerRange, pWindow);
}
int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated) {
pReq->msgHead.vgId = pTask->info.nodeId;
pReq->streamId = pTask->id.streamId; pReq->streamId = pTask->id.streamId;
pReq->taskId = pTask->id.taskId; pReq->taskId = pTask->id.taskId;
pReq->igUntreated = igUntreated;
return 0; return 0;
} }
int32_t streamSourceRecoverScanStep1(SStreamTask* pTask) { int32_t streamSourceScanHistoryData(SStreamTask* pTask) {
return streamScanExec(pTask, 100); return streamScanExec(pTask, 100);
} }
int32_t streamBuildSourceRecover2Req(SStreamTask* pTask, SStreamRecoverStep2Req* pReq) { int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) {
pReq->msgHead.vgId = pTask->nodeId; SStreamRecoverFinishReq req = { .streamId = pTask->id.streamId, .childId = pTask->info.selfChildId };
pReq->streamId = pTask->id.streamId;
pReq->taskId = pTask->id.taskId;
return 0;
}
int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver) {
void* exec = pTask->exec.pExecutor;
const char* id = pTask->id.idStr;
int64_t st = taosGetTimestampMs();
qDebug("s-task:%s recover step2(blocking stage) started", id);
if (qStreamSourceRecoverStep2(exec, ver) < 0) {
}
int32_t code = streamScanExec(pTask, 100);
double el = (taosGetTimestampMs() - st) / 1000.0;
qDebug("s-task:%s recover step2(blocking stage) ended, elapsed time:%.2fs", id, el);
return code;
}
int32_t streamDispatchRecoverFinishReq(SStreamTask* pTask) {
SStreamRecoverFinishReq req = { .streamId = pTask->id.streamId, .childId = pTask->selfChildId };
// serialize // serialize
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
qDebug("s-task:%s send recover finish msg to downstream (fix-dispatch) to taskId:%d, status:%d", pTask->id.idStr,
pTask->fixedEpDispatcher.taskId, pTask->status.taskStatus);
req.taskId = pTask->fixedEpDispatcher.taskId; req.taskId = pTask->fixedEpDispatcher.taskId;
streamDispatchOneRecoverFinishReq(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); streamDoDispatchScanHistoryFinishMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet);
} else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t vgSz = taosArrayGetSize(vgInfo); int32_t numOfVgs = taosArrayGetSize(vgInfo);
for (int32_t i = 0; i < vgSz; i++) {
qDebug("s-task:%s send scan-history-data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr,
numOfVgs, streamGetTaskStatusStr(pTask->status.taskStatus));
for (int32_t i = 0; i < numOfVgs; i++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
req.taskId = pVgInfo->taskId; req.taskId = pVgInfo->taskId;
streamDispatchOneRecoverFinishReq(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); streamDoDispatchScanHistoryFinishMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
} }
} }
return 0;
}
static int32_t doDispatchTransferMsg(SStreamTask* pTask, const SStreamTransferReq* pReq, int32_t vgId, SEpSet* pEpSet) {
void* buf = NULL;
int32_t code = -1;
SRpcMsg msg = {0};
int32_t tlen;
tEncodeSize(tEncodeStreamRecoverFinishReq, pReq, tlen, code);
if (code < 0) {
return -1;
}
buf = rpcMallocCont(sizeof(SMsgHead) + tlen);
if (buf == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
((SMsgHead*)buf)->vgId = htonl(vgId);
void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
SEncoder encoder;
tEncoderInit(&encoder, abuf, tlen);
if ((code = tEncodeStreamRecoverFinishReq(&encoder, pReq)) < 0) {
if (buf) {
rpcFreeCont(buf);
}
return code;
}
tEncoderClear(&encoder);
msg.contLen = tlen + sizeof(SMsgHead);
msg.pCont = buf;
msg.msgType = TDMT_STREAM_TRANSFER_STATE;
msg.info.noResp = 1;
tmsgSendReq(pEpSet, &msg);
qDebug("s-task:%s dispatch transfer state msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pReq->taskId, vgId);
return 0;
}
int32_t streamDispatchTransferStateMsg(SStreamTask* pTask) {
SStreamTransferReq req = { .streamId = pTask->id.streamId, .childId = pTask->info.selfChildId };
// serialize
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
qDebug("s-task:%s send transfer state msg to downstream (fix-dispatch) to taskId:0x%x, status:%s", pTask->id.idStr,
pTask->fixedEpDispatcher.taskId, streamGetTaskStatusStr(pTask->status.taskStatus));
req.taskId = pTask->fixedEpDispatcher.taskId;
doDispatchTransferMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet);
} else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t numOfVgs = taosArrayGetSize(vgInfo);
for (int32_t i = 0; i < numOfVgs; i++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
req.taskId = pVgInfo->taskId;
doDispatchTransferMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
}
}
return 0; return 0;
} }
// agg // agg
int32_t streamAggRecoverPrepare(SStreamTask* pTask) { int32_t streamAggRecoverPrepare(SStreamTask* pTask) {
pTask->recoverWaitingUpstream = taosArrayGetSize(pTask->childEpInfo); pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->pUpstreamEpInfoList);
qDebug("s-task:%s wait for %d upstreams", pTask->id.idStr, pTask->recoverWaitingUpstream); qDebug("s-task:%s agg task is ready and wait for %d upstream tasks complete scan-history procedure", pTask->id.idStr,
pTask->numOfWaitingUpstream);
return 0; return 0;
} }
int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask) { int32_t streamAggUpstreamScanHistoryFinish(SStreamTask* pTask) {
void* exec = pTask->exec.pExecutor; void* exec = pTask->exec.pExecutor;
if (qStreamRestoreParam(exec) < 0) { if (qRestoreStreamOperatorOption(exec) < 0) {
return -1; return -1;
} }
if (qStreamRecoverFinish(exec) < 0) { if (qStreamRecoverFinish(exec) < 0) {
return -1; return -1;
} }
streamSetStatusNormal(pTask);
// streamSetStatusNormal(pTask);
return 0; return 0;
} }
int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t childId) { int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t taskId, int32_t childId) {
if (pTask->taskLevel == TASK_LEVEL__AGG) { if (pTask->info.taskLevel == TASK_LEVEL__AGG) {
int32_t left = atomic_sub_fetch_32(&pTask->recoverWaitingUpstream, 1); int32_t left = atomic_sub_fetch_32(&pTask->numOfWaitingUpstream, 1);
qDebug("s-task:%s remain unfinished child tasks:%d", pTask->id.idStr, left);
ASSERT(left >= 0); ASSERT(left >= 0);
if (left == 0) { if (left == 0) {
streamAggChildrenRecoverFinish(pTask); int32_t numOfTasks = taosArrayGetSize(pTask->pUpstreamEpInfoList);
qDebug("s-task:%s all %d upstream tasks finish scan-history data", pTask->id.idStr, numOfTasks);
streamAggUpstreamScanHistoryFinish(pTask);
} else {
qDebug("s-task:%s receive scan-history data finish msg from upstream:0x%x(index:%d), unfinished:%d",
pTask->id.idStr, taskId, childId, left);
} }
} }
return 0; return 0;
} }
int32_t tEncodeSStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* pReq) { static void doCheckDownstreamStatus(SStreamTask* pTask, SStreamTask* pHTask) {
pHTask->dataRange.range.minVer = 0;
pHTask->dataRange.range.maxVer = pTask->chkInfo.currentVer;
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
qDebug("s-task:%s set the launch condition for fill history s-task:%s, window:%" PRId64 " - %" PRId64
" ver range:%" PRId64 " - %" PRId64,
pTask->id.idStr, pHTask->id.idStr, pHTask->dataRange.window.skey, pHTask->dataRange.window.ekey,
pHTask->dataRange.range.minVer, pHTask->dataRange.range.maxVer);
} else {
qDebug("s-task:%s no fill history condition for non-source task:%s", pTask->id.idStr, pHTask->id.idStr);
}
// check if downstream tasks have been ready
streamTaskCheckDownstreamTasks(pHTask);
}
typedef struct SStreamTaskRetryInfo {
SStreamMeta* pMeta;
int32_t taskId;
} SStreamTaskRetryInfo;
static void tryLaunchHistoryTask(void* param, void* tmrId) {
SStreamTaskRetryInfo* pInfo = param;
SStreamMeta* pMeta = pInfo->pMeta;
qDebug("s-task:0x%x in timer to launch related history task", pInfo->taskId);
taosWLockLatch(&pMeta->lock);
SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &pInfo->taskId, sizeof(int32_t));
if (ppTask) {
ASSERT((*ppTask)->status.timerActive == 1);
if (streamTaskShouldStop(&(*ppTask)->status)) {
const char* pStatus = streamGetTaskStatusStr((*ppTask)->status.taskStatus);
qDebug("s-task:%s status:%s quit timer task", (*ppTask)->id.idStr, pStatus);
(*ppTask)->status.timerActive = 0;
taosWUnLockLatch(&pMeta->lock);
return;
}
}
taosWUnLockLatch(&pMeta->lock);
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pInfo->taskId);
if (pTask != NULL) {
ASSERT(pTask->status.timerActive == 1);
// abort the timer if intend to stop task
SStreamTask* pHTask = streamMetaAcquireTask(pMeta, pTask->historyTaskId.taskId);
if (pHTask == NULL && (!streamTaskShouldStop(&pTask->status))) {
const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus);
qWarn(
"s-task:%s vgId:%d status:%s failed to launch history task:0x%x, since it may not be built, or have been "
"destroyed, or should stop exec",
pTask->id.idStr, pMeta->vgId, pStatus, pTask->historyTaskId.taskId);
taosTmrReset(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer, &pTask->launchTaskTimer);
streamMetaReleaseTask(pMeta, pTask);
return;
}
if (pHTask != NULL) {
doCheckDownstreamStatus(pTask, pHTask);
streamMetaReleaseTask(pMeta, pHTask);
}
// not in timer anymore
pTask->status.timerActive = 0;
streamMetaReleaseTask(pMeta, pTask);
} else {
qError("s-task:0x%x failed to load task, it may have been destroyed", pInfo->taskId);
}
taosMemoryFree(pInfo);
}
// todo fix the bug: 2. race condition
// an fill history task needs to be started.
int32_t streamCheckHistoryTaskDownstrem(SStreamTask* pTask) {
SStreamMeta* pMeta = pTask->pMeta;
int32_t hTaskId = pTask->historyTaskId.taskId;
// Set the execute conditions, including the query time window and the version range
SStreamTask** pHTask = taosHashGet(pMeta->pTasks, &hTaskId, sizeof(hTaskId));
if (pHTask == NULL) {
qWarn("s-task:%s vgId:%d failed to launch history task:0x%x, since it is not built yet", pTask->id.idStr,
pMeta->vgId, hTaskId);
SStreamTaskRetryInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamTaskRetryInfo));
pInfo->taskId = pTask->id.taskId;
pInfo->pMeta = pTask->pMeta;
if (pTask->launchTaskTimer == NULL) {
pTask->launchTaskTimer = taosTmrStart(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer);
if (pTask->launchTaskTimer == NULL) {
// todo failed to create timer
} else {
pTask->status.timerActive = 1; // timer is active
qDebug("s-task:%s set timer active flag", pTask->id.idStr);
}
} else { // timer exists
pTask->status.timerActive = 1;
qDebug("s-task:%s set timer active flag, task timer not null", pTask->id.idStr);
taosTmrReset(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer, &pTask->launchTaskTimer);
}
// try again in 500ms
return TSDB_CODE_SUCCESS;
}
doCheckDownstreamStatus(pTask, *pHTask);
return TSDB_CODE_SUCCESS;
}
int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask) {
SStreamMeta* pMeta = pTask->pMeta;
if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
return 0;
}
// restore param
int32_t code = streamRestoreParam(pTask);
if (code < 0) {
return -1;
}
// dispatch recover finish req to all related downstream task
code = streamDispatchScanHistoryFinishMsg(pTask);
if (code < 0) {
return -1;
}
ASSERT(pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY);
// ready to process data from inputQ
streamSetStatusNormal(pTask);
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
// todo check rsp, commit data
streamMetaSaveTask(pMeta, pTask);
return 0;
}
bool streamTaskRecoverScanStep1Finished(SStreamTask* pTask) {
void* exec = pTask->exec.pExecutor;
return qStreamRecoverScanStep1Finished(exec);
}
bool streamTaskRecoverScanStep2Finished(SStreamTask* pTask) {
void* exec = pTask->exec.pExecutor;
return qStreamRecoverScanStep2Finished(exec);
}
int32_t streamTaskRecoverSetAllStepFinished(SStreamTask* pTask) {
void* exec = pTask->exec.pExecutor;
return qStreamRecoverSetAllStepFinished(exec);
}
int32_t tEncodeStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* pReq) {
if (tStartEncode(pEncoder) < 0) return -1; if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->reqId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->reqId) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1;
@ -310,7 +586,7 @@ int32_t tEncodeSStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq
return pEncoder->pos; return pEncoder->pos;
} }
int32_t tDecodeSStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq) { int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq) {
if (tStartDecode(pDecoder) < 0) return -1; if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->reqId) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->reqId) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1;
@ -323,7 +599,7 @@ int32_t tDecodeSStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq
return 0; return 0;
} }
int32_t tEncodeSStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp) { int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp) {
if (tStartEncode(pEncoder) < 0) return -1; if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->reqId) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->reqId) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1;
@ -337,7 +613,7 @@ int32_t tEncodeSStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp
return pEncoder->pos; return pEncoder->pos;
} }
int32_t tDecodeSStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp) { int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp) {
if (tStartDecode(pDecoder) < 0) return -1; if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->reqId) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->reqId) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1;
@ -351,7 +627,7 @@ int32_t tDecodeSStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp
return 0; return 0;
} }
int32_t tEncodeSStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq) { int32_t tEncodeStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq) {
if (tStartEncode(pEncoder) < 0) return -1; if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1;
@ -359,7 +635,7 @@ int32_t tEncodeSStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverF
tEndEncode(pEncoder); tEndEncode(pEncoder);
return pEncoder->pos; return pEncoder->pos;
} }
int32_t tDecodeSStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq) { int32_t tDecodeStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq) {
if (tStartDecode(pDecoder) < 0) return -1; if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1;
@ -367,3 +643,41 @@ int32_t tDecodeSStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishR
tEndDecode(pDecoder); tEndDecode(pDecoder);
return 0; return 0;
} }
// todo handle race condition, this task may be destroyed
void streamPrepareNdoCheckDownstream(SStreamTask* pTask) {
if (pTask->info.fillHistory) {
qDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr);
} else {
// calculate the correct start time window, and start the handle the history data for the main task.
if (pTask->historyTaskId.taskId != 0) {
// check downstream tasks for associated scan-history-data tasks
streamCheckHistoryTaskDownstrem(pTask);
// launch current task
SHistDataRange* pRange = &pTask->dataRange;
int64_t ekey = pRange->window.ekey + 1;
int64_t ver = pRange->range.minVer;
pRange->window.skey = ekey;
pRange->window.ekey = INT64_MAX;
pRange->range.minVer = 0;
pRange->range.maxVer = ver;
qDebug("s-task:%s level:%d fill-history task exists, update stream time window:%" PRId64 " - %" PRId64
", ver range:%" PRId64 " - %" PRId64,
pTask->id.idStr, pTask->info.taskLevel, pRange->window.skey, pRange->window.ekey, pRange->range.minVer,
pRange->range.maxVer);
} else {
SHistDataRange* pRange = &pTask->dataRange;
qDebug("s-task:%s no associated scan-history task, stream time window:%" PRId64 " - %" PRId64
", ver range:%" PRId64 " - %" PRId64,
pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer);
}
ASSERT(pTask->status.downstreamReady == 0);
// check downstream tasks for itself
streamTaskCheckDownstreamTasks(pTask);
}
}

View File

@ -116,16 +116,33 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz
pState->taskId = pStreamTask->id.taskId; pState->taskId = pStreamTask->id.taskId;
pState->streamId = pStreamTask->id.streamId; pState->streamId = pStreamTask->id.streamId;
sprintf(pState->pTdbState->idstr, "0x%" PRIx64 "-%d", pState->streamId, pState->taskId);
#ifdef USE_ROCKSDB #ifdef USE_ROCKSDB
SStreamMeta* pMeta = pStreamTask->pMeta; SStreamMeta* pMeta = pStreamTask->pMeta;
pState->streamBackendRid = pMeta->streamBackendRid; pState->streamBackendRid = pMeta->streamBackendRid;
int code = streamStateOpenBackend(pMeta->streamBackend, pState); // taosWLockLatch(&pMeta->lock);
if (code == -1) { taosThreadMutexLock(&pMeta->backendMutex);
taosReleaseRef(streamBackendId, pMeta->streamBackendRid); void* uniqueId =
taosMemoryFree(pState); taosHashGet(pMeta->pTaskBackendUnique, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1);
pState = NULL; if (uniqueId == NULL) {
int code = streamStateOpenBackend(pMeta->streamBackend, pState);
if (code == -1) {
taosReleaseRef(streamBackendId, pState->streamBackendRid);
taosThreadMutexUnlock(&pMeta->backendMutex);
taosMemoryFree(pState);
return NULL;
}
taosHashPut(pMeta->pTaskBackendUnique, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1,
&pState->pTdbState->backendCfWrapperId, sizeof(pState->pTdbState->backendCfWrapperId));
} else {
int64_t id = *(int64_t*)uniqueId;
pState->pTdbState->backendCfWrapperId = id;
pState->pTdbState->pBackendCfWrapper = taosAcquireRef(streamBackendCfWrapperId, id);
taosAcquireRef(streamBackendId, pState->streamBackendRid);
} }
taosThreadMutexUnlock(&pMeta->backendMutex);
pState->pTdbState->pOwner = pTask; pState->pTdbState->pOwner = pTask;
pState->pFileState = NULL; pState->pFileState = NULL;
@ -385,8 +402,8 @@ int32_t streamStateClear(SStreamState* pState) {
streamStatePut(pState, &key, NULL, 0); streamStatePut(pState, &key, NULL, 0);
while (1) { while (1) {
SStreamStateCur* pCur = streamStateSeekKeyNext(pState, &key); SStreamStateCur* pCur = streamStateSeekKeyNext(pState, &key);
SWinKey delKey = {0}; SWinKey delKey = {0};
int32_t code = streamStateGetKVByCur(pCur, &delKey, NULL, 0); int32_t code = streamStateGetKVByCur(pCur, &delKey, NULL, 0);
streamStateFreeCur(pCur); streamStateFreeCur(pCur);
if (code == 0) { if (code == 0) {
streamStateDel(pState, &delKey); streamStateDel(pState, &delKey);
@ -498,7 +515,7 @@ int32_t streamStateGetKVByCur(SStreamStateCur* pCur, SWinKey* pKey, const void**
return -1; return -1;
} }
const SStateKey* pKTmp = NULL; const SStateKey* pKTmp = NULL;
int32_t kLen; int32_t kLen;
if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, pVal, pVLen) < 0) { if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, pVal, pVLen) < 0) {
return -1; return -1;
} }
@ -518,7 +535,7 @@ int32_t streamStateFillGetKVByCur(SStreamStateCur* pCur, SWinKey* pKey, const vo
return -1; return -1;
} }
const SWinKey* pKTmp = NULL; const SWinKey* pKTmp = NULL;
int32_t kLen; int32_t kLen;
if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, pVal, pVLen) < 0) { if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, pVal, pVLen) < 0) {
return -1; return -1;
} }
@ -535,7 +552,7 @@ int32_t streamStateGetGroupKVByCur(SStreamStateCur* pCur, SWinKey* pKey, const v
return -1; return -1;
} }
uint64_t groupId = pKey->groupId; uint64_t groupId = pKey->groupId;
int32_t code = streamStateFillGetKVByCur(pCur, pKey, pVal, pVLen); int32_t code = streamStateFillGetKVByCur(pCur, pKey, pVal, pVLen);
if (code == 0) { if (code == 0) {
if (pKey->groupId == groupId) { if (pKey->groupId == groupId) {
return 0; return 0;
@ -553,7 +570,7 @@ int32_t streamStateGetFirst(SStreamState* pState, SWinKey* key) {
SWinKey tmp = {.ts = 0, .groupId = 0}; SWinKey tmp = {.ts = 0, .groupId = 0};
streamStatePut(pState, &tmp, NULL, 0); streamStatePut(pState, &tmp, NULL, 0);
SStreamStateCur* pCur = streamStateSeekKeyNext(pState, &tmp); SStreamStateCur* pCur = streamStateSeekKeyNext(pState, &tmp);
int32_t code = streamStateGetKVByCur(pCur, key, NULL, 0); int32_t code = streamStateGetKVByCur(pCur, key, NULL, 0);
streamStateFreeCur(pCur); streamStateFreeCur(pCur);
streamStateDel(pState, &tmp); streamStateDel(pState, &tmp);
return code; return code;
@ -593,7 +610,7 @@ SStreamStateCur* streamStateSeekKeyNext(SStreamState* pState, const SWinKey* key
} }
SStateKey sKey = {.key = *key, .opNum = pState->number}; SStateKey sKey = {.key = *key, .opNum = pState->number};
int32_t c = 0; int32_t c = 0;
if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateKey), &c) < 0) { if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateKey), &c) < 0) {
streamStateFreeCur(pCur); streamStateFreeCur(pCur);
return NULL; return NULL;
@ -726,9 +743,9 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa
#else #else
SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext(pState, key); SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext(pState, key);
SSessionKey resKey = *key; SSessionKey resKey = *key;
void* tmp = NULL; void* tmp = NULL;
int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, &tmp, pVLen); int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, &tmp, pVLen);
if (code == 0) { if (code == 0) {
if (key->win.skey != resKey.win.skey) { if (key->win.skey != resKey.win.skey) {
code = -1; code = -1;
@ -745,6 +762,7 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa
int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) { int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) {
#ifdef USE_ROCKSDB #ifdef USE_ROCKSDB
qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey,key->win.ekey, key->groupId);
return streamStateSessionDel_rocksdb(pState, key); return streamStateSessionDel_rocksdb(pState, key);
#else #else
SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; SStateSessionKey sKey = {.key = *key, .opNum = pState->number};
@ -767,7 +785,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev(SStreamState* pState, cons
} }
SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; SStateSessionKey sKey = {.key = *key, .opNum = pState->number};
int32_t c = 0; int32_t c = 0;
if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) { if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) {
streamStateFreeCur(pCur); streamStateFreeCur(pCur);
return NULL; return NULL;
@ -798,7 +816,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext(SStreamState* pState, cons
} }
SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; SStateSessionKey sKey = {.key = *key, .opNum = pState->number};
int32_t c = 0; int32_t c = 0;
if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) { if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) {
streamStateFreeCur(pCur); streamStateFreeCur(pCur);
return NULL; return NULL;
@ -830,7 +848,7 @@ SStreamStateCur* streamStateSessionSeekKeyNext(SStreamState* pState, const SSess
} }
SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; SStateSessionKey sKey = {.key = *key, .opNum = pState->number};
int32_t c = 0; int32_t c = 0;
if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) { if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) {
streamStateFreeCur(pCur); streamStateFreeCur(pCur);
return NULL; return NULL;
@ -854,7 +872,7 @@ int32_t streamStateSessionGetKVByCur(SStreamStateCur* pCur, SSessionKey* pKey, v
return -1; return -1;
} }
SStateSessionKey* pKTmp = NULL; SStateSessionKey* pKTmp = NULL;
int32_t kLen; int32_t kLen;
if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, (const void**)pVal, pVLen) < 0) { if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, (const void**)pVal, pVLen) < 0) {
return -1; return -1;
} }
@ -873,13 +891,13 @@ int32_t streamStateSessionClear(SStreamState* pState) {
#ifdef USE_ROCKSDB #ifdef USE_ROCKSDB
return streamStateSessionClear_rocksdb(pState); return streamStateSessionClear_rocksdb(pState);
#else #else
SSessionKey key = {.win.skey = 0, .win.ekey = 0, .groupId = 0}; SSessionKey key = {.win.skey = 0, .win.ekey = 0, .groupId = 0};
SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext(pState, &key); SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext(pState, &key);
while (1) { while (1) {
SSessionKey delKey = {0}; SSessionKey delKey = {0};
void* buf = NULL; void* buf = NULL;
int32_t size = 0; int32_t size = 0;
int32_t code = streamStateSessionGetKVByCur(pCur, &delKey, &buf, &size); int32_t code = streamStateSessionGetKVByCur(pCur, &delKey, &buf, &size);
if (code == 0 && size > 0) { if (code == 0 && size > 0) {
memset(buf, 0, size); memset(buf, 0, size);
streamStateSessionPut(pState, &delKey, buf, size); streamStateSessionPut(pState, &delKey, buf, size);
@ -908,14 +926,14 @@ int32_t streamStateSessionGetKeyByRange(SStreamState* pState, const SSessionKey*
} }
SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; SStateSessionKey sKey = {.key = *key, .opNum = pState->number};
int32_t c = 0; int32_t c = 0;
if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) { if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) {
streamStateFreeCur(pCur); streamStateFreeCur(pCur);
return -1; return -1;
} }
SSessionKey resKey = *key; SSessionKey resKey = *key;
int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, NULL, 0); int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, NULL, 0);
if (code == 0 && sessionRangeKeyCmpr(key, &resKey) == 0) { if (code == 0 && sessionRangeKeyCmpr(key, &resKey) == 0) {
*curKey = resKey; *curKey = resKey;
streamStateFreeCur(pCur); streamStateFreeCur(pCur);
@ -951,19 +969,19 @@ int32_t streamStateSessionAddIfNotExist(SStreamState* pState, SSessionKey* key,
return streamStateSessionAddIfNotExist_rocksdb(pState, key, gap, pVal, pVLen); return streamStateSessionAddIfNotExist_rocksdb(pState, key, gap, pVal, pVLen);
#else #else
// todo refactor // todo refactor
int32_t res = 0; int32_t res = 0;
SSessionKey originKey = *key; SSessionKey originKey = *key;
SSessionKey searchKey = *key; SSessionKey searchKey = *key;
searchKey.win.skey = key->win.skey - gap; searchKey.win.skey = key->win.skey - gap;
searchKey.win.ekey = key->win.ekey + gap; searchKey.win.ekey = key->win.ekey + gap;
int32_t valSize = *pVLen; int32_t valSize = *pVLen;
void* tmp = tdbRealloc(NULL, valSize); void* tmp = tdbRealloc(NULL, valSize);
if (!tmp) { if (!tmp) {
return -1; return -1;
} }
SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentPrev(pState, key); SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentPrev(pState, key);
int32_t code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen); int32_t code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen);
if (code == 0) { if (code == 0) {
if (sessionRangeKeyCmpr(&searchKey, key) == 0) { if (sessionRangeKeyCmpr(&searchKey, key) == 0) {
memcpy(tmp, *pVal, valSize); memcpy(tmp, *pVal, valSize);
@ -1006,16 +1024,16 @@ int32_t streamStateStateAddIfNotExist(SStreamState* pState, SSessionKey* key, ch
#ifdef USE_ROCKSDB #ifdef USE_ROCKSDB
return streamStateStateAddIfNotExist_rocksdb(pState, key, pKeyData, keyDataLen, fn, pVal, pVLen); return streamStateStateAddIfNotExist_rocksdb(pState, key, pKeyData, keyDataLen, fn, pVal, pVLen);
#else #else
int32_t res = 0; int32_t res = 0;
SSessionKey tmpKey = *key; SSessionKey tmpKey = *key;
int32_t valSize = *pVLen; int32_t valSize = *pVLen;
void* tmp = tdbRealloc(NULL, valSize); void* tmp = tdbRealloc(NULL, valSize);
if (!tmp) { if (!tmp) {
return -1; return -1;
} }
SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentPrev(pState, key); SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentPrev(pState, key);
int32_t code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen); int32_t code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen);
if (code == 0) { if (code == 0) {
if (key->win.skey <= tmpKey.win.skey && tmpKey.win.ekey <= key->win.ekey) { if (key->win.skey <= tmpKey.win.skey && tmpKey.win.ekey <= key->win.ekey) {
memcpy(tmp, *pVal, valSize); memcpy(tmp, *pVal, valSize);
@ -1113,6 +1131,8 @@ int32_t streamStateDeleteCheckPoint(SStreamState* pState, TSKEY mark) {
#endif #endif
} }
void streamStateReloadInfo(SStreamState* pState, TSKEY ts) { streamFileStateReloadInfo(pState->pFileState, ts); }
#if 0 #if 0
char* streamStateSessionDump(SStreamState* pState) { char* streamStateSessionDump(SStreamState* pState) {
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));

View File

@ -17,9 +17,9 @@
#include "tstream.h" #include "tstream.h"
#include "wal.h" #include "wal.h"
static int32_t mndAddToTaskset(SArray* pArray, SStreamTask* pTask) { static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) {
int32_t childId = taosArrayGetSize(pArray); int32_t childId = taosArrayGetSize(pArray);
pTask->selfChildId = childId; pTask->info.selfChildId = childId;
taosArrayPush(pArray, &pTask); taosArrayPush(pArray, &pTask);
return 0; return 0;
} }
@ -33,8 +33,8 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHisto
pTask->id.taskId = tGenIdPI32(); pTask->id.taskId = tGenIdPI32();
pTask->id.streamId = streamId; pTask->id.streamId = streamId;
pTask->taskLevel = taskLevel; pTask->info.taskLevel = taskLevel;
pTask->fillHistory = fillHistory; pTask->info.fillHistory = fillHistory;
pTask->triggerParam = triggerParam; pTask->triggerParam = triggerParam;
char buf[128] = {0}; char buf[128] = {0};
@ -42,10 +42,11 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHisto
pTask->id.idStr = taosStrdup(buf); pTask->id.idStr = taosStrdup(buf);
pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
pTask->status.taskStatus = TASK_STATUS__SCAN_HISTORY;
pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; pTask->inputStatus = TASK_INPUT_STATUS__NORMAL;
pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL;
mndAddToTaskset(pTaskList, pTask); addToTaskset(pTaskList, pTask);
return pTask; return pTask;
} }
@ -71,30 +72,40 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) {
if (tStartEncode(pEncoder) < 0) return -1; if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pTask->id.streamId) < 0) return -1; if (tEncodeI64(pEncoder, pTask->id.streamId) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->id.taskId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->id.taskId) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->totalLevel) < 0) return -1; if (tEncodeI32(pEncoder, pTask->info.totalLevel) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->taskLevel) < 0) return -1; if (tEncodeI8(pEncoder, pTask->info.taskLevel) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->outputType) < 0) return -1; if (tEncodeI8(pEncoder, pTask->outputType) < 0) return -1;
if (tEncodeI16(pEncoder, pTask->dispatchMsgType) < 0) return -1; if (tEncodeI16(pEncoder, pTask->msgInfo.msgType) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->status.taskStatus) < 0) return -1; if (tEncodeI8(pEncoder, pTask->status.taskStatus) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->status.schedStatus) < 0) return -1; if (tEncodeI8(pEncoder, pTask->status.schedStatus) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->selfChildId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->info.selfChildId) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->nodeId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->info.nodeId) < 0) return -1;
if (tEncodeSEpSet(pEncoder, &pTask->epSet) < 0) return -1; if (tEncodeSEpSet(pEncoder, &pTask->info.epSet) < 0) return -1;
if (tEncodeI64(pEncoder, pTask->chkInfo.id) < 0) return -1; if (tEncodeI64(pEncoder, pTask->chkInfo.id) < 0) return -1;
if (tEncodeI64(pEncoder, pTask->chkInfo.version) < 0) return -1; if (tEncodeI64(pEncoder, pTask->chkInfo.version) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->fillHistory) < 0) return -1; if (tEncodeI8(pEncoder, pTask->info.fillHistory) < 0) return -1;
int32_t epSz = taosArrayGetSize(pTask->childEpInfo); if (tEncodeI64(pEncoder, pTask->historyTaskId.streamId)) return -1;
if (tEncodeI32(pEncoder, pTask->historyTaskId.taskId)) return -1;
if (tEncodeI64(pEncoder, pTask->streamTaskId.streamId)) return -1;
if (tEncodeI32(pEncoder, pTask->streamTaskId.taskId)) return -1;
if (tEncodeU64(pEncoder, pTask->dataRange.range.minVer)) return -1;
if (tEncodeU64(pEncoder, pTask->dataRange.range.maxVer)) return -1;
if (tEncodeI64(pEncoder, pTask->dataRange.window.skey)) return -1;
if (tEncodeI64(pEncoder, pTask->dataRange.window.ekey)) return -1;
int32_t epSz = taosArrayGetSize(pTask->pUpstreamEpInfoList);
if (tEncodeI32(pEncoder, epSz) < 0) return -1; if (tEncodeI32(pEncoder, epSz) < 0) return -1;
for (int32_t i = 0; i < epSz; i++) { for (int32_t i = 0; i < epSz; i++) {
SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->childEpInfo, i); SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i);
if (tEncodeStreamEpInfo(pEncoder, pInfo) < 0) return -1; if (tEncodeStreamEpInfo(pEncoder, pInfo) < 0) return -1;
} }
if (pTask->taskLevel != TASK_LEVEL__SINK) { if (pTask->info.taskLevel != TASK_LEVEL__SINK) {
if (tEncodeCStr(pEncoder, pTask->exec.qmsg) < 0) return -1; if (tEncodeCStr(pEncoder, pTask->exec.qmsg) < 0) return -1;
} }
@ -124,25 +135,36 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) {
if (tStartDecode(pDecoder) < 0) return -1; if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->id.streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pTask->id.streamId) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->id.taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->id.taskId) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->totalLevel) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->info.totalLevel) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->taskLevel) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->info.taskLevel) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->outputType) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->outputType) < 0) return -1;
if (tDecodeI16(pDecoder, &pTask->dispatchMsgType) < 0) return -1; if (tDecodeI16(pDecoder, &pTask->msgInfo.msgType) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->status.taskStatus) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->status.taskStatus) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->status.schedStatus) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->status.schedStatus) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->selfChildId) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->info.selfChildId) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->nodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->info.nodeId) < 0) return -1;
if (tDecodeSEpSet(pDecoder, &pTask->epSet) < 0) return -1; if (tDecodeSEpSet(pDecoder, &pTask->info.epSet) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->chkInfo.id) < 0) return -1; if (tDecodeI64(pDecoder, &pTask->chkInfo.id) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->chkInfo.version) < 0) return -1; if (tDecodeI64(pDecoder, &pTask->chkInfo.version) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->fillHistory) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->info.fillHistory) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->historyTaskId.streamId)) return -1;
if (tDecodeI32(pDecoder, &pTask->historyTaskId.taskId)) return -1;
if (tDecodeI64(pDecoder, &pTask->streamTaskId.streamId)) return -1;
if (tDecodeI32(pDecoder, &pTask->streamTaskId.taskId)) return -1;
if (tDecodeU64(pDecoder, &pTask->dataRange.range.minVer)) return -1;
if (tDecodeU64(pDecoder, &pTask->dataRange.range.maxVer)) return -1;
if (tDecodeI64(pDecoder, &pTask->dataRange.window.skey)) return -1;
if (tDecodeI64(pDecoder, &pTask->dataRange.window.ekey)) return -1;
int32_t epSz; int32_t epSz;
if (tDecodeI32(pDecoder, &epSz) < 0) return -1; if (tDecodeI32(pDecoder, &epSz) < 0) return -1;
pTask->childEpInfo = taosArrayInit(epSz, sizeof(void*));
pTask->pUpstreamEpInfoList = taosArrayInit(epSz, POINTER_BYTES);
for (int32_t i = 0; i < epSz; i++) { for (int32_t i = 0; i < epSz; i++) {
SStreamChildEpInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamChildEpInfo)); SStreamChildEpInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamChildEpInfo));
if (pInfo == NULL) return -1; if (pInfo == NULL) return -1;
@ -150,10 +172,10 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) {
taosMemoryFreeClear(pInfo); taosMemoryFreeClear(pInfo);
return -1; return -1;
} }
taosArrayPush(pTask->childEpInfo, &pInfo); taosArrayPush(pTask->pUpstreamEpInfoList, &pInfo);
} }
if (pTask->taskLevel != TASK_LEVEL__SINK) { if (pTask->info.taskLevel != TASK_LEVEL__SINK) {
if (tDecodeCStrAlloc(pDecoder, &pTask->exec.qmsg) < 0) return -1; if (tDecodeCStrAlloc(pDecoder, &pTask->exec.qmsg) < 0) return -1;
} }
@ -203,7 +225,7 @@ void tFreeStreamTask(SStreamTask* pTask) {
walCloseReader(pTask->exec.pWalReader); walCloseReader(pTask->exec.pWalReader);
} }
taosArrayDestroyP(pTask->childEpInfo, taosMemoryFree); taosArrayDestroyP(pTask->pUpstreamEpInfoList, taosMemoryFree);
if (pTask->outputType == TASK_OUTPUT__TABLE) { if (pTask->outputType == TASK_OUTPUT__TABLE) {
tDeleteSchemaWrapper(pTask->tbSink.pSchemaWrapper); tDeleteSchemaWrapper(pTask->tbSink.pSchemaWrapper);
taosMemoryFree(pTask->tbSink.pTSchema); taosMemoryFree(pTask->tbSink.pTSchema);

View File

@ -43,12 +43,13 @@ struct SStreamFileState {
uint64_t maxRowCount; uint64_t maxRowCount;
uint64_t curRowCount; uint64_t curRowCount;
GetTsFun getTs; GetTsFun getTs;
char* id;
}; };
typedef SRowBuffPos SRowBuffInfo; typedef SRowBuffPos SRowBuffInfo;
SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize,
GetTsFun fp, void* pFile, TSKEY delMark) { GetTsFun fp, void* pFile, TSKEY delMark, const char* idstr) {
if (memSize <= 0) { if (memSize <= 0) {
memSize = DEFAULT_MAX_STREAM_BUFFER_SIZE; memSize = DEFAULT_MAX_STREAM_BUFFER_SIZE;
} }
@ -70,6 +71,7 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_
if (!pFileState->usedBuffs || !pFileState->freeBuffs || !pFileState->rowBuffMap) { if (!pFileState->usedBuffs || !pFileState->freeBuffs || !pFileState->rowBuffMap) {
goto _error; goto _error;
} }
pFileState->keyLen = keySize; pFileState->keyLen = keySize;
pFileState->rowSize = rowSize; pFileState->rowSize = rowSize;
pFileState->selectivityRowSize = selectRowSize; pFileState->selectivityRowSize = selectRowSize;
@ -81,6 +83,8 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_
pFileState->deleteMark = delMark; pFileState->deleteMark = delMark;
pFileState->flushMark = INT64_MIN; pFileState->flushMark = INT64_MIN;
pFileState->maxTs = INT64_MIN; pFileState->maxTs = INT64_MIN;
pFileState->id = taosStrdup(idstr);
recoverSnapshot(pFileState); recoverSnapshot(pFileState);
return pFileState; return pFileState;
@ -124,6 +128,8 @@ void streamFileStateDestroy(SStreamFileState* pFileState) {
if (!pFileState) { if (!pFileState) {
return; return;
} }
taosMemoryFree(pFileState->id);
tdListFreeP(pFileState->usedBuffs, destroyRowBuffAllPosPtr); tdListFreeP(pFileState->usedBuffs, destroyRowBuffAllPosPtr);
tdListFreeP(pFileState->freeBuffs, destroyRowBuff); tdListFreeP(pFileState->freeBuffs, destroyRowBuff);
tSimpleHashCleanup(pFileState->rowBuffMap); tSimpleHashCleanup(pFileState->rowBuffMap);
@ -177,7 +183,8 @@ void popUsedBuffs(SStreamFileState* pFileState, SStreamSnapshot* pFlushList, uin
i++; i++;
} }
} }
qInfo("do stream state flush %d rows to disck. is used: %d", listNEles(pFlushList), used);
qInfo("stream state flush %d rows to disk. is used:%d", listNEles(pFlushList), used);
} }
int32_t flushRowBuff(SStreamFileState* pFileState) { int32_t flushRowBuff(SStreamFileState* pFileState) {
@ -185,13 +192,17 @@ int32_t flushRowBuff(SStreamFileState* pFileState) {
if (!pFlushList) { if (!pFlushList) {
return TSDB_CODE_OUT_OF_MEMORY; return TSDB_CODE_OUT_OF_MEMORY;
} }
uint64_t num = (uint64_t)(pFileState->curRowCount * FLUSH_RATIO); uint64_t num = (uint64_t)(pFileState->curRowCount * FLUSH_RATIO);
num = TMAX(num, FLUSH_NUM); num = TMAX(num, FLUSH_NUM);
popUsedBuffs(pFileState, pFlushList, num, false); popUsedBuffs(pFileState, pFlushList, num, false);
if (isListEmpty(pFlushList)) { if (isListEmpty(pFlushList)) {
popUsedBuffs(pFileState, pFlushList, num, true); popUsedBuffs(pFileState, pFlushList, num, true);
} }
flushSnapshot(pFileState, pFlushList, false); flushSnapshot(pFileState, pFlushList, false);
SListIter fIter = {0}; SListIter fIter = {0};
tdListInitIter(pFlushList, &fIter, TD_LIST_FORWARD); tdListInitIter(pFlushList, &fIter, TD_LIST_FORWARD);
SListNode* pNode = NULL; SListNode* pNode = NULL;
@ -201,6 +212,7 @@ int32_t flushRowBuff(SStreamFileState* pFileState) {
tdListAppend(pFileState->freeBuffs, &pPos->pRowBuff); tdListAppend(pFileState->freeBuffs, &pPos->pRowBuff);
pPos->pRowBuff = NULL; pPos->pRowBuff = NULL;
} }
tdListFreeP(pFlushList, destroyRowBuffPosPtr); tdListFreeP(pFlushList, destroyRowBuffPosPtr);
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
@ -269,13 +281,13 @@ int32_t getRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen, voi
TSKEY ts = pFileState->getTs(pKey); TSKEY ts = pFileState->getTs(pKey);
if (ts > pFileState->maxTs - pFileState->deleteMark && ts < pFileState->flushMark) { if (ts > pFileState->maxTs - pFileState->deleteMark && ts < pFileState->flushMark) {
int32_t len = 0; int32_t len = 0;
void* pVal = NULL; void* p = NULL;
int32_t code = streamStateGet_rocksdb(pFileState->pFileStore, pKey, &pVal, &len); int32_t code = streamStateGet_rocksdb(pFileState->pFileStore, pKey, &p, &len);
qDebug("===stream===get %" PRId64 " from disc, res %d", ts, code); qDebug("===stream===get %" PRId64 " from disc, res %d", ts, code);
if (code == TSDB_CODE_SUCCESS) { if (code == TSDB_CODE_SUCCESS) {
memcpy(pNewPos->pRowBuff, pVal, len); memcpy(pNewPos->pRowBuff, p, len);
} }
taosMemoryFree(pVal); taosMemoryFree(p);
} }
tSimpleHashPut(pFileState->rowBuffMap, pKey, keyLen, &pNewPos, POINTER_BYTES); tSimpleHashPut(pFileState->rowBuffMap, pKey, keyLen, &pNewPos, POINTER_BYTES);
@ -348,7 +360,10 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
tdListInitIter(pSnapshot, &iter, TD_LIST_FORWARD); tdListInitIter(pSnapshot, &iter, TD_LIST_FORWARD);
const int32_t BATCH_LIMIT = 256; const int32_t BATCH_LIMIT = 256;
SListNode* pNode = NULL;
int64_t st = taosGetTimestampMs();
int32_t numOfElems = listNEles(pSnapshot);
SListNode* pNode = NULL;
int idx = streamStateGetCfIdx(pFileState->pFileStore, "state"); int idx = streamStateGetCfIdx(pFileState->pFileStore, "state");
@ -359,6 +374,7 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
while ((pNode = tdListNext(&iter)) != NULL && code == TSDB_CODE_SUCCESS) { while ((pNode = tdListNext(&iter)) != NULL && code == TSDB_CODE_SUCCESS) {
SRowBuffPos* pPos = *(SRowBuffPos**)pNode->data; SRowBuffPos* pPos = *(SRowBuffPos**)pNode->data;
ASSERT(pPos->pRowBuff && pFileState->rowSize > 0); ASSERT(pPos->pRowBuff && pFileState->rowSize > 0);
if (streamStateGetBatchSize(batch) >= BATCH_LIMIT) { if (streamStateGetBatchSize(batch) >= BATCH_LIMIT) {
streamStatePutBatch_rocksdb(pFileState->pFileStore, batch); streamStatePutBatch_rocksdb(pFileState->pFileStore, batch);
streamStateClearBatch(batch); streamStateClearBatch(batch);
@ -367,16 +383,22 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
SStateKey sKey = {.key = *((SWinKey*)pPos->pKey), .opNum = ((SStreamState*)pFileState->pFileStore)->number}; SStateKey sKey = {.key = *((SWinKey*)pPos->pKey), .opNum = ((SStreamState*)pFileState->pFileStore)->number};
code = streamStatePutBatchOptimize(pFileState->pFileStore, idx, batch, &sKey, pPos->pRowBuff, pFileState->rowSize, code = streamStatePutBatchOptimize(pFileState->pFileStore, idx, batch, &sKey, pPos->pRowBuff, pFileState->rowSize,
0, buf); 0, buf);
// todo handle failure
memset(buf, 0, len); memset(buf, 0, len);
qDebug("===stream===put %" PRId64 " to disc, res %d", sKey.key.ts, code); // qDebug("===stream===put %" PRId64 " to disc, res %d", sKey.key.ts, code);
} }
taosMemoryFree(buf); taosMemoryFree(buf);
if (streamStateGetBatchSize(batch) > 0) { if (streamStateGetBatchSize(batch) > 0) {
streamStatePutBatch_rocksdb(pFileState->pFileStore, batch); streamStatePutBatch_rocksdb(pFileState->pFileStore, batch);
} }
streamStateClearBatch(batch); streamStateClearBatch(batch);
int64_t elapsed = taosGetTimestampMs() - st;
qDebug("%s flush to disk in batch model completed, rows:%d, batch size:%d, elapsed time:%"PRId64"ms", pFileState->id, numOfElems,
BATCH_LIMIT, elapsed);
if (flushState) { if (flushState) {
const char* taskKey = "streamFileState"; const char* taskKey = "streamFileState";
{ {
@ -398,8 +420,8 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
} }
streamStatePutBatch_rocksdb(pFileState->pFileStore, batch); streamStatePutBatch_rocksdb(pFileState->pFileStore, batch);
} }
streamStateDestroyBatch(batch);
streamStateDestroyBatch(batch);
return code; return code;
} }
@ -502,3 +524,8 @@ int32_t recoverSnapshot(SStreamFileState* pFileState) {
} }
int32_t streamFileStateGeSelectRowSize(SStreamFileState* pFileState) { return pFileState->selectivityRowSize; } int32_t streamFileStateGeSelectRowSize(SStreamFileState* pFileState) { return pFileState->selectivityRowSize; }
void streamFileStateReloadInfo(SStreamFileState* pFileState, TSKEY ts) {
pFileState->flushMark = TMAX(pFileState->flushMark, ts);
pFileState->maxTs = TMAX(pFileState->maxTs, ts);
}

View File

@ -158,7 +158,7 @@ TEST(TD_STREAM_UPDATE_TEST, update) {
// void *buf = taosMemoryCalloc(1, bufLen); // void *buf = taosMemoryCalloc(1, bufLen);
// int32_t resSize = updateInfoSerialize(buf, bufLen, pSU7); // int32_t resSize = updateInfoSerialize(buf, bufLen, pSU7);
// SUpdateInfo *pSU6 = updateInfoInit(0, TSDB_TIME_PRECISION_MILLI, 0); // SUpdateInfo *pSU6 = taosMemoryCalloc(1, sizeof(SUpdateInfo));
// int32_t desSize = updateInfoDeserialize(buf, bufLen, pSU6); // int32_t desSize = updateInfoDeserialize(buf, bufLen, pSU6);
// GTEST_ASSERT_EQ(desSize, 0); // GTEST_ASSERT_EQ(desSize, 0);

View File

@ -118,7 +118,7 @@ echo "statusInterval 1" >> $TAOS_CFG
echo "dataDir $DATA_DIR" >> $TAOS_CFG echo "dataDir $DATA_DIR" >> $TAOS_CFG
echo "logDir $LOG_DIR" >> $TAOS_CFG echo "logDir $LOG_DIR" >> $TAOS_CFG
echo "debugFlag 0" >> $TAOS_CFG echo "debugFlag 0" >> $TAOS_CFG
echo "tmrDebugFlag 131" >> $TAOS_CFG echo "tmrDebugFlag 143" >> $TAOS_CFG
echo "uDebugFlag 143" >> $TAOS_CFG echo "uDebugFlag 143" >> $TAOS_CFG
echo "rpcDebugFlag 143" >> $TAOS_CFG echo "rpcDebugFlag 143" >> $TAOS_CFG
echo "jniDebugFlag 143" >> $TAOS_CFG echo "jniDebugFlag 143" >> $TAOS_CFG

View File

@ -0,0 +1,405 @@
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1
system sh/exec.sh -n dnode1 -s start
sleep 50
sql connect
print =============== create database
sql create database test vgroups 1;
sql select * from information_schema.ins_databases
if $rows != 3 then
return -1
endi
print $data00 $data01 $data02
sql use test;
print =====step1
sql create table t1(ts timestamp, a int, b int , c int, d double);
sql insert into t1 values(1648791213000,10,2,3,1.0);
sql create stream stream0 trigger at_once fill_history 1 IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, sum(a) from t1 interval(10s);
$loop_count = 0
loop00:
sleep 1000
$loop_count = $loop_count + 1
if $loop_count == 20 then
return -1
endi
sql select * from streamt;
if $rows != 1 then
print ======$rows
print data00,data01, data02
print data10,data11, data12
print data20,data21, data22
goto loop00
endi
if $data01 != 10 then
print =====data01=$data01
goto loop00
endi
sql insert into t1 values(1648791213000,1,2,3,1.0);
$loop_count = 0
loop0:
sleep 1000
$loop_count = $loop_count + 1
if $loop_count == 20 then
return -1
endi
sql select * from streamt;
if $rows != 1 then
print ======$rows
print data00,data01, data02
print data10,data11, data12
print data20,data21, data22
goto loop0
endi
if $data01 != 1 then
print =====data01=$data01
goto loop0
endi
sql insert into t1 values(1648791213001,2,2,3,1.0);
$loop_count = 0
loop1:
sleep 1000
$loop_count = $loop_count + 1
if $loop_count == 20 then
return -1
endi
sql select * from streamt;
if $rows != 1 then
print ======$rows
print data00,data01, data02
print data10,data11, data12
print data20,data21, data22
goto loop1
endi
if $data01 != 3 then
print ======$data01
goto loop1
endi
sql insert into t1 values(1648791223001,3,2,3,1.0);
sql insert into t1 values(1648791223002,4,2,3,1.0);
$loop_count = 0
loop2:
sleep 1000
$loop_count = $loop_count + 1
if $loop_count == 20 then
return -1
endi
sql select * from streamt;
if $rows != 2 then
print ======$rows
print data00,data01, data02
print data10,data11, data12
print data20,data21, data22
goto loop2
endi
if $data01 != 3 then
print ======$data01
goto loop2
endi
if $data11 != 7 then
print ======$data01
goto loop2
endi
print =====step1 over
print =====step2
sql create database test1 vgroups 4;
sql use test1;
sql create stable st(ts timestamp,a int,b int,c int,d double) tags(ta int,tb int,tc int);
sql create table t1 using st tags(1,1,1);
sql create table t2 using st tags(2,2,2);
sql insert into t1 values(1648791213000,10,2,3,1.0);
sql create stream stream1 trigger at_once fill_history 1 IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt1 as select _wstart, sum(a) from st interval(10s);
$loop_count = 0
loop00:
sleep 1000
$loop_count = $loop_count + 1
if $loop_count == 20 then
return -1
endi
sql select * from streamt1;
if $rows != 1 then
print ======$rows
print data00,data01, data02
print data10,data11, data12
print data20,data21, data22
goto loop00
endi
if $data01 != 10 then
print =====data01=$data01
goto loop00
endi
sql insert into t1 values(1648791213000,1,2,3,1.0);
$loop_count = 0
loop0:
sleep 1000
$loop_count = $loop_count + 1
if $loop_count == 20 then
return -1
endi
sql select * from streamt1;
if $rows != 1 then
print ======$rows
print data00,data01, data02
print data10,data11, data12
print data20,data21, data22
goto loop0
endi
if $data01 != 1 then
print =====data01=$data01
goto loop0
endi
sql insert into t1 values(1648791213001,2,2,3,1.0);
$loop_count = 0
loop1:
sleep 1000
$loop_count = $loop_count + 1
if $loop_count == 20 then
return -1
endi
sql select * from streamt1;
if $rows != 1 then
print ======$rows
print data00,data01, data02
print data10,data11, data12
print data20,data21, data22
goto loop1
endi
if $data01 != 3 then
print ======$data01
goto loop1
endi
sql insert into t1 values(1648791223001,3,2,3,1.0);
sql insert into t1 values(1648791223002,4,2,3,1.0);
$loop_count = 0
loop2:
sleep 1000
$loop_count = $loop_count + 1
if $loop_count == 20 then
return -1
endi
sql select * from streamt1;
if $rows != 2 then
print ======$rows
print data00,data01, data02
print data10,data11, data12
print data20,data21, data22
goto loop2
endi
if $data01 != 3 then
print ======$data01
goto loop2
endi
if $data11 != 7 then
print ======$data01
goto loop2
endi
print =====step2 over
print =====step3
sql create database test2 vgroups 4;
sql use test2;
sql create stable st(ts timestamp,a int,b int,c int,d double) tags(ta int,tb int,tc int);
sql create table t1 using st tags(1,1,1);
sql create table t2 using st tags(2,2,2);
sql insert into t1 values(1648791213000,10,2,3,1.0);
sql create stream stream2 trigger at_once fill_history 1 IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt2 as select _wstart, sum(a) from st partition by ta interval(10s);
$loop_count = 0
loop00:
sleep 1000
$loop_count = $loop_count + 1
if $loop_count == 20 then
return -1
endi
sql select * from streamt2;
if $rows != 1 then
print ======$rows
print data00,data01, data02
print data10,data11, data12
print data20,data21, data22
goto loop00
endi
if $data01 != 10 then
print =====data01=$data01
goto loop00
endi
sql insert into t1 values(1648791213000,1,2,3,1.0);
$loop_count = 0
loop0:
sleep 1000
$loop_count = $loop_count + 1
if $loop_count == 20 then
return -1
endi
sql select * from streamt2;
if $rows != 1 then
print ======$rows
print data00,data01, data02
print data10,data11, data12
print data20,data21, data22
goto loop0
endi
if $data01 != 1 then
print =====data01=$data01
goto loop0
endi
sql insert into t1 values(1648791213001,2,2,3,1.0);
$loop_count = 0
loop1:
sleep 1000
$loop_count = $loop_count + 1
if $loop_count == 20 then
return -1
endi
sql select * from streamt2;
if $rows != 1 then
print ======$rows
print data00,data01, data02
print data10,data11, data12
print data20,data21, data22
goto loop1
endi
if $data01 != 3 then
print ======$data01
goto loop1
endi
sql insert into t1 values(1648791223001,3,2,3,1.0);
sql insert into t1 values(1648791223002,4,2,3,1.0);
$loop_count = 0
loop2:
sleep 1000
$loop_count = $loop_count + 1
if $loop_count == 20 then
return -1
endi
sql select * from streamt2;
if $rows != 2 then
print ======$rows
print data00,data01, data02
print data10,data11, data12
print data20,data21, data22
goto loop2
endi
if $data01 != 3 then
print ======$data01
goto loop2
endi
if $data11 != 7 then
print ======$data01
goto loop2
endi
print =====step3 over
print =====over
system sh/stop_dnodes.sh

View File

@ -14,6 +14,7 @@ sql create table ts3 using st tags(3,2,2);
sql create table ts4 using st tags(4,2,2); sql create table ts4 using st tags(4,2,2);
sql create stream stream_t1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into test0.streamtST1 as select _wstart, count(*) c1, count(d) c2 , sum(a) c3 , max(b) c4, min(c) c5 from st partition by ta,tb,tc interval(10s); sql create stream stream_t1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into test0.streamtST1 as select _wstart, count(*) c1, count(d) c2 , sum(a) c3 , max(b) c4, min(c) c5 from st partition by ta,tb,tc interval(10s);
sleep 500
sql insert into ts1 values(1648791213001,1,12,3,1.0); sql insert into ts1 values(1648791213001,1,12,3,1.0);
sql insert into ts2 values(1648791213001,1,12,3,1.0); sql insert into ts2 values(1648791213001,1,12,3,1.0);

View File

@ -575,8 +575,6 @@ endi
$loop_count = 0 $loop_count = 0
print step 7 print step 7
sql create database test3 vgroups 6; sql create database test3 vgroups 6;
sql use test3; sql use test3;
sql create stable st(ts timestamp, a int, b int, c int, d double) tags(ta int,tb int,tc int); sql create stable st(ts timestamp, a int, b int, c int, d double) tags(ta int,tb int,tc int);

View File

@ -22,7 +22,7 @@ class TDTestCase:
def init(self, conn, logSql, replicaVar=1): def init(self, conn, logSql, replicaVar=1):
self.replicaVar = int(replicaVar) self.replicaVar = int(replicaVar)
tdLog.debug("start to execute %s" % __file__) tdLog.debug("start to execute %s" % __file__)
tdSql.init(conn.cursor()) tdSql.init(conn.cursor(), True)
self.dbname = 'db' self.dbname = 'db'
self.setsql = TDSetSql() self.setsql = TDSetSql()
self.stbname = 'stb' self.stbname = 'stb'

View File

@ -138,9 +138,9 @@ class TDTestCase:
tdLog.printNoPrefix(f"==========step1:prepare and check data in old version-{BASEVERSION}") tdLog.printNoPrefix(f"==========step1:prepare and check data in old version-{BASEVERSION}")
tdLog.info(f" LD_LIBRARY_PATH=/usr/lib taosBenchmark -t {tableNumbers} -n {recordNumbers1} -y ") tdLog.info(f" LD_LIBRARY_PATH=/usr/lib taosBenchmark -t {tableNumbers} -n {recordNumbers1} -y ")
os.system(f"LD_LIBRARY_PATH=/usr/lib taosBenchmark -t {tableNumbers} -n {recordNumbers1} -y ") os.system(f"LD_LIBRARY_PATH=/usr/lib taosBenchmark -t {tableNumbers} -n {recordNumbers1} -y ")
os.system(f"LD_LIBRARY_PATH=/usr/lib taos -s 'use test;create stream current_stream into current_stream_output_stb as select _wstart as `start`, _wend as wend, max(current) as max_current from meters where voltage <= 220 interval (5s);' ") # os.system(f"LD_LIBRARY_PATH=/usr/lib taos -s 'use test;create stream current_stream into current_stream_output_stb as select _wstart as `start`, _wend as wend, max(current) as max_current from meters where voltage <= 220 interval (5s);' ")
os.system('LD_LIBRARY_PATH=/usr/lib taos -s "use test;create stream power_stream into power_stream_output_stb as select ts, concat_ws(\\".\\", location, tbname) as meter_location, current*voltage*cos(phase) as active_power, current*voltage*sin(phase) as reactive_power from meters partition by tbname;" ') # os.system('LD_LIBRARY_PATH=/usr/lib taos -s "use test;create stream power_stream into power_stream_output_stb as select ts, concat_ws(\\".\\", location, tbname) as meter_location, current*voltage*cos(phase) as active_power, current*voltage*sin(phase) as reactive_power from meters partition by tbname;" ')
os.system('LD_LIBRARY_PATH=/usr/lib taos -s "use test;show streams;" ') # os.system('LD_LIBRARY_PATH=/usr/lib taos -s "use test;show streams;" ')
os.system(f"sed -i 's/\/etc\/taos/{cPath}/' 0-others/tmqBasic.json ") os.system(f"sed -i 's/\/etc\/taos/{cPath}/' 0-others/tmqBasic.json ")
# os.system("LD_LIBRARY_PATH=/usr/lib taosBenchmark -f 0-others/tmqBasic.json -y ") # os.system("LD_LIBRARY_PATH=/usr/lib taosBenchmark -f 0-others/tmqBasic.json -y ")
os.system('LD_LIBRARY_PATH=/usr/lib taos -s "create topic if not exists tmq_test_topic as select current,voltage,phase from test.meters where voltage <= 106 and current <= 5;" ') os.system('LD_LIBRARY_PATH=/usr/lib taos -s "create topic if not exists tmq_test_topic as select current,voltage,phase from test.meters where voltage <= 106 and current <= 5;" ')
@ -224,7 +224,7 @@ class TDTestCase:
args = (caller.filename, caller.lineno) args = (caller.filename, caller.lineno)
tdLog.exit("%s(%d) failed" % args) tdLog.exit("%s(%d) failed" % args)
tdsql.query("show streams;") tdsql.query("show streams;")
tdsql.checkRows(2) tdsql.checkRows(0)
tdsql.query("select *,tbname from d0.almlog where mcid='m0103';") tdsql.query("select *,tbname from d0.almlog where mcid='m0103';")
tdsql.checkRows(6) tdsql.checkRows(6)
expectList = [0,3003,20031,20032,20033,30031] expectList = [0,3003,20031,20032,20033,30031]

View File

@ -44,7 +44,7 @@ class TDTestCase:
new_dbname = list(dbname) new_dbname = list(dbname)
new_dbname.insert(i,j) new_dbname.insert(i,j)
dbname_1 = ''.join(new_dbname) dbname_1 = ''.join(new_dbname)
tdSql.execute(f'create database if not exists `{dbname_1}`') tdSql.execute(f'create database if not exists `{dbname_1}` vgroups 1 replica 1')
tdSql.query('select * from information_schema.ins_databases') tdSql.query('select * from information_schema.ins_databases')
tdSql.checkEqual(tdSql.queryResult[2][0],str(dbname_1)) tdSql.checkEqual(tdSql.queryResult[2][0],str(dbname_1))
tdSql.execute(f'drop database `{dbname_1}`') tdSql.execute(f'drop database `{dbname_1}`')
@ -56,7 +56,7 @@ class TDTestCase:
def tb_name_check(self): def tb_name_check(self):
dbname = tdCom.getLongName(10) dbname = tdCom.getLongName(10)
tdSql.execute(f'create database if not exists `{dbname}`') tdSql.execute(f'create database if not exists `{dbname}` vgroups 1 replica 1')
tdSql.execute(f'use `{dbname}`') tdSql.execute(f'use `{dbname}`')
tbname = tdCom.getLongName(5) tbname = tdCom.getLongName(5)
for i in self.special_name: for i in self.special_name: