diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h
index fcb3160f64..34372dc2ff 100644
--- a/include/libs/executor/executor.h
+++ b/include/libs/executor/executor.h
@@ -26,6 +26,7 @@ extern "C" {
typedef void* qTaskInfo_t;
typedef void* DataSinkHandle;
+
struct SRpcMsg;
struct SSubplan;
@@ -91,6 +92,9 @@ void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId);
int32_t qSetStreamOpOpen(qTaskInfo_t tinfo);
+// todo refactor
+void qGetCheckpointVersion(qTaskInfo_t tinfo, int64_t* dataVer, int64_t* ckId);
+
/**
* Set multiple input data blocks for the stream scan.
* @param tinfo
@@ -119,7 +123,7 @@ int32_t qSetSMAInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks,
* @param isAdd
* @return
*/
-int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd);
+int32_t qUpdateTableListForStreamScanner(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd);
/**
* Create the exec task object according to task json
@@ -163,6 +167,7 @@ void qCleanExecTaskBlockBuf(qTaskInfo_t tinfo);
* @return
*/
int32_t qAsyncKillTask(qTaskInfo_t tinfo, int32_t rspCode);
+
int32_t qKillTask(qTaskInfo_t tinfo, int32_t rspCode);
bool qTaskIsExecuting(qTaskInfo_t qinfo);
@@ -182,21 +187,11 @@ int32_t qSerializeTaskStatus(qTaskInfo_t tinfo, char** pOutput, int32_t* len);
int32_t qDeserializeTaskStatus(qTaskInfo_t tinfo, const char* pInput, int32_t len);
STimeWindow getAlignQueryTimeWindow(SInterval* pInterval, int32_t precision, int64_t key);
-/**
- * return the scan info, in the form of tuple of two items, including table uid and current timestamp
- * @param tinfo
- * @param uid
- * @param ts
- * @return
- */
-int32_t qGetStreamScanStatus(qTaskInfo_t tinfo, uint64_t* uid, int64_t* ts);
-int32_t qStreamPrepareTsdbScan(qTaskInfo_t tinfo, uint64_t uid, int64_t ts);
+SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo);
int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subType);
-// int32_t qStreamScanMemData(qTaskInfo_t tinfo, const SSubmitReq* pReq, int64_t ver);
-//
int32_t qStreamSetScanMemData(qTaskInfo_t tinfo, SPackedData submit);
void qStreamSetOpen(qTaskInfo_t tinfo);
diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h
index b6ada5a0c7..cfc6ef2025 100644
--- a/include/libs/qcom/query.h
+++ b/include/libs/qcom/query.h
@@ -194,6 +194,7 @@ typedef struct SRequestConnInfo {
typedef void (*__freeFunc)(void* param);
+// todo add creator/destroyer function
typedef struct SMsgSendInfo {
__async_send_cb_fn_t fp; // async callback function
STargetInfo target; // for update epset
diff --git a/include/libs/stream/streamState.h b/include/libs/stream/streamState.h
index fd5cec2931..42a7261f38 100644
--- a/include/libs/stream/streamState.h
+++ b/include/libs/stream/streamState.h
@@ -42,6 +42,7 @@ typedef struct STdbState {
typedef struct {
STdbState* pTdbState;
int32_t number;
+ int64_t checkPointId;
} SStreamState;
SStreamState* streamStateOpen(char* path, struct SStreamTask* pTask, bool specPath, int32_t szPage, int32_t pages);
diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h
index 5b1d1fa1bc..103f807191 100644
--- a/include/libs/stream/tstream.h
+++ b/include/libs/stream/tstream.h
@@ -13,8 +13,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include "executor.h"
#include "os.h"
+#include "executor.h"
#include "query.h"
#include "streamState.h"
#include "tdatablock.h"
@@ -50,6 +50,7 @@ enum {
TASK_STATUS__RECOVER_PREPARE,
TASK_STATUS__RECOVER1,
TASK_STATUS__RECOVER2,
+ TASK_STATUS__RESTORE, // only available for source task to replay WAL from the checkpoint
};
enum {
@@ -103,21 +104,8 @@ typedef struct {
int8_t type;
} SStreamQueueItem;
-#if 0
-typedef struct {
- int8_t type;
- int64_t ver;
- int32_t* dataRef;
- SSubmitReq* data;
-} SStreamDataSubmit;
-
-typedef struct {
- int8_t type;
- int64_t ver;
- SArray* dataRefs; // SArray
- SArray* reqs; // SArray
-} SStreamMergedSubmit;
-#endif
+typedef void FTbSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data);
+typedef int32_t FTaskExpand(void* ahandle, SStreamTask* pTask, int64_t ver);
typedef struct {
int8_t type;
@@ -219,21 +207,20 @@ static FORCE_INLINE void streamQueueProcessFail(SStreamQueue* queue) {
}
static FORCE_INLINE void* streamQueueCurItem(SStreamQueue* queue) {
- //
return queue->qItem;
}
void* streamQueueNextItem(SStreamQueue* queue);
-SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit);
+SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit, int32_t type);
void streamDataSubmitDestroy(SStreamDataSubmit2* pDataSubmit);
SStreamDataSubmit2* streamSubmitBlockClone(SStreamDataSubmit2* pSubmit);
typedef struct {
- char* qmsg;
- // followings are not applicable to encoder and decoder
- void* executor;
+ char* qmsg;
+ void* pExecutor; // not applicable to encoder and decoder
+ struct SWalReader* pWalReader; // not applicable to encoder and decoder
} STaskExec;
typedef struct {
@@ -248,16 +235,13 @@ typedef struct {
SUseDbRsp dbInfo;
} STaskDispatcherShuffle;
-typedef void FTbSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data);
-
typedef struct {
int64_t stbUid;
char stbFullName[TSDB_TABLE_FNAME_LEN];
SSchemaWrapper* pSchemaWrapper;
- // not applicable to encoder and decoder
- void* vnode;
- FTbSink* tbSinkFunc;
- STSchema* pTSchema;
+ void* vnode; // not available to encoder and decoder
+ FTbSink* tbSinkFunc;
+ STSchema* pTSchema;
} STaskSinkTb;
typedef void FSmaSink(void* vnode, int64_t smaId, const SArray* data);
@@ -280,24 +264,34 @@ typedef struct {
SEpSet epSet;
} SStreamChildEpInfo;
-struct SStreamTask {
- int64_t streamId;
- int32_t taskId;
- int32_t totalLevel;
- int8_t taskLevel;
- int8_t outputType;
- int16_t dispatchMsgType;
+typedef struct SStreamId {
+ int64_t streamId;
+ int32_t taskId;
+ const char* idStr;
+} SStreamId;
+typedef struct SCheckpointInfo {
+ int64_t id;
+ int64_t version; // offset in WAL
+} SCheckpointInfo;
+
+typedef struct SStreamStatus {
int8_t taskStatus;
int8_t schedStatus;
+} SStreamStatus;
- // node info
- int32_t selfChildId;
- int32_t nodeId;
- SEpSet epSet;
-
- int64_t recoverSnapVer;
- int64_t startVer;
+struct SStreamTask {
+ SStreamId id;
+ int32_t totalLevel;
+ int8_t taskLevel;
+ int8_t outputType;
+ int16_t dispatchMsgType;
+ SStreamStatus status;
+ int32_t selfChildId;
+ int32_t nodeId;
+ SEpSet epSet;
+ SCheckpointInfo chkInfo;
+ STaskExec exec;
// fill history
int8_t fillHistory;
@@ -307,9 +301,6 @@ struct SStreamTask {
int32_t nextCheckId;
SArray* checkpointInfo; // SArray
- // exec
- STaskExec exec;
-
// output
union {
STaskDispatcherFixedEp fixedEpDispatcher;
@@ -319,44 +310,54 @@ struct SStreamTask {
STaskSinkFetch fetchSink;
};
- int8_t inputStatus;
- int8_t outputStatus;
-
- // STaosQueue* inputQueue1;
- // STaosQall* inputQall;
+ int8_t inputStatus;
+ int8_t outputStatus;
SStreamQueue* inputQueue;
SStreamQueue* outputQueue;
// trigger
- int8_t triggerStatus;
- int64_t triggerParam;
- void* timer;
+ int8_t triggerStatus;
+ int64_t triggerParam;
+ void* timer;
+ SMsgCb* pMsgCb; // msg handle
+ SStreamState* pState; // state backend
- // msg handle
- SMsgCb* pMsgCb;
-
- // state backend
- SStreamState* pState;
-
- // do not serialize
- int32_t recoverTryingDownstream;
- int32_t recoverWaitingUpstream;
- int64_t checkReqId;
- SArray* checkReqIds; // shuffle
- int32_t refCnt;
-
- int64_t checkpointingId;
- int32_t checkpointAlignCnt;
+ // the following attributes are not serialized
+ int32_t recoverTryingDownstream;
+ int32_t recoverWaitingUpstream;
+ int64_t checkReqId;
+ SArray* checkReqIds; // shuffle
+ int32_t refCnt;
+ int64_t checkpointingId;
+ int32_t checkpointAlignCnt;
+ struct SStreamMeta* pMeta;
};
+// meta
+typedef struct SStreamMeta {
+ char* path;  // NOTE(review): streamMetaOpen() takes path/ahandle/expandFunc/vgId arguments with these names; presumably they are stored here - confirm
+ TDB* db;
+ TTB* pTaskDb;
+ TTB* pCheckpointDb;
+ SHashObj* pTasks;  // presumably keyed by taskId (streamMetaAcquireTask looks tasks up by taskId) - verify
+ void* ahandle;  // opaque handle; FTaskExpand receives a void* ahandle as its first argument
+ TXN* txn;
+ FTaskExpand* expandFunc;  // callback with signature (void* ahandle, SStreamTask* pTask, int64_t ver)
+ int32_t vgId;
+ SRWLatch lock;
+ int8_t walScan;
+ bool quit;
+} SStreamMeta;
+
int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo);
int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo);
-SStreamTask* tNewSStreamTask(int64_t streamId);
-int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask);
-int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask);
-void tFreeSStreamTask(SStreamTask* pTask);
-int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem);
+SStreamTask* tNewStreamTask(int64_t streamId);
+int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask);
+int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask);
+void tFreeStreamTask(SStreamTask* pTask);
+int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem);
+bool tInputQueueIsFull(const SStreamTask* pTask);
static FORCE_INLINE void streamTaskInputFail(SStreamTask* pTask) {
atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__FAILED);
@@ -564,40 +565,22 @@ int32_t streamAggRecoverPrepare(SStreamTask* pTask);
// int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask);
int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t childId);
-// expand and deploy
-typedef int32_t FTaskExpand(void* ahandle, SStreamTask* pTask, int64_t ver);
-
-// meta
-typedef struct SStreamMeta {
- char* path;
- TDB* db;
- TTB* pTaskDb;
- TTB* pCheckpointDb;
- SHashObj* pTasks;
- SHashObj* pRecoverStatus;
- void* ahandle;
- TXN* txn;
- FTaskExpand* expandFunc;
- int32_t vgId;
- SRWLatch lock;
-} SStreamMeta;
-
SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId);
void streamMetaClose(SStreamMeta* streamMeta);
int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask);
-int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask);
-int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t startVer, char* msg, int32_t msgLen);
-// SStreamTask* streamMetaGetTask(SStreamMeta* pMeta, int32_t taskId);
+int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask);
+int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t checkpointVer, char* msg, int32_t msgLen);
+int32_t streamMetaGetNumOfTasks(const SStreamMeta* pMeta);
SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId);
void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask);
void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId);
-int32_t streamMetaBegin(SStreamMeta* pMeta);
-int32_t streamMetaCommit(SStreamMeta* pMeta);
-int32_t streamMetaRollBack(SStreamMeta* pMeta);
-int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver);
+int32_t streamMetaBegin(SStreamMeta* pMeta);
+int32_t streamMetaCommit(SStreamMeta* pMeta);
+int32_t streamMetaRollBack(SStreamMeta* pMeta);
+int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver);
// checkpoint
int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq);
diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h
index ccbc53fa5d..b51289de5e 100644
--- a/include/libs/wal/wal.h
+++ b/include/libs/wal/wal.h
@@ -138,7 +138,8 @@ typedef struct {
int8_t enableRef;
} SWalFilterCond;
-typedef struct {
+// todo hide this struct
+typedef struct SWalReader {
SWal *pWal;
int64_t readerId;
TdFilePtr pLogFile;
@@ -196,6 +197,7 @@ void walReadReset(SWalReader *pReader);
int32_t walReadVer(SWalReader *pRead, int64_t ver);
int32_t walReadSeekVer(SWalReader *pRead, int64_t ver);
int32_t walNextValidMsg(SWalReader *pRead);
+int64_t walReaderGetCurrentVer(const SWalReader* pReader);
// only for tq usage
void walSetReaderCapacity(SWalReader *pRead, int32_t capacity);
diff --git a/source/client/inc/clientInt.h b/source/client/inc/clientInt.h
index 86db35b412..41f87379a9 100644
--- a/source/client/inc/clientInt.h
+++ b/source/client/inc/clientInt.h
@@ -36,14 +36,6 @@ extern "C" {
#include "tconfig.h"
-#define CHECK_CODE_GOTO(expr, label) \
- do { \
- code = expr; \
- if (TSDB_CODE_SUCCESS != code) { \
- goto label; \
- } \
- } while (0)
-
#define ERROR_MSG_BUF_DEFAULT_SIZE 512
#define HEARTBEAT_INTERVAL 1500 // ms
@@ -286,28 +278,7 @@ static FORCE_INLINE SReqResultInfo* tmqGetCurResInfo(TAOS_RES* res) {
return (SReqResultInfo*)&msg->resInfo;
}
-static FORCE_INLINE SReqResultInfo* tmqGetNextResInfo(TAOS_RES* res, bool convertUcs4) {
- SMqRspObj* pRspObj = (SMqRspObj*)res;
- pRspObj->resIter++;
-
- if (pRspObj->resIter < pRspObj->rsp.blockNum) {
- SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)taosArrayGetP(pRspObj->rsp.blockData, pRspObj->resIter);
- if (pRspObj->rsp.withSchema) {
- SSchemaWrapper* pSW = (SSchemaWrapper*)taosArrayGetP(pRspObj->rsp.blockSchema, pRspObj->resIter);
- setResSchemaInfo(&pRspObj->resInfo, pSW->pSchema, pSW->nCols);
- taosMemoryFreeClear(pRspObj->resInfo.row);
- taosMemoryFreeClear(pRspObj->resInfo.pCol);
- taosMemoryFreeClear(pRspObj->resInfo.length);
- taosMemoryFreeClear(pRspObj->resInfo.convertBuf);
- taosMemoryFreeClear(pRspObj->resInfo.convertJson);
- }
-
- setQueryResultFromRsp(&pRspObj->resInfo, pRetrieve, convertUcs4, false);
- return &pRspObj->resInfo;
- }
-
- return NULL;
-}
+SReqResultInfo* tmqGetNextResInfo(TAOS_RES* res, bool convertUcs4);
static FORCE_INLINE SReqResultInfo* tscGetCurResInfo(TAOS_RES* res) {
if (TD_RES_QUERY(res)) return &(((SRequestObj*)res)->body.resInfo);
@@ -320,7 +291,6 @@ extern int32_t clientConnRefPool;
extern int32_t timestampDeltaLimit;
extern int64_t lastClusterId;
-
__async_send_cb_fn_t getMsgRspHandle(int32_t msgType);
SMsgSendInfo* buildMsgInfoImpl(SRequestObj* pReqObj);
@@ -373,7 +343,6 @@ void taos_close_internal(void* taos);
// global, called by mgmt
int hbMgrInit();
void hbMgrCleanUp();
-int hbHandleRsp(SClientHbBatchRsp* hbRsp);
// cluster level
SAppHbMgr* appHbMgrInit(SAppInstInfo* pAppInstInfo, char* key);
@@ -386,9 +355,6 @@ void stopAllRequests(SHashObj* pRequests);
int hbRegisterConn(SAppHbMgr* pAppHbMgr, int64_t tscRefId, int64_t clusterId, int8_t connType);
void hbDeregisterConn(SAppHbMgr* pAppHbMgr, SClientHbKey connKey);
-// --- mq
-void hbMgrInitMqHbRspHandle();
-
typedef struct SSqlCallbackWrapper {
SParseContext* pParseCtx;
SCatalogReq* pCatalogReq;
diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c
index dac44bd9c4..ce174744ef 100644
--- a/source/client/src/clientImpl.c
+++ b/source/client/src/clientImpl.c
@@ -1039,8 +1039,7 @@ static int32_t asyncExecSchQuery(SRequestObj* pRequest, SQuery* pQuery, SMetaDat
.sysInfo = pRequest->pTscObj->sysInfo,
.allocatorId = pRequest->allocatorRefId};
- SAppInstInfo* pAppInfo = getAppInfo(pRequest);
- SQueryPlan* pDag = NULL;
+ SQueryPlan* pDag = NULL;
int64_t st = taosGetTimestampUs();
int32_t code = qCreateQueryPlan(&cxt, &pDag, pMnodeList);
@@ -1052,7 +1051,6 @@ static int32_t asyncExecSchQuery(SRequestObj* pRequest, SQuery* pQuery, SMetaDat
}
pRequest->metric.execStart = taosGetTimestampUs();
-
pRequest->metric.planCostUs = pRequest->metric.execStart - st;
if (TSDB_CODE_SUCCESS == code && !pRequest->validateOnly) {
diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c
index 8c70622318..ceca06e309 100644
--- a/source/client/src/clientTmq.c
+++ b/source/client/src/clientTmq.c
@@ -210,6 +210,11 @@ typedef struct {
tmq_t* pTmq;
} SMqCommitCbParam;
+typedef struct SSyncCommitInfo {  // state shared with commitCallBackFn through its void* param
+ tsem_t sem;  // posted by commitCallBackFn once the commit callback has run
+ int32_t code;  // result code stored by commitCallBackFn before it posts sem
+} SSyncCommitInfo;
+
static int32_t doAskEp(tmq_t* tmq);
static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg);
static int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet);
@@ -521,11 +526,7 @@ static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicN
return TSDB_CODE_OUT_OF_MEMORY;
}
- pMsgSendInfo->msgInfo = (SDataBuf){
- .pData = buf,
- .len = sizeof(SMsgHead) + len,
- .handle = NULL,
- };
+ pMsgSendInfo->msgInfo = (SDataBuf) { .pData = buf, .len = sizeof(SMsgHead) + len, .handle = NULL };
pMsgSendInfo->requestId = generateRequestId();
pMsgSendInfo->requestObjRefId = 0;
@@ -786,11 +787,7 @@ void tmqSendHbReq(void* param, void* tmrId) {
goto OVER;
}
- sendInfo->msgInfo = (SDataBuf){
- .pData = pReq,
- .len = tlen,
- .handle = NULL,
- };
+ sendInfo->msgInfo = (SDataBuf){ .pData = pReq, .len = tlen, .handle = NULL };
sendInfo->requestId = generateRequestId();
sendInfo->requestObjRefId = 0;
@@ -2126,13 +2123,8 @@ void tmq_commit_async(tmq_t* tmq, const TAOS_RES* pRes, tmq_commit_cb* cb, void*
}
}
-typedef struct SSyncCommitInfo {
- tsem_t sem;
- int32_t code;
-} SSyncCommitInfo;
-
-static void commitCallBackFn(tmq_t* pTmq, int32_t code, void* param) {
- SSyncCommitInfo* pInfo = (SSyncCommitInfo*)param;
+static void commitCallBackFn(tmq_t *pTmq, int32_t code, void* param) {  // commit callback: records the result code and signals the semaphore; pTmq is unused
+ SSyncCommitInfo* pInfo = (SSyncCommitInfo*) param;  // param is expected to point to an SSyncCommitInfo
pInfo->code = code;  // store the result before signalling
tsem_post(&pInfo->sem);  // signal whoever is waiting on pInfo->sem
}
@@ -2309,3 +2301,26 @@ void commitRspCountDown(SMqCommitCbParamSet* pParamSet, int64_t consumerId, cons
waitingRspNum);
}
}
+
+SReqResultInfo* tmqGetNextResInfo(TAOS_RES* res, bool convertUcs4) {  // advance res (treated as an SMqRspObj) to its next block; returns its resInfo, or NULL when no block is left
+ SMqRspObj* pRspObj = (SMqRspObj*)res;  // no type check is done here; the caller must pass an SMqRspObj
+ pRspObj->resIter++;  // step to the next block index; incremented even when the blocks are exhausted
+
+ if (pRspObj->resIter < pRspObj->rsp.blockNum) {  // a block exists at the new index
+ SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)taosArrayGetP(pRspObj->rsp.blockData, pRspObj->resIter);
+ if (pRspObj->rsp.withSchema) {  // the response carries one schema per block
+ SSchemaWrapper* pSW = (SSchemaWrapper*)taosArrayGetP(pRspObj->rsp.blockSchema, pRspObj->resIter);  // schema at the same index as the block
+ setResSchemaInfo(&pRspObj->resInfo, pSW->pSchema, pSW->nCols);
+ taosMemoryFreeClear(pRspObj->resInfo.row);  // NOTE(review): these buffers are presumably rebuilt by setQueryResultFromRsp below - confirm
+ taosMemoryFreeClear(pRspObj->resInfo.pCol);
+ taosMemoryFreeClear(pRspObj->resInfo.length);
+ taosMemoryFreeClear(pRspObj->resInfo.convertBuf);
+ taosMemoryFreeClear(pRspObj->resInfo.convertJson);
+ }
+
+ setQueryResultFromRsp(&pRspObj->resInfo, pRetrieve, convertUcs4, false);  // populate resInfo from this block; return value is not checked
+ return &pRspObj->resInfo;
+ }
+
+ return NULL;  // resIter >= blockNum: nothing more to return
+}
\ No newline at end of file
diff --git a/source/dnode/mgmt/mgmt_snode/src/smInt.c b/source/dnode/mgmt/mgmt_snode/src/smInt.c
index 28097311ac..e222349767 100644
--- a/source/dnode/mgmt/mgmt_snode/src/smInt.c
+++ b/source/dnode/mgmt/mgmt_snode/src/smInt.c
@@ -55,6 +55,7 @@ int32_t smOpen(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) {
smClose(pMgmt);
return -1;
}
+
tmsgReportStartup("snode-impl", "initialized");
if (smStartWorker(pMgmt) != 0) {
diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c
index fb81a764f1..c69f08eb6b 100644
--- a/source/dnode/mnode/impl/src/mndDef.c
+++ b/source/dnode/mnode/impl/src/mndDef.c
@@ -70,7 +70,7 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) {
if (tEncodeI32(pEncoder, innerSz) < 0) return -1;
for (int32_t j = 0; j < innerSz; j++) {
SStreamTask *pTask = taosArrayGetP(pArray, j);
- if (tEncodeSStreamTask(pEncoder, pTask) < 0) return -1;
+ if (tEncodeStreamTask(pEncoder, pTask) < 0) return -1;
}
}
@@ -130,7 +130,7 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) {
taosArrayDestroy(pArray);
return -1;
}
- if (tDecodeSStreamTask(pDecoder, pTask) < 0) {
+ if (tDecodeStreamTask(pDecoder, pTask) < 0) {
taosMemoryFree(pTask);
taosArrayDestroy(pArray);
return -1;
@@ -158,7 +158,10 @@ void tFreeStreamObj(SStreamObj *pStream) {
taosMemoryFree(pStream->sql);
taosMemoryFree(pStream->ast);
taosMemoryFree(pStream->physicalPlan);
- if (pStream->outputSchema.nCols) taosMemoryFree(pStream->outputSchema.pSchema);
+
+ if (pStream->outputSchema.nCols) {
+ taosMemoryFree(pStream->outputSchema.pSchema);
+ }
int32_t sz = taosArrayGetSize(pStream->tasks);
for (int32_t i = 0; i < sz; i++) {
@@ -166,11 +169,14 @@ void tFreeStreamObj(SStreamObj *pStream) {
int32_t taskSz = taosArrayGetSize(pLevel);
for (int32_t j = 0; j < taskSz; j++) {
SStreamTask *pTask = taosArrayGetP(pLevel, j);
- tFreeSStreamTask(pTask);
+ tFreeStreamTask(pTask);
}
+
taosArrayDestroy(pLevel);
}
+
taosArrayDestroy(pStream->tasks);
+
// tagSchema.pSchema
if (pStream->tagSchema.nCols > 0) {
taosMemoryFree(pStream->tagSchema.pSchema);
diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c
index d1671aa12a..734f624be0 100644
--- a/source/dnode/mnode/impl/src/mndScheduler.c
+++ b/source/dnode/mnode/impl/src/mndScheduler.c
@@ -138,7 +138,7 @@ int32_t mndAddDispatcherToInnerTask(SMnode* pMnode, SStreamObj* pStream, SStream
for (int32_t j = 0; j < sinkLvSize; j++) {
SStreamTask* pLastLevelTask = taosArrayGetP(sinkLv, j);
if (pLastLevelTask->nodeId == pVgInfo->vgId) {
- pVgInfo->taskId = pLastLevelTask->taskId;
+ pVgInfo->taskId = pLastLevelTask->id.taskId;
break;
}
}
@@ -149,7 +149,7 @@ int32_t mndAddDispatcherToInnerTask(SMnode* pMnode, SStreamObj* pStream, SStream
SArray* pArray = taosArrayGetP(pStream->tasks, 0);
// one sink only
SStreamTask* lastLevelTask = taosArrayGetP(pArray, 0);
- pTask->fixedEpDispatcher.taskId = lastLevelTask->taskId;
+ pTask->fixedEpDispatcher.taskId = lastLevelTask->id.taskId;
pTask->fixedEpDispatcher.nodeId = lastLevelTask->nodeId;
pTask->fixedEpDispatcher.epSet = lastLevelTask->epSet;
}
@@ -224,7 +224,7 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SStreamObj* pStream) {
continue;
}
- SStreamTask* pTask = tNewSStreamTask(pStream->uid);
+ SStreamTask* pTask = tNewStreamTask(pStream->uid);
if (pTask == NULL) {
sdbRelease(pSdb, pVgroup);
terrno = TSDB_CODE_OUT_OF_MEMORY;
@@ -260,7 +260,7 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SStreamObj* pStream) {
int32_t mndAddFixedSinkTaskToStream(SMnode* pMnode, SStreamObj* pStream) {
SArray* tasks = taosArrayGetP(pStream->tasks, 0);
- SStreamTask* pTask = tNewSStreamTask(pStream->uid);
+ SStreamTask* pTask = tNewStreamTask(pStream->uid);
if (pTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
@@ -350,12 +350,13 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) {
return -1;
}
- pInnerTask = tNewSStreamTask(pStream->uid);
+ pInnerTask = tNewStreamTask(pStream->uid);
if (pInnerTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
qDestroyQueryPlan(pPlan);
return -1;
}
+
pInnerTask->fillHistory = pStream->fillHistory;
mndAddTaskToTaskSet(taskInnerLevel, pInnerTask);
@@ -421,7 +422,7 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) {
continue;
}
- SStreamTask* pTask = tNewSStreamTask(pStream->uid);
+ SStreamTask* pTask = tNewStreamTask(pStream->uid);
if (pTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
sdbRelease(pSdb, pVgroup);
@@ -440,7 +441,7 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) {
pTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH;
pTask->outputType = TASK_OUTPUT__FIXED_DISPATCH;
- pTask->fixedEpDispatcher.taskId = pInnerTask->taskId;
+ pTask->fixedEpDispatcher.taskId = pInnerTask->id.taskId;
pTask->fixedEpDispatcher.nodeId = pInnerTask->nodeId;
pTask->fixedEpDispatcher.epSet = pInnerTask->epSet;
@@ -460,7 +461,7 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) {
pEpInfo->childId = pTask->selfChildId;
pEpInfo->epSet = pTask->epSet;
pEpInfo->nodeId = pTask->nodeId;
- pEpInfo->taskId = pTask->taskId;
+ pEpInfo->taskId = pTask->id.taskId;
taosArrayPush(pInnerTask->childEpInfo, &pEpInfo);
sdbRelease(pSdb, pVgroup);
}
@@ -491,7 +492,7 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) {
continue;
}
- SStreamTask* pTask = tNewSStreamTask(pStream->uid);
+ SStreamTask* pTask = tNewStreamTask(pStream->uid);
if (pTask == NULL) {
sdbRelease(pSdb, pVgroup);
qDestroyQueryPlan(pPlan);
diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c
index ff759f5e78..76bb144fcb 100644
--- a/source/dnode/mnode/impl/src/mndStream.c
+++ b/source/dnode/mnode/impl/src/mndStream.c
@@ -35,12 +35,12 @@
static int32_t mndStreamActionInsert(SSdb *pSdb, SStreamObj *pStream);
static int32_t mndStreamActionDelete(SSdb *pSdb, SStreamObj *pStream);
-static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pStream, SStreamObj *pNewStream);
+static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pOldStream, SStreamObj *pNewStream);
static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq);
static int32_t mndProcessDropStreamReq(SRpcMsg *pReq);
static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq);
-// static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq);
-/*static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq);*/
+static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq);
+static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq);
static int32_t mndProcessStreamMetaReq(SRpcMsg *pReq);
static int32_t mndGetStreamMeta(SRpcMsg *pReq, SShowObj *pShow, STableMetaRsp *pMeta);
static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows);
@@ -418,7 +418,7 @@ FAIL:
int32_t mndPersistTaskDeployReq(STrans *pTrans, const SStreamTask *pTask) {
SEncoder encoder;
tEncoderInit(&encoder, NULL, 0);
- tEncodeSStreamTask(&encoder, pTask);
+ tEncodeStreamTask(&encoder, pTask);
int32_t size = encoder.pos;
int32_t tlen = sizeof(SMsgHead) + size;
tEncoderClear(&encoder);
@@ -430,7 +430,7 @@ int32_t mndPersistTaskDeployReq(STrans *pTrans, const SStreamTask *pTask) {
((SMsgHead *)buf)->vgId = htonl(pTask->nodeId);
void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
tEncoderInit(&encoder, abuf, size);
- tEncodeSStreamTask(&encoder, pTask);
+ tEncodeStreamTask(&encoder, pTask);
tEncoderClear(&encoder);
STransAction action = {0};
@@ -601,7 +601,7 @@ static int32_t mndPersistTaskDropReq(STrans *pTrans, SStreamTask *pTask) {
return -1;
}
pReq->head.vgId = htonl(pTask->nodeId);
- pReq->taskId = pTask->taskId;
+ pReq->taskId = pTask->id.taskId;
STransAction action = {0};
memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet));
action.pCont = pReq;
@@ -1209,7 +1209,7 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock
// task id
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
- colDataSetVal(pColInfo, numOfRows, (const char *)&pTask->taskId, false);
+ colDataSetVal(pColInfo, numOfRows, (const char *)&pTask->id.taskId, false);
// node type
char nodeType[20 + VARSTR_HEADER_SIZE] = {0};
diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c
index 3d1b356f8c..cefc4fa63e 100644
--- a/source/dnode/snode/src/snode.c
+++ b/source/dnode/snode/src/snode.c
@@ -32,6 +32,7 @@ void sndEnqueueStreamDispatch(SSnode *pSnode, SRpcMsg *pMsg) {
tDecoderClear(&decoder);
goto FAIL;
}
+
tDecoderClear(&decoder);
int32_t taskId = req.taskId;
@@ -65,7 +66,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) {
ASSERT(taosArrayGetSize(pTask->childEpInfo) != 0);
pTask->refCnt = 1;
- pTask->schedStatus = TASK_SCHED_STATUS__INACTIVE;
+ pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
pTask->inputQueue = streamQueueOpen();
pTask->outputQueue = streamQueueOpen();
@@ -76,21 +77,19 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) {
pTask->inputStatus = TASK_INPUT_STATUS__NORMAL;
pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL;
pTask->pMsgCb = &pSnode->msgCb;
- pTask->startVer = ver;
+ pTask->chkInfo.version = ver;
+ pTask->pMeta = pSnode->pMeta;
pTask->pState = streamStateOpen(pSnode->path, pTask, false, -1, -1);
if (pTask->pState == NULL) {
return -1;
}
- SReadHandle mgHandle = {
- .vnode = NULL,
- .numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo),
- .pStateBackend = pTask->pState,
- };
+ int32_t numOfChildEp = taosArrayGetSize(pTask->childEpInfo);
+ SReadHandle mgHandle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState };
- pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &mgHandle, 0);
- ASSERT(pTask->exec.executor);
+ pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &mgHandle, 0);
+ ASSERT(pTask->exec.pExecutor);
streamSetupTrigger(pTask);
return 0;
@@ -140,9 +139,10 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) {
if (pTask == NULL) {
return -1;
}
+
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t *)msg, msgLen);
- code = tDecodeSStreamTask(&decoder, pTask);
+ code = tDecodeStreamTask(&decoder, pTask);
if (code < 0) {
tDecoderClear(&decoder);
taosMemoryFree(pTask);
@@ -153,7 +153,7 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) {
ASSERT(pTask->taskLevel == TASK_LEVEL__AGG);
// 2.save task
- code = streamMetaAddTask(pSnode->pMeta, -1, pTask);
+ code = streamMetaAddDeployedTask(pSnode->pMeta, -1, pTask);
if (code < 0) {
return -1;
}
diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt
index 9911752f8e..c713d1e247 100644
--- a/source/dnode/vnode/CMakeLists.txt
+++ b/source/dnode/vnode/CMakeLists.txt
@@ -57,6 +57,7 @@ target_sources(
# tq
"src/tq/tq.c"
+ "src/tq/tqUtil.c"
"src/tq/tqScan.c"
"src/tq/tqMeta.c"
"src/tq/tqRead.c"
@@ -64,6 +65,7 @@ target_sources(
"src/tq/tqPush.c"
"src/tq/tqSink.c"
"src/tq/tqCommit.c"
+ "src/tq/tqRestore.c"
"src/tq/tqSnapshot.c"
"src/tq/tqOffsetSnapshot.c"
)
diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h
index 7ecfadf728..fb2c2f4be3 100644
--- a/source/dnode/vnode/inc/vnode.h
+++ b/source/dnode/vnode/inc/vnode.h
@@ -256,15 +256,16 @@ void tqCloseReader(STqReader *);
void tqReaderSetColIdList(STqReader *pReader, SArray *pColIdList);
int32_t tqReaderSetTbUidList(STqReader *pReader, const SArray *tbUidList);
-int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *tbUidList);
+int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *pTableUidList);
int32_t tqReaderRemoveTbUidList(STqReader *pReader, const SArray *tbUidList);
int32_t tqSeekVer(STqReader *pReader, int64_t ver, const char *id);
-void tqNextBlock(STqReader *pReader, SFetchRet *ret);
+void tqNextBlock(STqReader *pReader, SFetchRet *ret);
+int32_t extractSubmitMsgFromWal(SWalReader *pReader, SPackedData *pPackedData);
-int32_t tqReaderSetSubmitReq2(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver);
+int32_t tqReaderSetSubmitMsg(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver);
// int32_t tqReaderSetDataMsg(STqReader *pReader, const SSubmitReq *pMsg, int64_t ver);
-bool tqNextDataBlock2(STqReader *pReader);
+bool tqNextDataBlock(STqReader *pReader);
bool tqNextDataBlockFilterOut2(STqReader *pReader, SHashObj *filterOutUids);
int32_t tqRetrieveDataBlock2(SSDataBlock *pBlock, STqReader *pReader, SSubmitTbData **pSubmitTbDataRet);
int32_t tqRetrieveTaosxBlock2(STqReader *pReader, SArray *blocks, SArray *schemas, SSubmitTbData **pSubmitTbDataRet);
diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h
index 9037644602..c007f84790 100644
--- a/source/dnode/vnode/src/inc/tq.h
+++ b/source/dnode/vnode/src/inc/tq.h
@@ -80,7 +80,7 @@ typedef struct {
typedef struct {
int8_t subType;
- STqReader* pExecReader;
+ STqReader* pTqReader;
qTaskInfo_t task;
union {
STqExecCol execCol;
@@ -128,6 +128,10 @@ typedef struct {
tmr_h timer;
} STqMgmt;
+typedef struct {
+ int32_t size;
+} STqOffsetHead;
+
static STqMgmt tqMgmt = {0};
int32_t tEncodeSTqHandle(SEncoder* pEncoder, const STqHandle* pHandle);
@@ -154,10 +158,6 @@ int32_t tqMetaSaveCheckInfo(STQ* pTq, const char* key, const void* value, int32_
int32_t tqMetaDeleteCheckInfo(STQ* pTq, const char* key);
int32_t tqMetaRestoreCheckInfo(STQ* pTq);
-typedef struct {
- int32_t size;
-} STqOffsetHead;
-
STqOffsetStore* tqOffsetOpen(STQ* pTq);
void tqOffsetClose(STqOffsetStore*);
STqOffset* tqOffsetRead(STqOffsetStore* pStore, const char* subscribeKey);
@@ -176,6 +176,18 @@ int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname);
// tqStream
int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver);
+int32_t tqStreamTasksScanWal(STQ* pTq);
+
+// tq util
+void createStreamTaskOffsetKey(char* dst, uint64_t streamId, uint32_t taskId);
+int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem, int64_t ver);
+int32_t launchTaskForWalBlock(SStreamTask* pTask, SFetchRet* pRet, STqOffset* pOffset);
+int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg);
+
+void doSaveTaskOffset(STqOffsetStore* pOffsetStore, const char* pKey, int64_t ver);
+void saveOffsetForAllTasks(STQ* pTq, int64_t ver);
+void initOffsetForAllRestoreTasks(STQ* pTq);
+int32_t transferToWalReadTask(SStreamMeta* pStreamMeta, SArray* pTaskList);
#ifdef __cplusplus
}
diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h
index 253d5aebce..81f7c3d52a 100644
--- a/source/dnode/vnode/src/inc/vnodeInt.h
+++ b/source/dnode/vnode/src/inc/vnodeInt.h
@@ -192,9 +192,10 @@ void tqCleanUp();
STQ* tqOpen(const char* path, SVnode* pVnode);
void tqClose(STQ*);
int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver);
-int tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp,
+int tqRegisterPushHandle(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp,
int32_t type);
-int tqUnregisterPushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer);
+int tqUnregisterPushHandle(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer);
+int tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed.
int tqCommit(STQ*);
int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd);
diff --git a/source/dnode/vnode/src/meta/metaCache.c b/source/dnode/vnode/src/meta/metaCache.c
index 9501bf4b8e..795f281ab2 100644
--- a/source/dnode/vnode/src/meta/metaCache.c
+++ b/source/dnode/vnode/src/meta/metaCache.c
@@ -531,10 +531,11 @@ static void freePayload(const void* key, size_t keyLen, void* value) {
return;
}
- SHashObj* pHashObj = (SHashObj*)p[0];
+ SHashObj* pHashObj = (SHashObj*)p[0];
+
STagFilterResEntry** pEntry = taosHashGet(pHashObj, &p[1], sizeof(uint64_t));
- {
+ if (pEntry != NULL && (*pEntry) != NULL) {
int64_t st = taosGetTimestampUs();
SListIter iter = {0};
@@ -547,9 +548,9 @@ static void freePayload(const void* key, size_t keyLen, void* value) {
void* tmp = tdListPopNode(&((*pEntry)->list), pNode);
taosMemoryFree(tmp);
- int64_t et = taosGetTimestampUs();
- metaInfo("clear items in cache, remain cached item:%d, elapsed time:%.2fms", listNEles(&((*pEntry)->list)),
- (et - st) / 1000.0);
+ double el = (taosGetTimestampUs() - st) / 1000.0;
+ metaInfo("clear items in meta-cache, remain cached item:%d, elapsed time:%.2fms", listNEles(&((*pEntry)->list)),
+ el);
break;
}
}
diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c
index c75c675ec3..ce987ca88e 100644
--- a/source/dnode/vnode/src/sma/smaRollup.c
+++ b/source/dnode/vnode/src/sma/smaRollup.c
@@ -168,7 +168,7 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids,
for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) {
if (pRSmaInfo->taskInfo[i]) {
- if ((terrno = qUpdateQualifiedTableId(pRSmaInfo->taskInfo[i], tbUids, isAdd)) < 0) {
+ if ((terrno = qUpdateTableListForStreamScanner(pRSmaInfo->taskInfo[i], tbUids, isAdd)) < 0) {
tdReleaseRSmaInfo(pSma, pRSmaInfo);
smaError("vgId:%d, update tbUidList failed for uid:%" PRIi64 " level %d since %s", SMA_VID(pSma), *suid, i,
terrstr());
diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c
index abc8a26369..1230a352d9 100644
--- a/source/dnode/vnode/src/tq/tq.c
+++ b/source/dnode/vnode/src/tq/tq.c
@@ -18,6 +18,7 @@
// 0: not init
// 1: already inited
// 2: wait to be inited or cleaup
+#define WAL_READ_TASKS_ID (-1)
int32_t tqInit() {
int8_t old;
@@ -61,12 +62,12 @@ static void destroyTqHandle(void* data) {
if (pData->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
taosMemoryFreeClear(pData->execHandle.execCol.qmsg);
} else if (pData->execHandle.subType == TOPIC_SUB_TYPE__DB) {
- tqCloseReader(pData->execHandle.pExecReader);
+ tqCloseReader(pData->execHandle.pTqReader);
walCloseReader(pData->pWalReader);
taosHashCleanup(pData->execHandle.execDb.pFilterOutTbUid);
} else if (pData->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
walCloseReader(pData->pWalReader);
- tqCloseReader(pData->execHandle.pExecReader);
+ tqCloseReader(pData->execHandle.pTqReader);
}
}
@@ -82,12 +83,18 @@ static void tqPushEntryFree(void* data) {
taosMemoryFree(p);
}
+static bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) {
+ return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG &&
+ pLeft->val.version <= pRight->val.version;  // false unless both offsets are log-type and left <= right
+}
+
STQ* tqOpen(const char* path, SVnode* pVnode) {
STQ* pTq = taosMemoryCalloc(1, sizeof(STQ));
if (pTq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
+
pTq->path = taosStrdup(path);
pTq->pVnode = pVnode;
pTq->walLogLastVer = pVnode->pWal->vers.lastVer;
@@ -138,44 +145,6 @@ void tqClose(STQ* pTq) {
taosMemoryFree(pTq);
}
-int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp) {
- int32_t len = 0;
- int32_t code = 0;
- tEncodeSize(tEncodeSMqMetaRsp, pRsp, len, code);
- if (code < 0) {
- return -1;
- }
- int32_t tlen = sizeof(SMqRspHead) + len;
- void* buf = rpcMallocCont(tlen);
- if (buf == NULL) {
- return -1;
- }
-
- ((SMqRspHead*)buf)->mqMsgType = TMQ_MSG_TYPE__POLL_META_RSP;
- ((SMqRspHead*)buf)->epoch = pReq->epoch;
- ((SMqRspHead*)buf)->consumerId = pReq->consumerId;
-
- void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead));
-
- SEncoder encoder = {0};
- tEncoderInit(&encoder, abuf, len);
- tEncodeSMqMetaRsp(&encoder, pRsp);
- tEncoderClear(&encoder);
-
- SRpcMsg resp = {
- .info = pMsg->info,
- .pCont = buf,
- .contLen = tlen,
- .code = 0,
- };
- tmsgSendRsp(&resp);
-
- tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) send rsp, res msg type %d, offset type:%d",
- TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->resMsgType, pRsp->rspOffset.type);
-
- return 0;
-}
-
static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch,
int64_t consumerId, int32_t type) {
int32_t len = 0;
@@ -253,11 +222,6 @@ int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, con
return 0;
}
-static FORCE_INLINE bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) {
- return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG &&
- pLeft->val.version <= pRight->val.version;
-}
-
int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
STqOffset offset = {0};
int32_t vgId = TD_VID(pTq->pVnode);
@@ -330,318 +294,6 @@ int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) {
return 0;
}
-static int32_t tqInitDataRsp(SMqDataRsp* pRsp, const SMqPollReq* pReq, int8_t subType) {
- pRsp->reqOffset = pReq->reqOffset;
-
- pRsp->blockData = taosArrayInit(0, sizeof(void*));
- pRsp->blockDataLen = taosArrayInit(0, sizeof(int32_t));
-
- if (pRsp->blockData == NULL || pRsp->blockDataLen == NULL) {
- return -1;
- }
-
- pRsp->withTbName = 0;
- pRsp->withSchema = false;
- return 0;
-}
-
-static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, const SMqPollReq* pReq) {
- pRsp->reqOffset = pReq->reqOffset;
-
- pRsp->withTbName = 1;
- pRsp->withSchema = 1;
- pRsp->blockData = taosArrayInit(0, sizeof(void*));
- pRsp->blockDataLen = taosArrayInit(0, sizeof(int32_t));
- pRsp->blockTbName = taosArrayInit(0, sizeof(void*));
- pRsp->blockSchema = taosArrayInit(0, sizeof(void*));
-
- if (pRsp->blockData == NULL || pRsp->blockDataLen == NULL || pRsp->blockTbName == NULL || pRsp->blockSchema == NULL) {
- return -1;
- }
-
- return 0;
-}
-
-static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest,
- SRpcMsg* pMsg, bool* pBlockReturned) {
- uint64_t consumerId = pRequest->consumerId;
- STqOffsetVal reqOffset = pRequest->reqOffset;
- STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, pRequest->subKey);
- int32_t vgId = TD_VID(pTq->pVnode);
-
- *pBlockReturned = false;
-
- // In this vnode, data has been polled by consumer for this topic, so let's continue from the last offset value.
- if (pOffset != NULL) {
- *pOffsetVal = pOffset->val;
-
- char formatBuf[80];
- tFormatOffset(formatBuf, 80, pOffsetVal);
- tqDebug("tmq poll: consumer:0x%" PRIx64
- ", subkey %s, vgId:%d, existed offset found, offset reset to %s and continue. reqId:0x%" PRIx64,
- consumerId, pHandle->subKey, vgId, formatBuf, pRequest->reqId);
- return 0;
- } else {
- // no poll occurs in this vnode for this topic, let's seek to the right offset value.
- if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEAST) {
- if (pRequest->useSnapshot) {
- tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey:%s, vgId:%d, (earliest) set offset to be snapshot",
- consumerId, pHandle->subKey, vgId);
-
- if (pHandle->fetchMeta) {
- tqOffsetResetToMeta(pOffsetVal, 0);
- } else {
- tqOffsetResetToData(pOffsetVal, 0, 0);
- }
- } else {
- pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef);
- if (pHandle->pRef == NULL) {
- terrno = TSDB_CODE_OUT_OF_MEMORY;
- return -1;
- }
-
- // offset set to previous version when init
- tqOffsetResetToLog(pOffsetVal, pHandle->pRef->refVer - 1);
- }
- } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) {
- if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
- SMqDataRsp dataRsp = {0};
- tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType);
-
- tqOffsetResetToLog(&dataRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal));
- tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, (latest) offset reset to %" PRId64, consumerId,
- pHandle->subKey, vgId, dataRsp.rspOffset.version);
- int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_RSP);
- tDeleteSMqDataRsp(&dataRsp);
-
- *pBlockReturned = true;
- return code;
- } else {
- STaosxRsp taosxRsp = {0};
- tqInitTaosxRsp(&taosxRsp, pRequest);
- tqOffsetResetToLog(&taosxRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal));
- int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
- tDeleteSTaosxRsp(&taosxRsp);
-
- *pBlockReturned = true;
- return code;
- }
- } else if (reqOffset.type == TMQ_OFFSET__RESET_NONE) {
- tqError("tmq poll: subkey:%s, no offset committed for consumer:0x%" PRIx64
- " in vg %d, subkey %s, reset none failed",
- pHandle->subKey, consumerId, vgId, pRequest->subKey);
- terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET;
- return -1;
- }
- }
-
- return 0;
-}
-
-#define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0)
-
-static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest,
- SRpcMsg* pMsg, STqOffsetVal* pOffset) {
- uint64_t consumerId = pRequest->consumerId;
- int32_t vgId = TD_VID(pTq->pVnode);
-
- SMqDataRsp dataRsp = {0};
- tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType);
-
- // lock
- taosWLockLatch(&pTq->lock);
-
- qSetTaskId(pHandle->execHandle.task, consumerId, pRequest->reqId);
- int code = tqScanData(pTq, pHandle, &dataRsp, pOffset);
- if (code != 0) {
- goto end;
- }
-
- // till now, all data has been transferred to consumer, new data needs to push client once arrived.
- if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG &&
- dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) {
- code = tqRegisterPushEntry(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP);
- taosWUnLockLatch(&pTq->lock);
- return code;
- }
-
- code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_RSP);
-
- // NOTE: this pHandle->consumerId may have been changed already.
-
-end : {
- char buf[80] = {0};
- tFormatOffset(buf, 80, &dataRsp.rspOffset);
- tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, rsp offset type:%s, reqId:0x%" PRIx64
- " code:%d",
- consumerId, pHandle->subKey, vgId, dataRsp.blockNum, buf, pRequest->reqId, code);
- taosWUnLockLatch(&pTq->lock);
- tDeleteSMqDataRsp(&dataRsp);
-}
- return code;
-}
-
-static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest,
- SRpcMsg* pMsg, STqOffsetVal* offset) {
- int code = 0;
- int32_t vgId = TD_VID(pTq->pVnode);
- SWalCkHead* pCkHead = NULL;
- SMqMetaRsp metaRsp = {0};
- STaosxRsp taosxRsp = {0};
- tqInitTaosxRsp(&taosxRsp, pRequest);
-
- if (offset->type != TMQ_OFFSET__LOG) {
- if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, offset) < 0) {
- return -1;
- }
-
- if (metaRsp.metaRspLen > 0) {
- code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp);
- tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send meta offset type:%d,uid:%" PRId64
- ",ts:%" PRId64,
- pRequest->consumerId, pHandle->subKey, vgId, metaRsp.rspOffset.type, metaRsp.rspOffset.uid,
- metaRsp.rspOffset.ts);
- taosMemoryFree(metaRsp.metaRsp);
- tDeleteSTaosxRsp(&taosxRsp);
- return code;
- }
-
- tqDebug("taosx poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send data blockNum:%d, offset type:%d,uid:%" PRId64
- ",ts:%" PRId64,
- pRequest->consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type,
- taosxRsp.rspOffset.uid, taosxRsp.rspOffset.ts);
- if (taosxRsp.blockNum > 0) {
- code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
- tDeleteSTaosxRsp(&taosxRsp);
- return code;
- } else {
- *offset = taosxRsp.rspOffset;
- }
- }
-
- if (offset->type == TMQ_OFFSET__LOG) {
- int64_t fetchVer = offset->version + 1;
- pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048);
- if (pCkHead == NULL) {
- tDeleteSTaosxRsp(&taosxRsp);
- terrno = TSDB_CODE_OUT_OF_MEMORY;
- return -1;
- }
- walSetReaderCapacity(pHandle->pWalReader, 2048);
- int totalRows = 0;
- while (1) {
- int32_t savedEpoch = atomic_load_32(&pHandle->epoch);
- if (savedEpoch > pRequest->epoch) {
- tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey:%s vgId:%d offset %" PRId64
- ", found new consumer epoch %d, discard req epoch %d",
- pRequest->consumerId, pRequest->epoch, pHandle->subKey, vgId, fetchVer, savedEpoch, pRequest->epoch);
- break;
- }
-
- if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead, pRequest->reqId) < 0) {
- tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer);
- code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
- tDeleteSTaosxRsp(&taosxRsp);
- taosMemoryFreeClear(pCkHead);
- return code;
- }
-
- SWalCont* pHead = &pCkHead->head;
- tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d",
- pRequest->consumerId, pRequest->epoch, vgId, fetchVer, pHead->msgType);
-
- // process meta
- if (pHead->msgType != TDMT_VND_SUBMIT) {
- if (totalRows > 0) {
- tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer - 1);
- code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
- tDeleteSTaosxRsp(&taosxRsp);
- taosMemoryFreeClear(pCkHead);
- return code;
- }
-
- tqDebug("fetch meta msg, ver:%" PRId64 ", type:%s", pHead->version, TMSG_INFO(pHead->msgType));
- tqOffsetResetToLog(&metaRsp.rspOffset, fetchVer);
- metaRsp.resMsgType = pHead->msgType;
- metaRsp.metaRspLen = pHead->bodyLen;
- metaRsp.metaRsp = pHead->body;
- if (tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp) < 0) {
- code = -1;
- taosMemoryFreeClear(pCkHead);
- tDeleteSTaosxRsp(&taosxRsp);
- return code;
- }
- code = 0;
- taosMemoryFreeClear(pCkHead);
- tDeleteSTaosxRsp(&taosxRsp);
- return code;
- }
-
- // process data
- SPackedData submit = {
- .msgStr = POINTER_SHIFT(pHead->body, sizeof(SSubmitReq2Msg)),
- .msgLen = pHead->bodyLen - sizeof(SSubmitReq2Msg),
- .ver = pHead->version,
- };
-
- if (tqTaosxScanLog(pTq, pHandle, submit, &taosxRsp, &totalRows) < 0) {
- tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", pRequest->consumerId, vgId,
- pRequest->subKey);
- taosMemoryFreeClear(pCkHead);
- tDeleteSTaosxRsp(&taosxRsp);
- return -1;
- }
-
- if (totalRows >= 4096 || taosxRsp.createTableNum > 0) {
- tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer);
- code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
- tDeleteSTaosxRsp(&taosxRsp);
- taosMemoryFreeClear(pCkHead);
- return code;
- } else {
- fetchVer++;
- }
- }
- }
-
- tDeleteSTaosxRsp(&taosxRsp);
- taosMemoryFreeClear(pCkHead);
- return 0;
-}
-
-static int32_t doPollDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg) {
- int32_t code = -1;
- STqOffsetVal offset = {0};
- STqOffsetVal reqOffset = pRequest->reqOffset;
-
- // 1. reset the offset if needed
- if (IS_OFFSET_RESET_TYPE(reqOffset.type)) {
- // handle the reset offset cases, according to the consumer's choice.
- bool blockReturned = false;
- code = extractResetOffsetVal(&offset, pTq, pHandle, pRequest, pMsg, &blockReturned);
- if (code != 0) {
- return code;
- }
-
- // empty block returned, quit
- if (blockReturned) {
- return 0;
- }
- } else { // use the consumer specified offset
- // the offset value can not be monotonious increase??
- offset = reqOffset;
- }
-
- // this is a normal subscribe requirement
- if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
- return extractDataAndRspForNormalSubscribe(pTq, pHandle, pRequest, pMsg, &offset);
- }
-
- // todo handle the case where re-balance occurs.
- // for taosx
- return extractDataAndRspForDbStbSubscribe(pTq, pHandle, pRequest, pMsg, &offset);
-}
-
int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
SMqPollReq req = {0};
if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) {
@@ -689,7 +341,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64,
consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId);
- return doPollDataForMq(pTq, pHandle, &req, pMsg);
+ return tqExtractDataForMq(pTq, pHandle, &req, pMsg);
}
int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
@@ -815,10 +467,10 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg
&pHandle->execHandle.numOfCols, req.newConsumerId);
void* scanner = NULL;
qExtractStreamScanner(pHandle->execHandle.task, &scanner);
- pHandle->execHandle.pExecReader = qExtractReaderFromStreamScanner(scanner);
+ pHandle->execHandle.pTqReader = qExtractReaderFromStreamScanner(scanner);
} else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__DB) {
pHandle->pWalReader = walOpenReader(pVnode->pWal, NULL);
- pHandle->execHandle.pExecReader = tqOpenReader(pVnode);
+ pHandle->execHandle.pTqReader = tqOpenReader(pVnode);
pHandle->execHandle.execDb.pFilterOutTbUid =
taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK);
@@ -837,8 +489,8 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg
int64_t tbUid = *(int64_t*)taosArrayGet(tbUidList, i);
tqDebug("vgId:%d, idx %d, uid:%" PRId64, vgId, i, tbUid);
}
- pHandle->execHandle.pExecReader = tqOpenReader(pVnode);
- tqReaderSetTbUidList(pHandle->execHandle.pExecReader, tbUidList);
+ pHandle->execHandle.pTqReader = tqOpenReader(pVnode);
+ tqReaderSetTbUidList(pHandle->execHandle.pTqReader, tbUidList);
taosArrayDestroy(tbUidList);
buildSnapContext(handle.meta, handle.version, req.suid, pHandle->execHandle.subType, pHandle->fetchMeta,
@@ -874,7 +526,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg
atomic_store_32(&pHandle->epoch, -1);
// remove if it has been register in the push manager, and return one empty block to consumer
- tqUnregisterPushEntry(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true);
+ tqUnregisterPushHandle(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true);
atomic_store_64(&pHandle->consumerId, req.newConsumerId);
atomic_add_fetch_32(&pHandle->epoch, 1);
@@ -896,16 +548,14 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg
}
int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
-#if 0
- if (pTask->taskLevel == TASK_LEVEL__AGG) {
- A(taosArrayGetSize(pTask->childEpInfo) != 0);
- }
-#endif
+ // todo extract method: format "0x<streamId>-<taskId>"; bounded write so buf can never be overrun
+ char buf[128] = {0};
+ snprintf(buf, sizeof(buf), "0x%" PRIx64 "-%d", pTask->id.streamId, pTask->id.taskId);
int32_t vgId = TD_VID(pTq->pVnode);
+ pTask->id.idStr = taosStrdup(buf);
pTask->refCnt = 1;
- pTask->schedStatus = TASK_SCHED_STATUS__INACTIVE;
-
+ pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
pTask->inputQueue = streamQueueOpen();
pTask->outputQueue = streamQueueOpen();
@@ -916,11 +566,14 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
pTask->inputStatus = TASK_INPUT_STATUS__NORMAL;
pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL;
pTask->pMsgCb = &pTq->pVnode->msgCb;
- pTask->startVer = ver;
+ pTask->pMeta = pTq->pStreamMeta;
+ pTask->chkInfo.version = ver;
// expand executor
if (pTask->fillHistory) {
- pTask->taskStatus = TASK_STATUS__WAIT_DOWNSTREAM;
+ pTask->status.taskStatus = TASK_STATUS__WAIT_DOWNSTREAM;
+ } else {
+ pTask->status.taskStatus = TASK_STATUS__RESTORE;
}
if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
@@ -930,14 +583,10 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
}
SReadHandle handle = {
- .meta = pTq->pVnode->pMeta,
- .vnode = pTq->pVnode,
- .initTqReader = 1,
- .pStateBackend = pTask->pState,
- };
+ .meta = pTq->pVnode->pMeta, .vnode = pTq->pVnode, .initTqReader = 1, .pStateBackend = pTask->pState};
- pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId);
- if (pTask->exec.executor == NULL) {
+ pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId);
+ if (pTask->exec.pExecutor == NULL) {
return -1;
}
@@ -946,14 +595,12 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
if (pTask->pState == NULL) {
return -1;
}
- SReadHandle mgHandle = {
- .vnode = NULL,
- .numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo),
- .pStateBackend = pTask->pState,
- };
- pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &mgHandle, vgId);
- if (pTask->exec.executor == NULL) {
+ int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo);
+ SReadHandle mgHandle = { .vnode = NULL, .numOfVgroups = numOfVgroups, .pStateBackend = pTask->pState};
+
+ pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &mgHandle, vgId);
+ if (pTask->exec.pExecutor == NULL) {
return -1;
}
}
@@ -974,16 +621,20 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
ver1 = info.skmVer;
}
- pTask->tbSink.pTSchema =
- tBuildTSchema(pTask->tbSink.pSchemaWrapper->pSchema, pTask->tbSink.pSchemaWrapper->nCols, ver1);
- if (pTask->tbSink.pTSchema == NULL) {
+ SSchemaWrapper* pschemaWrapper = pTask->tbSink.pSchemaWrapper;
+ pTask->tbSink.pTSchema = tBuildTSchema(pschemaWrapper->pSchema, pschemaWrapper->nCols, ver1);
+ if(pTask->tbSink.pTSchema == NULL) {
return -1;
}
}
+ if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
+ pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, NULL);
+ }
+
streamSetupTrigger(pTask);
- tqInfo("expand stream task on vg %d, task id %d, child id %d, level %d", vgId, pTask->taskId, pTask->selfChildId,
- pTask->taskLevel);
+ tqInfo("vgId:%d expand stream task, s-task:%s, ver:%" PRId64 " child id:%d, level:%d", vgId, pTask->id.idStr,
+ pTask->chkInfo.version, pTask->selfChildId, pTask->taskLevel);
return 0;
}
@@ -1006,18 +657,24 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
.upstreamNodeId = req.upstreamNodeId,
.upstreamTaskId = req.upstreamTaskId,
};
+
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
- if (pTask && atomic_load_8(&pTask->taskStatus) == TASK_STATUS__NORMAL) {
- rsp.status = 1;
+ if (pTask) {
+ rsp.status = (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL) ? 1 : 0;
+ // log while the reference is still held: pTask must not be dereferenced after it has been released
+ tqDebug("tq recv task check req(reqId:0x%" PRIx64
+ ") %d at node %d task status:%d, check req from task %d at node %d, rsp status %d",
+ rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, pTask->status.taskStatus, rsp.upstreamTaskId,
+ rsp.upstreamNodeId, rsp.status);
+ streamMetaReleaseTask(pTq->pStreamMeta, pTask);
} else {
rsp.status = 0;
+ tqDebug("tq recv task check(taskId:%d not built yet) req(reqId:0x%" PRIx64
+ ") %d at node %d, check req from task %d at node %d, rsp status %d",
+ taskId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId,
+ rsp.status);
}
- if (pTask) streamMetaReleaseTask(pTq->pStreamMeta, pTask);
-
- tqDebug("tq recv task check req(reqId:0x%" PRIx64 ") %d at node %d check req from task %d at node %d, status %d",
- rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
-
SEncoder encoder;
int32_t code;
int32_t len;
@@ -1035,13 +692,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
tEncodeSStreamTaskCheckRsp(&encoder, &rsp);
tEncoderClear(&encoder);
- SRpcMsg rspMsg = {
- .code = 0,
- .pCont = buf,
- .contLen = sizeof(SMsgHead) + len,
- .info = pMsg->info,
- };
-
+ SRpcMsg rspMsg = { .code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info };
tmsgSendRsp(&rspMsg);
return 0;
}
@@ -1057,8 +708,8 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, char* msg, int32
tDecoderClear(&decoder);
return -1;
}
- tDecoderClear(&decoder);
+ tDecoderClear(&decoder);
tqDebug("tq recv task check rsp(reqId:0x%" PRIx64 ") %d at node %d check req from task %d at node %d, status %d",
rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
@@ -1090,17 +741,20 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
- code = tDecodeSStreamTask(&decoder, pTask);
+ code = tDecodeStreamTask(&decoder, pTask);
if (code < 0) {
tDecoderClear(&decoder);
taosMemoryFree(pTask);
return -1;
}
+
tDecoderClear(&decoder);
// 2.save task
- code = streamMetaAddTask(pTq->pStreamMeta, sversion, pTask);
+ code = streamMetaAddDeployedTask(pTq->pStreamMeta, sversion, pTask);
if (code < 0) {
+ tqError("vgId:%d failed to add s-task:%s, total:%d", TD_VID(pTq->pVnode), pTask->id.idStr,
+ streamMetaGetNumOfTasks(pTq->pStreamMeta));
return -1;
}
@@ -1109,6 +763,8 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms
streamTaskCheckDownstream(pTask, sversion);
}
+ tqDebug("vgId:%d s-task:%s is deployed and add meta from mnd, status:%d, total:%d", TD_VID(pTq->pVnode),
+ pTask->id.idStr, pTask->status.taskStatus, streamMetaGetNumOfTasks(pTq->pStreamMeta));
return 0;
}
@@ -1124,7 +780,7 @@ int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) {
}
// check param
- int64_t fillVer1 = pTask->startVer;
+ int64_t fillVer1 = pTask->chkInfo.version;
if (fillVer1 <= 0) {
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return -1;
@@ -1133,7 +789,7 @@ int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) {
// do recovery step 1
streamSourceRecoverScanStep1(pTask);
- if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) {
+ if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return 0;
}
@@ -1148,7 +804,7 @@ int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) {
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
- if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) {
+ if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
return 0;
}
@@ -1190,7 +846,7 @@ int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t
return -1;
}
- if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) {
+ if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return 0;
}
@@ -1309,7 +965,7 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) {
SStreamTask* pTask = *(SStreamTask**)pIter;
if (pTask->taskLevel != TASK_LEVEL__SOURCE) continue;
- qDebug("delete req enqueue stream task: %d, ver: %" PRId64, pTask->taskId, ver);
+ qDebug("delete req enqueue stream task: %d, ver: %" PRId64, pTask->id.taskId, ver);
if (!failed) {
SStreamRefDataBlock* pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0);
@@ -1318,8 +974,8 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) {
pRefBlock->dataRef = pRef;
atomic_add_fetch_32(pRefBlock->dataRef, 1);
- if (tAppendDataForStream(pTask, (SStreamQueueItem*)pRefBlock) < 0) {
- qError("stream task input del failed, task id %d", pTask->taskId);
+ if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pRefBlock) < 0) {
+ qError("stream task input del failed, task id %d", pTask->id.taskId);
atomic_sub_fetch_32(pRef, 1);
taosFreeQitem(pRefBlock);
@@ -1327,7 +983,7 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) {
}
if (streamSchedExec(pTask) < 0) {
- qError("stream task launch failed, task id %d", pTask->taskId);
+ qError("stream task launch failed, task id %d", pTask->id.taskId);
continue;
}
@@ -1353,13 +1009,13 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) {
taosArrayPush(pStreamBlock->blocks, &block);
if (!failed) {
- if (tAppendDataForStream(pTask, (SStreamQueueItem*)pStreamBlock) < 0) {
- qError("stream task input del failed, task id %d", pTask->taskId);
+ if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pStreamBlock) < 0) {
+ qError("stream task input del failed, task id %d", pTask->id.taskId);
continue;
}
if (streamSchedExec(pTask) < 0) {
- qError("stream task launch failed, task id %d", pTask->taskId);
+ qError("stream task launch failed, task id %d", pTask->id.taskId);
continue;
}
} else {
@@ -1372,17 +1028,32 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) {
return 0;
}
-int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) {
- void* pIter = NULL;
- bool succ = true;
+static int32_t addSubmitBlockNLaunchTask(STqOffsetStore* pOffsetStore, SStreamTask* pTask, SStreamDataSubmit2* pSubmit,
+ const char* key, int64_t ver) {
+ doSaveTaskOffset(pOffsetStore, key, ver);
+ int32_t code = tqAddInputBlockNLaunchTask(pTask, (SStreamQueueItem*)pSubmit, ver);
- SStreamDataSubmit2* pSubmit = streamDataSubmitNew(submit);
+ // remove the offset, if all functions are completed successfully.
+ if (code == TSDB_CODE_SUCCESS) {
+ tqOffsetDelete(pOffsetStore, key);
+ }
+
+ return code;
+}
+
+int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) {
+#if 0
+ void* pIter = NULL;
+ SStreamDataSubmit2* pSubmit = streamDataSubmitNew(submit, STREAM_INPUT__DATA_SUBMIT);
if (pSubmit == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
tqError("failed to create data submit for stream since out of memory");
- succ = false;
+ saveOffsetForAllTasks(pTq, submit.ver);
+ return -1;
}
+ SArray* pInputQueueFullTasks = taosArrayInit(4, POINTER_BYTES);
+
while (1) {
pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter);
if (pIter == NULL) {
@@ -1394,47 +1065,75 @@ int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) {
continue;
}
- if (pTask->taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) {
- tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->taskId, pTask->taskStatus);
+ if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) {
+ tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId,
+ pTask->status.taskStatus);
continue;
}
- tqDebug("data submit enqueue stream task:%d, ver: %" PRId64, pTask->taskId, submit.ver);
- if (succ) {
- int32_t code = tAppendDataForStream(pTask, (SStreamQueueItem*)pSubmit);
- if (code < 0) {
- // let's handle the back pressure
+ // check if offset value exists
+ char key[128] = {0};
+ createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId);
- tqError("stream task:%d failed to put into queue for, too many", pTask->taskId);
- continue;
+ if (tInputQueueIsFull(pTask)) {
+ STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key);
+
+ int64_t ver = submit.ver;
+ if (pOffset == NULL) {
+ doSaveTaskOffset(pTq->pOffsetStore, key, submit.ver);
+ } else {
+ ver = pOffset->val.version;
}
- if (streamSchedExec(pTask) < 0) {
- tqError("stream task:%d launch failed, code:%s", pTask->taskId, tstrerror(terrno));
- continue;
- }
- } else {
- streamTaskInputFail(pTask);
+ tqDebug("s-task:%s input queue is full, discard submit block, ver:%" PRId64, pTask->id.idStr, ver);
+ taosArrayPush(pInputQueueFullTasks, &pTask);
+ continue;
}
+
+ // check if offset value exists
+ STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key);
+ ASSERT(pOffset == NULL);
+
+ addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver);
}
- if (pSubmit != NULL) {
- streamDataSubmitDestroy(pSubmit);
- taosFreeQitem(pSubmit);
- }
+ streamDataSubmitDestroy(pSubmit);
+ taosFreeQitem(pSubmit);
+#endif
- return succ ? 0 : -1;
+ tqStartStreamTasks(pTq);
+ return 0;
}
+// Run the stream task addressed by the request; a taskId of WAL_READ_TASKS_ID scans the WAL for all tasks instead.
+// Returns 0 when the request was handled (or ignored because of the task status), -1 when the task is not found.
int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
SStreamTaskRunReq* pReq = pMsg->pCont;
- int32_t taskId = pReq->taskId;
- SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
- if (pTask) {
- streamProcessRunReq(pTask);
+ int32_t taskId = pReq->taskId;
+ int32_t vgId = TD_VID(pTq->pVnode);
+ if (taskId == WAL_READ_TASKS_ID) { // all tasks extract their submit data from the wal
+ tqStreamTasksScanWal(pTq);
+ return 0;
+ }
+
+ SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
+ if (pTask != NULL) {
+ if (pTask->status.taskStatus == TASK_STATUS__NORMAL) {
+ tqDebug("vgId:%d s-task:%s start to process run req", vgId, pTask->id.idStr);
+ streamProcessRunReq(pTask);
+ } else if (pTask->status.taskStatus == TASK_STATUS__RESTORE) {
+ tqDebug("vgId:%d s-task:%s start to process block from wal, last chk point:%" PRId64, vgId,
+ pTask->id.idStr, pTask->chkInfo.version);
+ streamProcessRunReq(pTask);
+ } else {
+ tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr);
+ }
+
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
+ tqStartStreamTasks(pTq);
return 0;
} else {
+ tqError("vgId:%d failed to found s-task, taskId:%d", vgId, taskId);
return -1;
}
}
@@ -1447,14 +1146,10 @@ int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) {
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
tDecodeStreamDispatchReq(&decoder, &req);
- int32_t taskId = req.taskId;
- SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
+ SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId);
if (pTask) {
- SRpcMsg rsp = {
- .info = pMsg->info,
- .code = 0,
- };
+ SRpcMsg rsp = { .info = pMsg->info, .code = 0 };
streamProcessDispatchReq(pTask, &req, &rsp, exec);
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return 0;
@@ -1467,7 +1162,7 @@ int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) {
SStreamDispatchRsp* pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t taskId = ntohl(pRsp->upstreamTaskId);
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
- tqDebug("recv dispatch rsp, code: %x", pMsg->code);
+ tqDebug("recv dispatch rsp, code:%x", pMsg->code);
if (pTask) {
streamProcessDispatchRsp(pTask, pRsp, pMsg->code);
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
@@ -1495,10 +1190,7 @@ int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
int32_t taskId = req.dstTaskId;
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
if (pTask) {
- SRpcMsg rsp = {
- .info = pMsg->info,
- .code = 0,
- };
+ SRpcMsg rsp = { .info = pMsg->info, .code = 0 };
streamProcessRetrieveReq(pTask, &req, &rsp);
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
tDeleteStreamRetrieveReq(&req);
@@ -1534,10 +1226,7 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) {
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
if (pTask) {
- SRpcMsg rsp = {
- .info = pMsg->info,
- .code = 0,
- };
+ SRpcMsg rsp = { .info = pMsg->info, .code = 0 };
streamProcessDispatchReq(pTask, &req, &rsp, false);
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
rpcFreeCont(pMsg->pCont);
@@ -1554,10 +1243,7 @@ FAIL:
SMsgHead* pRspHead = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp));
if (pRspHead == NULL) {
- SRpcMsg rsp = {
- .code = TSDB_CODE_OUT_OF_MEMORY,
- .info = pMsg->info,
- };
+ SRpcMsg rsp = { .code = TSDB_CODE_OUT_OF_MEMORY, .info = pMsg->info };
tqDebug("send dispatch error rsp, code: %x", code);
tmsgSendRsp(&rsp);
rpcFreeCont(pMsg->pCont);
@@ -1575,11 +1261,7 @@ FAIL:
pRsp->inputStatus = TASK_OUTPUT_STATUS__NORMAL;
SRpcMsg rsp = {
- .code = code,
- .info = pMsg->info,
- .contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp),
- .pCont = pRspHead,
- };
+ .code = code, .info = pMsg->info, .contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp), .pCont = pRspHead};
tqDebug("send dispatch error rsp, code: %x", code);
tmsgSendRsp(&rsp);
rpcFreeCont(pMsg->pCont);
@@ -1588,3 +1270,40 @@ FAIL:
}
int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; }
+
+// Request a WAL scan for the stream tasks. The first pending request enqueues a WAL_READ_TASKS_ID run message;
+// later requests only bump pMeta->walScan. Returns 0 on success, or -1 with terrno set on allocation failure.
+int32_t tqStartStreamTasks(STQ* pTq) {
+  int32_t      vgId = TD_VID(pTq->pVnode);
+  SStreamMeta* pMeta = pTq->pStreamMeta;
+  taosWLockLatch(&pMeta->lock);
+  pMeta->walScan += 1;
+  if (pMeta->walScan > 1) {
+    tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScan);
+    taosWUnLockLatch(&pMeta->lock);
+    return 0;
+  }
+
+  SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
+  if (pRunReq == NULL) {
+    // nothing was enqueued: undo the increment, or every later call would see walScan > 1 and never start a scan
+    pMeta->walScan -= 1;
+    terrno = TSDB_CODE_OUT_OF_MEMORY;
+    tqError("vgId:%d failed restore stream tasks, code:%s", vgId, terrstr(terrno));
+    taosWUnLockLatch(&pMeta->lock);
+    return -1;
+  }
+
+  int32_t numOfTasks = taosHashGetSize(pMeta->pTasks);
+  tqInfo("vgId:%d start wal scan stream tasks, tasks:%d", vgId, numOfTasks);
+  initOffsetForAllRestoreTasks(pTq);
+  pRunReq->head.vgId = vgId;
+  pRunReq->streamId = 0;
+  pRunReq->taskId = WAL_READ_TASKS_ID;
+
+  // NOTE(review): the result of tmsgPutToQueue is ignored; confirm whether a failed enqueue must undo walScan too
+  SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
+  tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg);
+  taosWUnLockLatch(&pMeta->lock);
+  return 0;
+}
diff --git a/source/dnode/vnode/src/tq/tqCommit.c b/source/dnode/vnode/src/tq/tqCommit.c
index 7fc66c4919..0f5daa31ad 100644
--- a/source/dnode/vnode/src/tq/tqCommit.c
+++ b/source/dnode/vnode/src/tq/tqCommit.c
@@ -16,10 +16,13 @@
#include "tq.h"
int tqCommit(STQ* pTq) {
+#if 0
+ // disabled: the stream meta commit is not aligned with the vnode commit, so only the offset file is committed here
if (streamMetaCommit(pTq->pStreamMeta) < 0) {
tqError("vgId:%d, failed to commit stream meta since %s", TD_VID(pTq->pVnode), terrstr());
return -1;
}
+#endif
return tqOffsetCommitFile(pTq->pOffsetStore);
}
diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c
index 7b0cdab2f8..cd8cefb307 100644
--- a/source/dnode/vnode/src/tq/tqMeta.c
+++ b/source/dnode/vnode/src/tq/tqMeta.c
@@ -320,15 +320,15 @@ int32_t tqMetaRestoreHandle(STQ* pTq) {
code = -1;
goto end;
}
- handle.execHandle.pExecReader = qExtractReaderFromStreamScanner(scanner);
- if (handle.execHandle.pExecReader == NULL) {
+ handle.execHandle.pTqReader = qExtractReaderFromStreamScanner(scanner);
+ if (handle.execHandle.pTqReader == NULL) {
tqError("cannot extract exec reader for %s", handle.subKey);
code = -1;
goto end;
}
} else if (handle.execHandle.subType == TOPIC_SUB_TYPE__DB) {
handle.pWalReader = walOpenReader(pTq->pVnode->pWal, NULL);
- handle.execHandle.pExecReader = tqOpenReader(pTq->pVnode);
+ handle.execHandle.pTqReader = tqOpenReader(pTq->pVnode);
buildSnapContext(reader.meta, reader.version, 0, handle.execHandle.subType, handle.fetchMeta,
(SSnapContext**)(&reader.sContext));
@@ -343,8 +343,8 @@ int32_t tqMetaRestoreHandle(STQ* pTq) {
int64_t tbUid = *(int64_t*)taosArrayGet(tbUidList, i);
tqDebug("vgId:%d, idx %d, uid:%" PRId64, vgId, i, tbUid);
}
- handle.execHandle.pExecReader = tqOpenReader(pTq->pVnode);
- tqReaderSetTbUidList(handle.execHandle.pExecReader, tbUidList);
+ handle.execHandle.pTqReader = tqOpenReader(pTq->pVnode);
+ tqReaderSetTbUidList(handle.execHandle.pTqReader, tbUidList);
taosArrayDestroy(tbUidList);
buildSnapContext(reader.meta, reader.version, handle.execHandle.execTb.suid, handle.execHandle.subType,
diff --git a/source/dnode/vnode/src/tq/tqOffset.c b/source/dnode/vnode/src/tq/tqOffset.c
index 66d1ac2c7e..e8051a1406 100644
--- a/source/dnode/vnode/src/tq/tqOffset.c
+++ b/source/dnode/vnode/src/tq/tqOffset.c
@@ -128,31 +128,35 @@ int32_t tqOffsetDelete(STqOffsetStore* pStore, const char* subscribeKey) {
}
int32_t tqOffsetCommitFile(STqOffsetStore* pStore) {
- if (!pStore->needCommit) return 0;
+ if (!pStore->needCommit) {
+ return 0;
+ }
+
// TODO file name should be with a newer version
char* fname = tqOffsetBuildFName(pStore->pTq->path, 0);
TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND);
if (pFile == NULL) {
terrno = TAOS_SYSTEM_ERROR(errno);
-
- int32_t err = terrno;
- const char* errStr = tstrerror(err);
- int32_t sysErr = errno;
- const char* sysErrStr = strerror(errno);
- tqError("vgId:%d, cannot open file %s when commit offset since %s", pStore->pTq->pVnode->config.vgId, fname,
- sysErrStr);
+ const char* err = strerror(errno);
+ tqError("vgId:%d, failed to open offset file %s, since %s", TD_VID(pStore->pTq->pVnode), fname, err);
taosMemoryFree(fname);
return -1;
}
+
taosMemoryFree(fname);
+
void* pIter = NULL;
while (1) {
pIter = taosHashIterate(pStore->pHash, pIter);
- if (pIter == NULL) break;
+ if (pIter == NULL) {
+ break;
+ }
+
STqOffset* pOffset = (STqOffset*)pIter;
int32_t bodyLen;
int32_t code;
tEncodeSize(tEncodeSTqOffset, pOffset, bodyLen, code);
+
if (code < 0) {
taosHashCancelIterate(pStore->pHash, pIter);
return -1;
@@ -166,6 +170,7 @@ int32_t tqOffsetCommitFile(STqOffsetStore* pStore) {
SEncoder encoder;
tEncoderInit(&encoder, abuf, bodyLen);
tEncodeSTqOffset(&encoder, pOffset);
+
// write file
int64_t writeLen;
if ((writeLen = taosWriteFile(pFile, buf, totLen)) != totLen) {
@@ -174,8 +179,10 @@ int32_t tqOffsetCommitFile(STqOffsetStore* pStore) {
taosMemoryFree(buf);
return -1;
}
+
taosMemoryFree(buf);
}
+
// close and rename file
taosCloseFile(&pFile);
pStore->needCommit = 0;
diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c
index a406a793dc..7a1a6b7454 100644
--- a/source/dnode/vnode/src/tq/tqPush.c
+++ b/source/dnode/vnode/src/tq/tqPush.c
@@ -323,15 +323,22 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v
taosWUnLockLatch(&pTq->lock);
}
- // push data for stream processing
- if (!tsDisableStream && vnodeIsRoleLeader(pTq->pVnode)) {
+ tqDebug("handle submit, restore:%d, size:%d", pTq->pVnode->restored, (int)taosHashGetSize(pTq->pStreamMeta->pTasks));
+
+ // push data for stream processing:
+ // 1. the vnode has already been restored.
+ // 2. the vnode should be the leader.
+ // 3. the stream is not suspended yet.
+ if (!tsDisableStream && vnodeIsRoleLeader(pTq->pVnode) && pTq->pVnode->restored) {
if (taosHashGetSize(pTq->pStreamMeta->pTasks) == 0) {
return 0;
}
if (msgType == TDMT_VND_SUBMIT) {
+#if 0
void* data = taosMemoryMalloc(len);
if (data == NULL) {
+ // todo: for all stream in this vnode, keep this offset in the offset files, and wait for a moment, and then retry
terrno = TSDB_CODE_OUT_OF_MEMORY;
tqError("vgId:%d, failed to copy submit data for stream processing, since out of memory", vgId);
return -1;
@@ -340,7 +347,10 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v
memcpy(data, pReq, len);
SPackedData submit = {.msgStr = data, .msgLen = len, .ver = ver};
- tqDebug("tq copy write msg %p %d %" PRId64 " from %p", data, len, ver, pReq);
+ tqDebug("vgId:%d tq copy submit msg:%p len:%d ver:%" PRId64 " from %p for stream", vgId, data, len, ver, pReq);
+ tqProcessSubmitReq(pTq, submit);
+#endif
+ SPackedData submit = {0};
tqProcessSubmitReq(pTq, submit);
}
@@ -352,7 +362,7 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v
return 0;
}
-int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp,
+int32_t tqRegisterPushHandle(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp,
int32_t type) {
uint64_t consumerId = pRequest->consumerId;
int32_t vgId = TD_VID(pTq->pVnode);
@@ -389,7 +399,7 @@ int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest,
return 0;
}
-int32_t tqUnregisterPushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer) {
+int32_t tqUnregisterPushHandle(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer) {
int32_t vgId = TD_VID(pTq->pVnode);
STqPushEntry** pEntry = taosHashGet(pTq->pPushMgr, pKey, keyLen);
diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c
index 72b478e6bf..25ab7209d2 100644
--- a/source/dnode/vnode/src/tq/tqRead.c
+++ b/source/dnode/vnode/src/tq/tqRead.c
@@ -113,7 +113,7 @@ bool isValValidForTable(STqHandle* pHandle, SWalCont* pHead) {
}
SMetaReader mr = {0};
- metaReaderInit(&mr, pHandle->execHandle.pExecReader->pVnodeMeta, 0);
+ metaReaderInit(&mr, pHandle->execHandle.pTqReader->pVnodeMeta, 0);
if (metaGetTableEntryByName(&mr, req.tbName) < 0) {
metaReaderClear(&mr);
@@ -262,8 +262,6 @@ STqReader* tqOpenReader(SVnode* pVnode) {
}
pReader->pVnodeMeta = pVnode->pMeta;
- /*pReader->pMsg = NULL;*/
-// pReader->ver = -1;
pReader->pColIdList = NULL;
pReader->cachedSchemaVer = 0;
pReader->cachedSchemaSuid = 0;
@@ -298,7 +296,29 @@ int32_t tqSeekVer(STqReader* pReader, int64_t ver, const char* id) {
if (walReadSeekVer(pReader->pWalReader, ver) < 0) {
return -1;
}
- tqDebug("tmq poll: wal reader seek to ver success ver:%"PRId64" %s", ver, id);
+ tqDebug("wal reader seek to ver:%"PRId64" %s", ver, id);
+ return 0;
+}
+
+// Advance pReader to the next valid WAL message and copy its payload into a newly allocated buffer.
+// On success returns 0 with the buffer, its length and the WAL version stored in *pPackedData; returns -1 on failure.
+int32_t extractSubmitMsgFromWal(SWalReader* pReader, SPackedData* pPackedData) {
+ if (walNextValidMsg(pReader) < 0) {
+ return -1;
+ }
+ // the payload starts after the SSubmitReq2Msg header at the front of the WAL body
+ void* pBody = POINTER_SHIFT(pReader->pHead->head.body, sizeof(SSubmitReq2Msg));
+ int32_t len = pReader->pHead->head.bodyLen - sizeof(SSubmitReq2Msg);
+ int64_t ver = pReader->pHead->head.version;
+ void* data = taosMemoryMalloc(len);
+ if (data == NULL) {
+ // todo: for all stream in this vnode, keep this offset in the offset files, and wait for a moment, and then retry
+ terrno = TSDB_CODE_OUT_OF_MEMORY;
+ tqError("vgId:%d, failed to copy submit data for stream processing, since out of memory", 0);
+ return -1;
+ }
+ memcpy(data, pBody, len);
+ *pPackedData = (SPackedData){.ver = ver, .msgLen = len, .msgStr = data};
return 0;
}
@@ -309,26 +329,28 @@ void tqNextBlock(STqReader* pReader, SFetchRet* ret) {
ret->fetchType = FETCH_TYPE__NONE;
return;
}
- void* body = POINTER_SHIFT(pReader->pWalReader->pHead->head.body, sizeof(SSubmitReq2Msg));
+
+ void* pBody = POINTER_SHIFT(pReader->pWalReader->pHead->head.body, sizeof(SSubmitReq2Msg));
int32_t bodyLen = pReader->pWalReader->pHead->head.bodyLen - sizeof(SSubmitReq2Msg);
int64_t ver = pReader->pWalReader->pHead->head.version;
- tqReaderSetSubmitReq2(pReader, body, bodyLen, ver);
+ tqReaderSetSubmitMsg(pReader, pBody, bodyLen, ver);
}
- while (tqNextDataBlock2(pReader)) {
+ while (tqNextDataBlock(pReader)) {
memset(&ret->data, 0, sizeof(SSDataBlock));
int32_t code = tqRetrieveDataBlock2(&ret->data, pReader, NULL);
if (code != 0 || ret->data.info.rows == 0) {
continue;
}
+
ret->fetchType = FETCH_TYPE__DATA;
return;
}
}
}
-int32_t tqReaderSetSubmitReq2(STqReader* pReader, void* msgStr, int32_t msgLen, int64_t ver) {
+int32_t tqReaderSetSubmitMsg(STqReader* pReader, void* msgStr, int32_t msgLen, int64_t ver) {
pReader->msg2.msgStr = msgStr;
pReader->msg2.msgLen = msgLen;
pReader->msg2.ver = ver;
@@ -345,7 +367,7 @@ int32_t tqReaderSetSubmitReq2(STqReader* pReader, void* msgStr, int32_t msgLen,
return 0;
}
-bool tqNextDataBlock2(STqReader* pReader) {
+bool tqNextDataBlock(STqReader* pReader) {
if (pReader->msg2.msgStr == NULL) {
return false;
}
@@ -354,13 +376,20 @@ bool tqNextDataBlock2(STqReader* pReader) {
while (pReader->nextBlk < blockSz) {
tqDebug("tq reader next data block %p, %d %" PRId64 " %d", pReader->msg2.msgStr, pReader->msg2.msgLen,
pReader->msg2.ver, pReader->nextBlk);
+
SSubmitTbData* pSubmitTbData = taosArrayGet(pReader->submit.aSubmitTbData, pReader->nextBlk);
- if (pReader->tbIdHash == NULL) return true;
+ if (pReader->tbIdHash == NULL) {
+ return true;
+ }
void* ret = taosHashGet(pReader->tbIdHash, &pSubmitTbData->uid, sizeof(int64_t));
if (ret != NULL) {
+ tqDebug("tq reader block found, ver:%"PRId64", uid:%"PRId64, pReader->msg2.ver, pSubmitTbData->uid);
return true;
+ } else {
+ tqDebug("tq reader discard block, uid:%"PRId64", continue", pSubmitTbData->uid);
}
+
pReader->nextBlk++;
}
@@ -427,7 +456,10 @@ int32_t tqRetrieveDataBlock2(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbD
SSubmitTbData* pSubmitTbData = taosArrayGet(pReader->submit.aSubmitTbData, pReader->nextBlk);
pReader->nextBlk++;
- if (pSubmitTbDataRet) *pSubmitTbDataRet = pSubmitTbData;
+ if (pSubmitTbDataRet) {
+ *pSubmitTbDataRet = pSubmitTbData;
+ }
+
int32_t sversion = pSubmitTbData->sver;
int64_t suid = pSubmitTbData->suid;
int64_t uid = pSubmitTbData->uid;
@@ -900,7 +932,7 @@ int tqReaderSetTbUidList(STqReader* pReader, const SArray* tbUidList) {
return 0;
}
-int tqReaderAddTbUidList(STqReader* pReader, const SArray* tbUidList) {
+int tqReaderAddTbUidList(STqReader* pReader, const SArray* pTableUidList) {
if (pReader->tbIdHash == NULL) {
pReader->tbIdHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_ENTRY_LOCK);
if (pReader->tbIdHash == NULL) {
@@ -909,8 +941,9 @@ int tqReaderAddTbUidList(STqReader* pReader, const SArray* tbUidList) {
}
}
- for (int i = 0; i < taosArrayGetSize(tbUidList); i++) {
- int64_t* pKey = (int64_t*)taosArrayGet(tbUidList, i);
+ int32_t numOfTables = taosArrayGetSize(pTableUidList);
+ for (int i = 0; i < numOfTables; i++) {
+ int64_t* pKey = (int64_t*)taosArrayGet(pTableUidList, i);
taosHashPut(pReader->tbIdHash, pKey, sizeof(int64_t), NULL, 0);
}
@@ -926,30 +959,34 @@ int tqReaderRemoveTbUidList(STqReader* pReader, const SArray* tbUidList) {
return 0;
}
+// todo update the table list in wal reader
int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) {
- void* pIter = NULL;
+ void* pIter = NULL;
+ int32_t vgId = TD_VID(pTq->pVnode);
+
+ // update the table list for each consumer handle
while (1) {
pIter = taosHashIterate(pTq->pHandle, pIter);
if (pIter == NULL) {
break;
}
- STqHandle* pExec = (STqHandle*)pIter;
- if (pExec->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
- int32_t code = qUpdateQualifiedTableId(pExec->execHandle.task, tbUidList, isAdd);
+ STqHandle* pTqHandle = (STqHandle*)pIter;
+ if (pTqHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
+ int32_t code = qUpdateTableListForStreamScanner(pTqHandle->execHandle.task, tbUidList, isAdd);
if (code != 0) {
- tqError("update qualified table error for %s", pExec->subKey);
+ tqError("update qualified table error for %s", pTqHandle->subKey);
continue;
}
- } else if (pExec->execHandle.subType == TOPIC_SUB_TYPE__DB) {
+ } else if (pTqHandle->execHandle.subType == TOPIC_SUB_TYPE__DB) {
if (!isAdd) {
int32_t sz = taosArrayGetSize(tbUidList);
for (int32_t i = 0; i < sz; i++) {
int64_t tbUid = *(int64_t*)taosArrayGet(tbUidList, i);
- taosHashPut(pExec->execHandle.execDb.pFilterOutTbUid, &tbUid, sizeof(int64_t), NULL, 0);
+ taosHashPut(pTqHandle->execHandle.execDb.pFilterOutTbUid, &tbUid, sizeof(int64_t), NULL, 0);
}
}
- } else if (pExec->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
+ } else if (pTqHandle->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
if (isAdd) {
SArray* qa = taosArrayInit(4, sizeof(tb_uid_t));
SMetaReader mr = {0};
@@ -964,35 +1001,43 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) {
}
tDecoderClear(&mr.coder);
-
- if (mr.me.type != TSDB_CHILD_TABLE || mr.me.ctbEntry.suid != pExec->execHandle.execTb.suid) {
+ if (mr.me.type != TSDB_CHILD_TABLE || mr.me.ctbEntry.suid != pTqHandle->execHandle.execTb.suid) {
tqDebug("table uid %" PRId64 " does not add to tq handle", *id);
continue;
}
+
tqDebug("table uid %" PRId64 " add to tq handle", *id);
taosArrayPush(qa, id);
}
+
metaReaderClear(&mr);
if (taosArrayGetSize(qa) > 0) {
- tqReaderAddTbUidList(pExec->execHandle.pExecReader, qa);
+ tqReaderAddTbUidList(pTqHandle->execHandle.pTqReader, qa);
}
+
taosArrayDestroy(qa);
} else {
- tqReaderRemoveTbUidList(pExec->execHandle.pExecReader, tbUidList);
+ tqReaderRemoveTbUidList(pTqHandle->execHandle.pTqReader, tbUidList);
}
}
}
+
+ // update the table list handle for each stream scanner/wal reader
while (1) {
pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter);
- if (pIter == NULL) break;
+ if (pIter == NULL) {
+ break;
+ }
+
SStreamTask* pTask = *(SStreamTask**)pIter;
if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
- int32_t code = qUpdateQualifiedTableId(pTask->exec.executor, tbUidList, isAdd);
+ int32_t code = qUpdateTableListForStreamScanner(pTask->exec.pExecutor, tbUidList, isAdd);
if (code != 0) {
- tqError("update qualified table error for stream task %d", pTask->taskId);
+ tqError("vgId:%d, s-task:%s update qualified table error for stream task", vgId, pTask->id.idStr);
continue;
}
}
}
+
return 0;
}
diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c
new file mode 100644
index 0000000000..6ed74ddcc3
--- /dev/null
+++ b/source/dnode/vnode/src/tq/tqRestore.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "tq.h"
+
+static int32_t streamTaskReplayWal(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, bool* pScanIdle);
+static int32_t transferToNormalTask(SStreamMeta* pStreamMeta, SArray* pTaskList);
+
+// Repeatedly replay the WAL into the stream tasks until every pending scan request (pMeta->walScan) is consumed;
+// always returns 0. This function should be executed by stream threads.
+// NOTE: if the WAL grows faster than the restore procedure consumes it, this loop may never finish.
+int tqStreamTasksScanWal(STQ* pTq) {
+ int32_t vgId = TD_VID(pTq->pVnode);
+ SStreamMeta* pMeta = pTq->pStreamMeta;
+ int64_t st = taosGetTimestampMs();
+
+ while (1) {
+ tqInfo("vgId:%d continue check if data in wal are available", vgId);
+
+ // run one pass over the tasks; despite its name, allFull receives the pScanIdle output of streamTaskReplayWal()
+ bool allFull = true;
+ streamTaskReplayWal(pTq->pStreamMeta, pTq->pOffsetStore, &allFull);
+
+ int32_t times = 0;
+
+ if (allFull) { // idle pass: consume one pending scan request under the meta write latch
+ taosWLockLatch(&pMeta->lock);
+ pMeta->walScan -= 1;
+ times = pMeta->walScan;
+
+ if (pMeta->walScan <= 0) { // no request left, stop scanning
+ taosWUnLockLatch(&pMeta->lock);
+ break;
+ }
+
+ taosWUnLockLatch(&pMeta->lock);
+ tqInfo("vgId:%d scan wal for stream tasks for %d times", vgId, times);
+ }
+ }
+
+ double el = (taosGetTimestampMs() - st) / 1000.0;
+ tqInfo("vgId:%d scan wal for stream tasks completed, elapsed time:%.2f sec", vgId, el);
+
+ // restore wal scan flag (currently disabled)
+// atomic_store_8(&pTq->pStreamMeta->walScan, 0);
+ return 0;
+}
+
+//int32_t transferToNormalTask(SStreamMeta* pStreamMeta, SArray* pTaskList) {
+// int32_t numOfTask = taosArrayGetSize(pTaskList);
+// if (numOfTask <= 0) {
+// return TSDB_CODE_SUCCESS;
+// }
+//
+// // todo: add lock
+// for (int32_t i = 0; i < numOfTask; ++i) {
+// SStreamTask* pTask = taosArrayGetP(pTaskList, i);
+// tqDebug("vgId:%d transfer s-task:%s state restore -> ready, checkpoint:%" PRId64 " checkpoint id:%" PRId64,
+// pStreamMeta->vgId, pTask->id.idStr, pTask->chkInfo.version, pTask->chkInfo.id);
+// taosHashRemove(pStreamMeta->pWalReadTasks, &pTask->id.taskId, sizeof(pTask->id.taskId));
+//
+// // NOTE: do not change the following order
+// atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL);
+// taosHashPut(pStreamMeta->pTasks, &pTask->id.taskId, sizeof(pTask->id.taskId), &pTask, POINTER_BYTES);
+// }
+//
+// return TSDB_CODE_SUCCESS;
+//}
+
+// Make one pass over the source-level stream tasks, handing each ready task one submit block read from the WAL.
+// On return *pScanIdle is true when no block could be extracted for any task in this pass; always returns 0.
+int32_t streamTaskReplayWal(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, bool* pScanIdle) {
+ void* pIter = NULL;
+ int32_t vgId = pStreamMeta->vgId;
+ *pScanIdle = true;
+ bool allWalChecked = true;
+ tqDebug("vgId:%d start to check wal to extract new submit block", vgId);
+
+ while (1) {
+ pIter = taosHashIterate(pStreamMeta->pTasks, pIter);
+ if (pIter == NULL) {
+ break;
+ }
+
+ SStreamTask* pTask = *(SStreamTask**)pIter;
+ if (pTask->taskLevel != TASK_LEVEL__SOURCE) {
+ continue;
+ }
+
+ if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE ||
+ pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) {
+ tqDebug("s-task:%s skip push data, not ready for processing, status %d", pTask->id.idStr,
+ pTask->status.taskStatus);
+ continue;
+ }
+
+ // build the offset-store key of this task
+ char key[128] = {0};
+ createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId);
+
+ if (tInputQueueIsFull(pTask)) {
+ tqDebug("vgId:%d s-task:%s input queue is full, do nothing", vgId, pTask->id.idStr);
+ continue;
+ }
+
+ *pScanIdle = false;
+
+ // the offset of this task is expected to exist in the store already
+ STqOffset* pOffset = tqOffsetRead(pOffsetStore, key);
+ ASSERT(pOffset != NULL);
+
+ // seek the stored version and extract data from WAL
+ int32_t code = walReadSeekVer(pTask->exec.pWalReader, pOffset->val.version);
+ if (code != TSDB_CODE_SUCCESS) { // seek failed (presumably no data in wal), skip this task in this pass
+ continue;
+ }
+
+ // append the data for the stream
+ tqDebug("vgId:%d wal reader seek to ver:%" PRId64 " %s", vgId, pOffset->val.version, pTask->id.idStr);
+
+ SPackedData packData = {0};
+ code = extractSubmitMsgFromWal(pTask->exec.pWalReader, &packData);
+ if (code != TSDB_CODE_SUCCESS) { // failed, continue
+ continue;
+ }
+ // NOTE(review): packData.msgStr is heap-allocated by extractSubmitMsgFromWal(); confirm who frees it if p == NULL
+ SStreamDataSubmit2* p = streamDataSubmitNew(packData, STREAM_INPUT__DATA_SUBMIT);
+ if (p == NULL) {
+ terrno = TSDB_CODE_OUT_OF_MEMORY;
+ tqError("%s failed to create data submit for stream since out of memory", pTask->id.idStr);
+ continue;
+ }
+
+ allWalChecked = false; // a block was extracted, so this pass is not idle
+
+ tqDebug("s-task:%s submit data extracted from WAL", pTask->id.idStr);
+ code = tqAddInputBlockNLaunchTask(pTask, (SStreamQueueItem*)p, packData.ver);
+ if (code == TSDB_CODE_SUCCESS) {
+ pOffset->val.version = walReaderGetCurrentVer(pTask->exec.pWalReader);
+ tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr,
+ pOffset->val.version);
+ } else {
+ // the offset is not advanced, so the same version is sought again on the next pass
+ }
+
+ streamDataSubmitDestroy(p);
+ taosFreeQitem(p);
+ }
+
+ if (allWalChecked) {
+ *pScanIdle = true;
+ }
+ return 0;
+}
+
diff --git a/source/dnode/vnode/src/tq/tqScan.c b/source/dnode/vnode/src/tq/tqScan.c
index f01e169a53..27db66f048 100644
--- a/source/dnode/vnode/src/tq/tqScan.c
+++ b/source/dnode/vnode/src/tq/tqScan.c
@@ -38,7 +38,7 @@ int32_t tqAddBlockDataToRsp(const SSDataBlock* pBlock, SMqDataRsp* pRsp, int32_t
}
static int32_t tqAddBlockSchemaToRsp(const STqExecHandle* pExec, STaosxRsp* pRsp) {
- SSchemaWrapper* pSW = tCloneSSchemaWrapper(pExec->pExecReader->pSchemaWrapper);
+ SSchemaWrapper* pSW = tCloneSSchemaWrapper(pExec->pTqReader->pSchemaWrapper);
if (pSW == NULL) {
return -1;
}
@@ -137,7 +137,7 @@ int32_t tqScanTaosx(STQ* pTq, const STqHandle* pHandle, STaosxRsp* pRsp, SMqMeta
if (pDataBlock != NULL && pDataBlock->info.rows > 0) {
if (pRsp->withTbName) {
if (pOffset->type == TMQ_OFFSET__LOG) {
- int64_t uid = pExec->pExecReader->lastBlkUid;
+ int64_t uid = pExec->pTqReader->lastBlkUid;
if (tqAddTbNameToRsp(pTq, uid, pRsp, 1) < 0) {
continue;
}
@@ -203,9 +203,9 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
SArray* pSchemas = taosArrayInit(0, sizeof(void*));
if (pExec->subType == TOPIC_SUB_TYPE__TABLE) {
- STqReader* pReader = pExec->pExecReader;
- tqReaderSetSubmitReq2(pReader, submit.msgStr, submit.msgLen, submit.ver);
- while (tqNextDataBlock2(pReader)) {
+ STqReader* pReader = pExec->pTqReader;
+ tqReaderSetSubmitMsg(pReader, submit.msgStr, submit.msgLen, submit.ver);
+ while (tqNextDataBlock(pReader)) {
taosArrayClear(pBlocks);
taosArrayClear(pSchemas);
SSubmitTbData* pSubmitTbDataRet = NULL;
@@ -213,7 +213,7 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
if (terrno == TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND) continue;
}
if (pRsp->withTbName) {
- int64_t uid = pExec->pExecReader->lastBlkUid;
+ int64_t uid = pExec->pTqReader->lastBlkUid;
if (tqAddTbNameToRsp(pTq, uid, pRsp, taosArrayGetSize(pBlocks)) < 0) {
taosArrayDestroyEx(pBlocks, (FDelete)blockDataFreeRes);
taosArrayDestroyP(pSchemas, (FDelete)tDeleteSSchemaWrapper);
@@ -262,8 +262,8 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
}
}
} else if (pExec->subType == TOPIC_SUB_TYPE__DB) {
- STqReader* pReader = pExec->pExecReader;
- tqReaderSetSubmitReq2(pReader, submit.msgStr, submit.msgLen, submit.ver);
+ STqReader* pReader = pExec->pTqReader;
+ tqReaderSetSubmitMsg(pReader, submit.msgStr, submit.msgLen, submit.ver);
while (tqNextDataBlockFilterOut2(pReader, pExec->execDb.pFilterOutTbUid)) {
taosArrayClear(pBlocks);
taosArrayClear(pSchemas);
@@ -272,7 +272,7 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
if (terrno == TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND) continue;
}
if (pRsp->withTbName) {
- int64_t uid = pExec->pExecReader->lastBlkUid;
+ int64_t uid = pExec->pTqReader->lastBlkUid;
if (tqAddTbNameToRsp(pTq, uid, pRsp, taosArrayGetSize(pBlocks)) < 0) {
taosArrayDestroyEx(pBlocks, (FDelete)blockDataFreeRes);
taosArrayDestroyP(pSchemas, (FDelete)tDeleteSSchemaWrapper);
diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c
index 4645df5b67..62b81305b7 100644
--- a/source/dnode/vnode/src/tq/tqSink.c
+++ b/source/dnode/vnode/src/tq/tqSink.c
@@ -87,7 +87,7 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d
return;
}
- tqDebug("vgId:%d, task %d write into table, block num: %d", TD_VID(pVnode), pTask->taskId, blockSz);
+ tqDebug("vgId:%d, s-task:%s write into table, block num: %d", TD_VID(pVnode), pTask->id.idStr, blockSz);
for (int32_t i = 0; i < blockSz; i++) {
bool createTb = true;
SSDataBlock* pDataBlock = taosArrayGet(pBlocks, i);
@@ -382,7 +382,7 @@ void tqSinkToTablePipeline2(SStreamTask* pTask, void* vnode, int64_t ver, void*
int32_t blockSz = taosArrayGetSize(pBlocks);
- tqDebug("vgId:%d, task %d write into table, block num: %d", TD_VID(pVnode), pTask->taskId, blockSz);
+ tqDebug("vgId:%d, s-task:%s write results blocks:%d into table", TD_VID(pVnode), pTask->id.idStr, blockSz);
void* pBuf = NULL;
SArray* tagArray = NULL;
@@ -475,11 +475,9 @@ void tqSinkToTablePipeline2(SStreamTask* pTask, void* vnode, int64_t ver, void*
}
for (int32_t tagId = UD_TAG_COLUMN_INDEX, step = 1; tagId < size; tagId++, step++) {
SColumnInfoData* pTagData = taosArrayGet(pDataBlock->pDataBlock, tagId);
- STagVal tagVal = {
- .cid = pTSchema->numOfCols + step,
- .type = pTagData->info.type,
- };
- void* pData = colDataGetData(pTagData, rowId);
+
+ STagVal tagVal = {.cid = pTSchema->numOfCols + step, .type = pTagData->info.type};
+ void* pData = colDataGetData(pTagData, rowId);
if (colDataIsNull_s(pTagData, rowId)) {
continue;
} else if (IS_VAR_DATA_TYPE(pTagData->info.type)) {
diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c
new file mode 100644
index 0000000000..4c37e1052f
--- /dev/null
+++ b/source/dnode/vnode/src/tq/tqUtil.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "tq.h"
+
+#define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0)  // a requested offset type below zero is handled as an offset reset request
+// forward declaration: the function is defined at the end of this file but is called before that point
+static int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp);
+
+/**
+ * Build the offset-store key of a stream task. The key has the form
+ * stream_task:stream_id:task_id, with both ids rendered by tintToHex().
+ *
+ * @param dst      output buffer; it must be large enough for the prefix, both encoded ids, the
+ *                 ':' separator and the terminating NUL (the callers in this file pass 128 bytes)
+ * @param streamId id of the stream the task belongs to
+ * @param taskId   id of the task
+ */
+void createStreamTaskOffsetKey(char* dst, uint64_t streamId, uint32_t taskId) {
+  static const char prefix[] = "stream_task:";
+  const int32_t     prefixLen = (int32_t)(sizeof(prefix) - 1);  // length without the NUL
+
+  char* p = dst;
+  memcpy(p, prefix, prefixLen);
+  p += prefixLen;
+
+  // tintToHex() returns the number of characters it wrote
+  p += tintToHex(streamId, p);
+  *(p++) = ':';
+  p += tintToHex(taskId, p);
+
+  // terminate explicitly instead of relying on the caller to hand in a zero-filled buffer
+  *p = 0;
+}
+
+/**
+ * Append one item to the input queue of a stream task and schedule the task for execution.
+ *
+ * @param pTask      stream task that receives the item
+ * @param pQueueItem item to append to the task's input queue
+ * @param ver        version reported in the error log when the item cannot be queued
+ * @return TSDB_CODE_SUCCESS on success, -1 if the item cannot be queued or the task cannot be
+ *         scheduled
+ */
+int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem, int64_t ver) {
+  if (tAppendDataToInputQueue(pTask, pQueueItem) < 0) {
+    tqError("s-task:%s failed to put into queue, too many, next start ver:%" PRId64, pTask->id.idStr, ver);
+    return -1;
+  }
+
+  if (streamSchedExec(pTask) < 0) {
+    // identify the task the same way as the log above (s-task:<idStr>)
+    tqError("s-task:%s failed to be launched, code:%s", pTask->id.idStr, tstrerror(terrno));
+    return -1;
+  }
+
+  return TSDB_CODE_SUCCESS;
+}
+
+/**
+ * Walk all stream tasks in pTq->pStreamMeta->pTasks and make sure every source-level task that
+ * is not in RECOVER_PREPARE / WAIT_DOWNSTREAM status has an entry in the offset store. A missing
+ * entry is created from the task's checkpoint version (pTask->chkInfo.version); an existing
+ * entry is left as it is.
+ */
+void initOffsetForAllRestoreTasks(STQ* pTq) {
+  void* pIter = NULL;
+
+  while ((pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter)) != NULL) {
+    SStreamTask* pTask = *(SStreamTask**)pIter;
+
+    // only source-level tasks are handled here
+    if (pTask->taskLevel != TASK_LEVEL__SOURCE) {
+      continue;
+    }
+
+    const bool notReady = (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE) ||
+                          (pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM);
+    if (notReady) {
+      tqDebug("s-task:%s skip push data, since not ready, status %d", pTask->id.idStr, pTask->status.taskStatus);
+      continue;
+    }
+
+    char offsetKey[128] = {0};
+    createStreamTaskOffsetKey(offsetKey, pTask->id.streamId, pTask->id.taskId);
+
+    // create the entry only when the store has none for this task
+    if (tqOffsetRead(pTq->pOffsetStore, offsetKey) == NULL) {
+      doSaveTaskOffset(pTq->pOffsetStore, offsetKey, pTask->chkInfo.version);
+    }
+  }
+}
+
+/**
+ * Walk all stream tasks in pTq->pStreamMeta->pTasks and, for every source-level task that is
+ * not in RECOVER_PREPARE / WAIT_DOWNSTREAM status, store `ver` as its log offset if the offset
+ * store has no entry for the task yet. Existing entries are not overwritten.
+ *
+ * @param pTq tq instance that owns the stream meta and the offset store
+ * @param ver version written for tasks that have no stored offset
+ */
+void saveOffsetForAllTasks(STQ* pTq, int64_t ver) {
+  void* pIter = NULL;
+
+  while ((pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter)) != NULL) {
+    SStreamTask* pTask = *(SStreamTask**)pIter;
+
+    // only source-level tasks are handled here
+    if (pTask->taskLevel != TASK_LEVEL__SOURCE) {
+      continue;
+    }
+
+    const bool notReady = (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE) ||
+                          (pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM);
+    if (notReady) {
+      tqDebug("s-task:%s skip push data, not ready for processing, status %d", pTask->id.idStr,
+              pTask->status.taskStatus);
+      continue;
+    }
+
+    char offsetKey[128] = {0};
+    createStreamTaskOffsetKey(offsetKey, pTask->id.streamId, pTask->id.taskId);
+
+    // write the offset only when the store has none for this task
+    if (tqOffsetRead(pTq->pOffsetStore, offsetKey) == NULL) {
+      doSaveTaskOffset(pTq->pOffsetStore, offsetKey, ver);
+    }
+  }
+}
+
+/**
+ * Write a log-type offset for the given key into the offset store.
+ *
+ * @param pOffsetStore store that receives the entry
+ * @param pKey         key of the entry; copied into the entry's subKey, bounded by its size
+ * @param ver          version the offset is reset to
+ */
+void doSaveTaskOffset(STqOffsetStore* pOffsetStore, const char* pKey, int64_t ver) {
+  STqOffset entry = {0};
+
+  tstrncpy(entry.subKey, pKey, tListLen(entry.subKey));
+  tqOffsetResetToLog(&entry.val, ver);
+
+  // hand the filled entry over to the offset store
+  tqOffsetWrite(pOffsetStore, &entry);
+}
+
+/**
+ * Prepare a data response for a poll request: copy the requested offset, create the (empty)
+ * block arrays and clear the table-name/schema flags.
+ *
+ * @param pRsp    response to initialize
+ * @param pReq    poll request the response belongs to
+ * @param subType not used by this function
+ * @return 0 on success, -1 if one of the arrays could not be created (the flags are then left
+ *         untouched)
+ */
+static int32_t tqInitDataRsp(SMqDataRsp* pRsp, const SMqPollReq* pReq, int8_t subType) {
+  pRsp->reqOffset = pReq->reqOffset;
+
+  pRsp->blockData = taosArrayInit(0, sizeof(void*));
+  pRsp->blockDataLen = taosArrayInit(0, sizeof(int32_t));
+
+  if (pRsp->blockData != NULL && pRsp->blockDataLen != NULL) {
+    pRsp->withTbName = 0;
+    pRsp->withSchema = false;
+    return 0;
+  }
+
+  return -1;
+}
+
+/**
+ * Prepare a taosx response for a poll request: copy the requested offset, turn on the
+ * table-name and schema flags and create the four (empty) per-block arrays.
+ *
+ * @param pRsp response to initialize
+ * @param pReq poll request the response belongs to
+ * @return 0 on success, -1 if any of the arrays could not be created
+ */
+static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, const SMqPollReq* pReq) {
+  pRsp->reqOffset = pReq->reqOffset;
+  pRsp->withTbName = 1;
+  pRsp->withSchema = 1;
+
+  pRsp->blockData = taosArrayInit(0, sizeof(void*));
+  pRsp->blockDataLen = taosArrayInit(0, sizeof(int32_t));
+  pRsp->blockTbName = taosArrayInit(0, sizeof(void*));
+  pRsp->blockSchema = taosArrayInit(0, sizeof(void*));
+
+  const bool allCreated = (pRsp->blockData != NULL) && (pRsp->blockDataLen != NULL) &&
+                          (pRsp->blockTbName != NULL) && (pRsp->blockSchema != NULL);
+  return allCreated ? 0 : -1;
+}
+// Resolve the start offset of a poll whose requested offset is a reset type. Returns 0, -1 (terrno set), or the result of tqSendDataRsp().
+static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest,
+ SRpcMsg* pMsg, bool* pBlockReturned) {
+ uint64_t consumerId = pRequest->consumerId;
+ STqOffsetVal reqOffset = pRequest->reqOffset;
+ STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, pRequest->subKey);
+ int32_t vgId = TD_VID(pTq->pVnode);
+ // *pBlockReturned tells the caller whether a response has already been sent from inside this function
+ *pBlockReturned = false;
+
+ // An offset is already stored for this subKey in this vnode, so continue from that stored offset.
+ if (pOffset != NULL) {
+ *pOffsetVal = pOffset->val;
+
+ char formatBuf[80];
+ tFormatOffset(formatBuf, 80, pOffsetVal);
+ tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, existed offset found, offset reset to %s and continue. reqId:0x%"PRIx64,
+ consumerId, pHandle->subKey, vgId, formatBuf, pRequest->reqId);
+ return 0;
+ } else {
+ // No offset is stored for this subKey; choose the start offset according to the requested reset type.
+ if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEAST) {
+ if (pRequest->useSnapshot) {
+ tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey:%s, vgId:%d, (earliest) set offset to be snapshot",
+ consumerId, pHandle->subKey, vgId);
+ // snapshot start: a meta offset when the handle fetches meta, otherwise a data offset (all arguments 0)
+ if (pHandle->fetchMeta) {
+ tqOffsetResetToMeta(pOffsetVal, 0);
+ } else {
+ tqOffsetResetToData(pOffsetVal, 0, 0);
+ }
+ } else {
+ pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef);
+ if (pHandle->pRef == NULL) {
+ terrno = TSDB_CODE_OUT_OF_MEMORY;
+ return -1;
+ }
+
+ // start from the version right before the referenced one (refVer - 1)
+ tqOffsetResetToLog(pOffsetVal, pHandle->pRef->refVer - 1);
+ }
+ } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) {
+ if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
+ SMqDataRsp dataRsp = {0};
+ tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType);
+ // reply right away with a block-less response whose offset is the value of walGetLastVer()
+ tqOffsetResetToLog(&dataRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal));
+ tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, (latest) offset reset to %" PRId64, consumerId,
+ pHandle->subKey, vgId, dataRsp.rspOffset.version);
+ int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_RSP);
+ tDeleteSMqDataRsp(&dataRsp);
+ // the response has been sent here; tqExtractDataForMq() stops when this flag is set
+ *pBlockReturned = true;
+ return code;
+ } else {
+ STaosxRsp taosxRsp = {0};
+ tqInitTaosxRsp(&taosxRsp, pRequest);
+ tqOffsetResetToLog(&taosxRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal));
+ int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
+ tDeleteSTaosxRsp(&taosxRsp);
+
+ *pBlockReturned = true;
+ return code;
+ }
+ } else if (reqOffset.type == TMQ_OFFSET__RESET_NONE) {
+ tqError("tmq poll: subkey:%s, no offset committed for consumer:0x%" PRIx64 " in vg %d, subkey %s, reset none failed",
+ pHandle->subKey, consumerId, vgId, pRequest->subKey);
+ terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET;
+ return -1;
+ }
+ }
+ // reached when no response was sent here; any other offset type leaves *pOffsetVal unchanged
+ return 0;
+}
+// Scan data for a column-type subscription and either answer the poll or register it for a later push; returns the code of the scan/register/send call.
+static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest,
+ SRpcMsg* pMsg, STqOffsetVal* pOffset) {
+ uint64_t consumerId = pRequest->consumerId;
+ int32_t vgId = TD_VID(pTq->pVnode);
+
+ SMqDataRsp dataRsp = {0};
+ tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType);
+
+ // hold pTq->lock (write) while scanning and while registering/sending the response
+ taosWLockLatch(&pTq->lock);
+
+ qSetTaskId(pHandle->execHandle.task, consumerId, pRequest->reqId);
+ int code = tqScanData(pTq, pHandle, &dataRsp, pOffset);
+ if(code != 0) {
+ goto end;
+ }
+
+ // No block was scanned and the log offset did not move: register the request so new data can be pushed to the consumer once it arrives.
+ if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG &&
+ dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) {
+ code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP);
+ taosWUnLockLatch(&pTq->lock);
+ return code;
+ }
+ // NOTE(review): dataRsp is not released on the path above - presumably tqRegisterPushHandle() keeps it; confirm.
+
+ code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_RSP);
+
+ // NOTE: this pHandle->consumerId may have been changed already.
+ // common exit for scan failure and for a sent response: log, unlock, release the response
+ end:
+ {
+ char buf[80] = {0};
+ tFormatOffset(buf, 80, &dataRsp.rspOffset);
+ tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, rsp offset type:%s, reqId:0x%" PRIx64 " code:%d",
+ consumerId, pHandle->subKey, vgId, dataRsp.blockNum, buf, pRequest->reqId, code);
+ taosWUnLockLatch(&pTq->lock);
+ tDeleteSMqDataRsp(&dataRsp);
+ }
+ return code;
+}
+
+
+/**
+ * Serve one poll request of a db/stable (taosx) subscription and send the response.
+ *
+ * When the start offset is not a log offset, tqScanTaosx() runs first: a meta result or a
+ * non-empty data result is sent right away, otherwise the scan's response offset becomes the
+ * new start offset. When the (possibly updated) offset is a log offset, WAL entries are fetched
+ * one by one starting at offset->version + 1 until something has to be sent: a fetch failure,
+ * a non-submit (meta) entry, 4096 or more accumulated rows, or a response that carries
+ * create-table information. The loop also stops, without sending anything, when the handle's
+ * epoch is newer than the epoch of the request.
+ *
+ * @param pTq      tq instance of the vnode
+ * @param pHandle  subscription handle
+ * @param pRequest poll request being served
+ * @param pMsg     rpc message the response is sent for
+ * @param offset   start offset; overwritten with the scan's response offset when the first scan
+ *                 returns no block
+ * @return 0 or the result of the send call; -1 on failure
+ */
+static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg, STqOffsetVal *offset) {
+  int         code = 0;
+  int32_t     vgId = TD_VID(pTq->pVnode);
+  SWalCkHead* pCkHead = NULL;
+  SMqMetaRsp  metaRsp = {0};
+  STaosxRsp   taosxRsp = {0};
+  tqInitTaosxRsp(&taosxRsp, pRequest);
+
+  if (offset->type != TMQ_OFFSET__LOG) {
+    if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, offset) < 0) {
+      // leave through the common cleanup so the arrays created by tqInitTaosxRsp() are released
+      code = -1;
+      goto end;
+    }
+
+    if (metaRsp.metaRspLen > 0) {
+      code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp);
+      tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send meta offset type:%d,uid:%" PRId64 ",ts:%" PRId64,
+              pRequest->consumerId, pHandle->subKey, vgId, metaRsp.rspOffset.type, metaRsp.rspOffset.uid, metaRsp.rspOffset.ts);
+      taosMemoryFree(metaRsp.metaRsp);
+      goto end;
+    }
+
+    tqDebug("taosx poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send data blockNum:%d, offset type:%d,uid:%" PRId64
+            ",ts:%" PRId64,pRequest->consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type, taosxRsp.rspOffset.uid,taosxRsp.rspOffset.ts);
+    if (taosxRsp.blockNum > 0) {
+      code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
+      goto end;
+    }
+
+    // nothing to send yet: continue from where the scan stopped
+    *offset = taosxRsp.rspOffset;
+  }
+
+  if (offset->type == TMQ_OFFSET__LOG) {
+    int64_t fetchVer = offset->version + 1;
+    pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048);
+    if (pCkHead == NULL) {
+      tDeleteSTaosxRsp(&taosxRsp);
+      terrno = TSDB_CODE_OUT_OF_MEMORY;
+      return -1;
+    }
+
+    walSetReaderCapacity(pHandle->pWalReader, 2048);
+    int totalRows = 0;
+    while (1) {
+      int32_t savedEpoch = atomic_load_32(&pHandle->epoch);
+      if (savedEpoch > pRequest->epoch) {
+        tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey:%s vgId:%d offset %" PRId64
+               ", found new consumer epoch %d, discard req epoch %d", pRequest->consumerId, pRequest->epoch, pHandle->subKey, vgId, fetchVer, savedEpoch, pRequest->epoch);
+        break;
+      }
+
+      if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead, pRequest->reqId) < 0) {
+        tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer);
+        code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
+        goto end;
+      }
+
+      SWalCont* pHead = &pCkHead->head;
+      tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d", pRequest->consumerId,
+              pRequest->epoch, vgId, fetchVer, pHead->msgType);
+
+      // process meta
+      if (pHead->msgType != TDMT_VND_SUBMIT) {
+        if (totalRows > 0) {
+          // rows collected so far are sent first; the meta entry is left for the next poll
+          tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer - 1);
+          code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
+          goto end;
+        }
+
+        tqDebug("fetch meta msg, ver:%" PRId64 ", type:%s", pHead->version, TMSG_INFO(pHead->msgType));
+        tqOffsetResetToLog(&metaRsp.rspOffset, fetchVer);
+        metaRsp.resMsgType = pHead->msgType;
+        metaRsp.metaRspLen = pHead->bodyLen;
+        metaRsp.metaRsp = pHead->body;  // points into pCkHead, which is released at the end
+        code = (tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp) < 0) ? -1 : 0;
+        goto end;
+      }
+
+      // process data
+      SPackedData submit = {
+          .msgStr = POINTER_SHIFT(pHead->body, sizeof(SSubmitReq2Msg)),
+          .msgLen = pHead->bodyLen - sizeof(SSubmitReq2Msg),
+          .ver = pHead->version,
+      };
+
+      if (tqTaosxScanLog(pTq, pHandle, submit, &taosxRsp, &totalRows) < 0) {
+        tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", pRequest->consumerId, vgId,
+                pRequest->subKey);
+        code = -1;
+        goto end;
+      }
+
+      if (totalRows >= 4096 || taosxRsp.createTableNum > 0) {
+        tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer);
+        code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
+        goto end;
+      }
+
+      fetchVer++;
+    }
+  }
+
+end:
+  // common cleanup; pCkHead is still NULL when the WAL was never read
+  tDeleteSTaosxRsp(&taosxRsp);
+  taosMemoryFreeClear(pCkHead);
+  return code;
+}
+
+/**
+ * Entry point for serving a poll request: determine the start offset, then dispatch to the
+ * handler that matches the subscription type of the handle.
+ *
+ * @param pTq      tq instance of the vnode
+ * @param pHandle  subscription handle
+ * @param pRequest poll request
+ * @param pMsg     rpc message the response is sent for
+ * @return 0 when a response was already sent while resolving a reset offset; otherwise the
+ *         result of the reset step (if it failed) or of the selected handler
+ */
+int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg) {
+  STqOffsetVal startOffset = {0};
+
+  // 1. decide where to start
+  if (!IS_OFFSET_RESET_TYPE(pRequest->reqOffset.type)) {
+    // use the offset given by the consumer as it is
+    startOffset = pRequest->reqOffset;
+  } else {
+    // reset requested: resolve the offset according to the consumer's choice
+    bool    rspSent = false;
+    int32_t code = extractResetOffsetVal(&startOffset, pTq, pHandle, pRequest, pMsg, &rspSent);
+    if (code != 0) {
+      return code;
+    }
+
+    // an empty block has been returned already, nothing more to do
+    if (rspSent) {
+      return 0;
+    }
+  }
+
+  // 2. db/stable (taosx) subscriptions
+  // todo handle the case where re-balance occurs.
+  if (pHandle->execHandle.subType != TOPIC_SUB_TYPE__COLUMN) {
+    return extractDataAndRspForDbStbSubscribe(pTq, pHandle, pRequest, pMsg, &startOffset);
+  }
+
+  // 3. normal (column) subscriptions
+  return extractDataAndRspForNormalSubscribe(pTq, pHandle, pRequest, pMsg, &startOffset);
+}
+
+/**
+ * Encode a meta response and send it as the reply to a poll request. The message consists of a
+ * SMqRspHead (message type, epoch and consumer id of the request) followed by the encoded
+ * SMqMetaRsp.
+ *
+ * @param pTq  tq instance of the vnode (used for logging)
+ * @param pMsg rpc message being answered; its info is copied into the response
+ * @param pReq poll request
+ * @param pRsp meta response to encode
+ * @return 0 once the response was handed to tmsgSendRsp(), -1 if the encoded size cannot be
+ *         computed or the buffer cannot be allocated
+ */
+int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp) {
+  int32_t bodyLen = 0;
+  int32_t code = 0;
+  tEncodeSize(tEncodeSMqMetaRsp, pRsp, bodyLen, code);
+  if (code < 0) {
+    return -1;
+  }
+
+  int32_t totalLen = sizeof(SMqRspHead) + bodyLen;
+  void*   buf = rpcMallocCont(totalLen);
+  if (buf == NULL) {
+    return -1;
+  }
+
+  // fixed-size head first
+  SMqRspHead* pRspHead = (SMqRspHead*)buf;
+  pRspHead->mqMsgType = TMQ_MSG_TYPE__POLL_META_RSP;
+  pRspHead->epoch = pReq->epoch;
+  pRspHead->consumerId = pReq->consumerId;
+
+  // then the encoded body right behind the head
+  SEncoder encoder = {0};
+  tEncoderInit(&encoder, POINTER_SHIFT(buf, sizeof(SMqRspHead)), bodyLen);
+  tEncodeSMqMetaRsp(&encoder, pRsp);
+  tEncoderClear(&encoder);
+
+  SRpcMsg resp = {0};
+  resp.info = pMsg->info;
+  resp.pCont = buf;
+  resp.contLen = totalLen;
+  resp.code = 0;
+  tmsgSendRsp(&resp);
+
+  tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) send rsp, res msg type %d, offset type:%d",
+          TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->resMsgType, pRsp->rspOffset.type);
+
+  return 0;
+}
diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c
index a58d050009..89686c3d33 100644
--- a/source/dnode/vnode/src/tsdb/tsdbRead.c
+++ b/source/dnode/vnode/src/tsdb/tsdbRead.c
@@ -2089,7 +2089,7 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader,
pBlockScanInfo->lastKey = tsLastBlock;
return TSDB_CODE_SUCCESS;
} else {
- int32_t code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema);
+ code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
@@ -2113,7 +2113,7 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader,
}
}
} else { // not merge block data
- int32_t code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema);
+ code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
@@ -2576,7 +2576,7 @@ int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBloc
SRow* pTSRow = NULL;
SRowMerger merge = {0};
- int32_t code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema);
+ code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
@@ -3243,8 +3243,8 @@ static int32_t readRowsCountFromFiles(STsdbReader* pReader) {
int32_t code = TSDB_CODE_SUCCESS;
while (1) {
- bool hasNext = false;
- int32_t code = filesetIteratorNext(&pReader->status.fileIter, pReader, &hasNext);
+ bool hasNext = false;
+ code = filesetIteratorNext(&pReader->status.fileIter, pReader, &hasNext);
if (code) {
return code;
}
@@ -3516,8 +3516,8 @@ SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_
int64_t startVer = (pCond->startVersion == -1) ? 0 : pCond->startVersion;
int64_t endVer = 0;
- if (pCond->endVersion ==
- -1) { // user not specified end version, set current maximum version of vnode as the endVersion
+ if (pCond->endVersion == -1) {
+ // user not specified end version, set current maximum version of vnode as the endVersion
endVer = pVnode->state.applied;
} else {
endVer = (pCond->endVersion > pVnode->state.applied) ? pVnode->state.applied : pCond->endVersion;
diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c
index 65fac5a475..b62bf27def 100644
--- a/source/dnode/vnode/src/vnd/vnodeSvr.c
+++ b/source/dnode/vnode/src/vnd/vnodeSvr.c
@@ -400,7 +400,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp
}
break;
case TDMT_STREAM_TASK_DEPLOY: {
- if (tqProcessTaskDeployReq(pVnode->pTq, version, pReq, len) < 0) {
+ if (pVnode->restored && tqProcessTaskDeployReq(pVnode->pTq, version, pReq, len) < 0) {
goto _err;
}
} break;
@@ -447,13 +447,11 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp
walApplyVer(pVnode->pWal, version);
- /*vInfo("vgId:%d, push msg begin", pVnode->config.vgId);*/
if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) {
/*vInfo("vgId:%d, push msg end", pVnode->config.vgId);*/
vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno));
return -1;
}
- /*vInfo("vgId:%d, push msg end", pVnode->config.vgId);*/
// commit if need
if (needCommit) {
@@ -541,13 +539,10 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) {
return vnodeGetBatchMeta(pVnode, pMsg);
case TDMT_VND_TMQ_CONSUME:
return tqProcessPollReq(pVnode->pTq, pMsg);
-
case TDMT_STREAM_TASK_RUN:
return tqProcessTaskRunReq(pVnode->pTq, pMsg);
-#if 1
case TDMT_STREAM_TASK_DISPATCH:
return tqProcessTaskDispatchReq(pVnode->pTq, pMsg, true);
-#endif
case TDMT_STREAM_TASK_CHECK:
return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg);
case TDMT_STREAM_TASK_DISPATCH_RSP:
diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c
index 2b4eff08d3..d4a394b584 100644
--- a/source/dnode/vnode/src/vnd/vnodeSync.c
+++ b/source/dnode/vnode/src/vnd/vnodeSync.c
@@ -553,6 +553,9 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx)
pVnode->restored = true;
vInfo("vgId:%d, sync restore finished", pVnode->config.vgId);
+
+ // start to restore all stream tasks
+ tqStartStreamTasks(pVnode->pTq);
}
static void vnodeBecomeFollower(const SSyncFSM *pFsm) {
diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h
index 7d90560c33..2cb6626b03 100644
--- a/source/libs/executor/inc/executorimpl.h
+++ b/source/libs/executor/inc/executorimpl.h
@@ -127,14 +127,9 @@ enum {
};
typedef struct {
- // TODO remove prepareStatus
- // STqOffsetVal prepareStatus; // for tmq
- STqOffsetVal currentOffset; // for tmq
- SMqMetaRsp metaRsp; // for tmq fetching meta
- // int8_t returned;
- int64_t snapshotVer;
- // const SSubmitReq* pReq;
-
+ STqOffsetVal currentOffset; // for tmq
+ SMqMetaRsp metaRsp; // for tmq fetching meta
+ int64_t snapshotVer;
SPackedData submit;
SSchemaWrapper* schema;
char tbName[TSDB_TABLE_NAME_LEN];
@@ -144,6 +139,8 @@ typedef struct {
int64_t fillHistoryVer1;
int64_t fillHistoryVer2;
SStreamState* pState;
+ int64_t dataVersion;
+ int64_t checkPointId;
} SStreamTaskInfo;
typedef struct {
@@ -191,7 +188,6 @@ enum {
OP_OPENED = 0x1,
OP_RES_TO_RETURN = 0x5,
OP_EXEC_DONE = 0x9,
- // OP_EXEC_RECV = 0x11,
};
typedef struct SOperatorFpSet {
@@ -560,6 +556,7 @@ typedef struct SStreamIntervalOperatorInfo {
uint64_t numOfDatapack;
SArray* pUpdated;
SSHashObj* pUpdatedMap;
+ int64_t dataVersion;
} SStreamIntervalOperatorInfo;
typedef struct SDataGroupInfo {
@@ -609,6 +606,7 @@ typedef struct SStreamSessionAggOperatorInfo {
bool ignoreExpiredDataSaved;
SArray* pUpdated;
SSHashObj* pStUpdated;
+ int64_t dataVersion;
} SStreamSessionAggOperatorInfo;
typedef struct SStreamStateAggOperatorInfo {
@@ -627,6 +625,7 @@ typedef struct SStreamStateAggOperatorInfo {
bool ignoreExpiredDataSaved;
SArray* pUpdated;
SSHashObj* pSeUpdated;
+ int64_t dataVersion;
} SStreamStateAggOperatorInfo;
typedef struct SStreamPartitionOperatorInfo {
@@ -827,7 +826,7 @@ void setTaskKilled(SExecTaskInfo* pTaskInfo, int32_t rspCode);
void doDestroyTask(SExecTaskInfo* pTaskInfo);
void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status);
-char* buildTaskId(uint64_t taskId, uint64_t queryId);
+void buildTaskId(uint64_t taskId, uint64_t queryId, char* dst);
SArray* getTableListInfo(const SExecTaskInfo* pTaskInfo);
diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c
index 0229631d40..6e3a7d8725 100644
--- a/source/libs/executor/src/executor.c
+++ b/source/libs/executor/src/executor.c
@@ -127,7 +127,7 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu
pOperator->status = OP_NOT_OPENED;
SStreamScanInfo* pInfo = pOperator->info;
- qDebug("task stream set total blocks:%d %s", (int32_t)numOfBlocks, id);
+ qDebug("s-task set source blocks:%d %s", (int32_t)numOfBlocks, id);
ASSERT(pInfo->validBlockIndex == 0 && taosArrayGetSize(pInfo->pBlockLists) == 0);
if (type == STREAM_INPUT__MERGED_SUBMIT) {
@@ -173,9 +173,7 @@ void doSetTaskId(SOperatorInfo* pOperator) {
void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId) {
SExecTaskInfo* pTaskInfo = tinfo;
pTaskInfo->id.queryId = queryId;
-
- taosMemoryFreeClear(pTaskInfo->id.str);
- pTaskInfo->id.str = buildTaskId(taskId, queryId);
+ buildTaskId(taskId, queryId, pTaskInfo->id.str);
// set the idstr for tsdbReader
doSetTaskId(pTaskInfo->pRoot);
@@ -198,6 +196,13 @@ int32_t qSetStreamOpOpen(qTaskInfo_t tinfo) {
return code;
}
+/**
+ * Read the data version and the checkpoint id recorded in the stream info of a task.
+ *
+ * @param tinfo   task handle (a SExecTaskInfo)
+ * @param dataVer receives streamInfo.dataVersion
+ * @param ckId    receives streamInfo.checkPointId
+ */
+void qGetCheckpointVersion(qTaskInfo_t tinfo, int64_t* dataVer, int64_t* ckId) {
+  const SExecTaskInfo* pTask = (const SExecTaskInfo*)tinfo;
+
+  *dataVer = pTask->streamInfo.dataVersion;
+  *ckId = pTask->streamInfo.checkPointId;
+}
+
+
int32_t qSetMultiStreamInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, int32_t type) {
if (tinfo == NULL) {
return TSDB_CODE_APP_ERROR;
@@ -363,27 +368,23 @@ static SArray* filterUnqualifiedTables(const SStreamScanInfo* pScanInfo, const S
return qa;
}
-int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd) {
+int32_t qUpdateTableListForStreamScanner(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
+ const char* id = GET_TASKID(pTaskInfo);
+ int32_t code = 0;
if (isAdd) {
- qDebug("add %d tables id into query list, %s", (int32_t)taosArrayGetSize(tableIdList), pTaskInfo->id.str);
+ qDebug("add %d tables id into query list, %s", (int32_t)taosArrayGetSize(tableIdList), id);
}
// traverse to the stream scanner node to add this table id
- SOperatorInfo* pInfo = pTaskInfo->pRoot;
- while (pInfo->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
- pInfo = pInfo->pDownstream[0];
- }
-
- int32_t code = 0;
+ SOperatorInfo* pInfo = extractOperatorInTree(pTaskInfo->pRoot, QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN, id);
SStreamScanInfo* pScanInfo = pInfo->info;
+
if (isAdd) { // add new table id
SArray* qa = filterUnqualifiedTables(pScanInfo, tableIdList, GET_TASKID(pTaskInfo));
int32_t numOfQualifiedTables = taosArrayGetSize(qa);
-
- qDebug(" %d qualified child tables added into stream scanner", numOfQualifiedTables);
-
+ qDebug("%d qualified child tables added into stream scanner, %s", numOfQualifiedTables, id);
code = tqReaderAddTbUidList(pScanInfo->tqReader, qa);
if (code != TSDB_CODE_SUCCESS) {
taosArrayDestroy(qa);
@@ -424,19 +425,6 @@ int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bo
}
}
-#if 0
- bool exists = false;
- for (int32_t k = 0; k < taosArrayGetSize(pListInfo->pTableList); ++k) {
- STableKeyInfo* pKeyInfo = taosArrayGet(pListInfo->pTableList, k);
- if (pKeyInfo->uid == keyInfo.uid) {
- qWarn("ignore duplicated query table uid:%" PRIu64 " added, %s", pKeyInfo->uid, pTaskInfo->id.str);
- exists = true;
- }
- }
-
- if (!exists) {
-#endif
-
tableListAddTableInfo(pTableListInfo, keyInfo.uid, keyInfo.groupId);
}
@@ -447,7 +435,7 @@ int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bo
taosArrayDestroy(qa);
} else { // remove the table id in current list
- qDebug(" %d remove child tables from the stream scanner", (int32_t)taosArrayGetSize(tableIdList));
+ qDebug("%d remove child tables from the stream scanner, %s", (int32_t)taosArrayGetSize(tableIdList), id);
taosWLockLatch(&pTaskInfo->lock);
code = tqReaderRemoveTbUidList(pScanInfo->tqReader, tableIdList);
taosWUnLockLatch(&pTaskInfo->lock);
@@ -1273,3 +1261,22 @@ void qProcessRspMsg(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) {
rpcFreeCont(pMsg->pCont);
destroySendMsgInfo(pSendInfo);
}
+
+/**
+ * Collect the uids of the tables queried by a task. Only the first table list returned by
+ * getTableListInfo() is inspected.
+ *
+ * @param tinfo task handle (a SExecTaskInfo)
+ * @return a newly created array of uint64_t uids, empty when the task exposes no table list;
+ *         NULL if the array cannot be created. Presumably the caller releases the array.
+ */
+SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo) {
+  SExecTaskInfo* pTaskInfo = tinfo;
+
+  SArray* pUidList = taosArrayInit(10, sizeof(uint64_t));
+  if (pUidList == NULL) {
+    return NULL;
+  }
+
+  SArray* plist = getTableListInfo(pTaskInfo);
+  if (plist == NULL) {
+    return pUidList;
+  }
+
+  // only extract table in the first elements; guard against a task without any table list
+  STableListInfo* pTableListInfo = (taosArrayGetSize(plist) > 0) ? taosArrayGetP(plist, 0) : NULL;
+  if (pTableListInfo != NULL) {
+    int32_t numOfTables = tableListGetSize(pTableListInfo);
+    for (int32_t i = 0; i < numOfTables; ++i) {
+      STableKeyInfo* pKeyInfo = tableListGetInfo(pTableListInfo, i);
+      taosArrayPush(pUidList, &pKeyInfo->uid);
+    }
+  }
+
+  taosArrayDestroy(plist);
+  return pUidList;
+}
diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c
index a0697a7102..7594079cfb 100644
--- a/source/libs/executor/src/executorimpl.c
+++ b/source/libs/executor/src/executorimpl.c
@@ -1151,8 +1151,8 @@ void cleanupExprSupp(SExprSupp* pSupp) {
void cleanupBasicInfo(SOptrBasicInfo* pInfo) { pInfo->pRes = blockDataDestroy(pInfo->pRes); }
-char* buildTaskId(uint64_t taskId, uint64_t queryId) {
- char* p = taosMemoryMalloc(64);
+void buildTaskId(uint64_t taskId, uint64_t queryId, char* dst) {
+ char* p = dst;
int32_t offset = 6;
memcpy(p, "TID:0x", offset);
@@ -1163,7 +1163,6 @@ char* buildTaskId(uint64_t taskId, uint64_t queryId) {
offset += tintToHex(queryId, &p[offset]);
p[offset] = 0;
- return p;
}
SExecTaskInfo* doCreateExecTaskInfo(uint64_t queryId, uint64_t taskId, int32_t vgId, EOPTR_EXEC_MODEL model,
@@ -1185,7 +1184,9 @@ SExecTaskInfo* doCreateExecTaskInfo(uint64_t queryId, uint64_t taskId, int32_t v
taosInitRWLatch(&pTaskInfo->lock);
pTaskInfo->id.vgId = vgId;
pTaskInfo->id.queryId = queryId;
- pTaskInfo->id.str = buildTaskId(taskId, queryId);
+
+ pTaskInfo->id.str = taosMemoryMalloc(64);
+ buildTaskId(taskId, queryId, pTaskInfo->id.str);
return pTaskInfo;
}
@@ -2008,7 +2009,11 @@ void qStreamCloseTsdbReader(void* task) {
}
static void extractTableList(SArray* pList, const SOperatorInfo* pOperator) {
- if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) {
+ if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
+ SStreamScanInfo* pScanInfo = pOperator->info;
+ STableScanInfo* pTableScanInfo = pScanInfo->pTableScanOp->info;
+ taosArrayPush(pList, &pTableScanInfo->base.pTableListInfo);
+ } else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) {
STableScanInfo* pScanInfo = pOperator->info;
taosArrayPush(pList, &pScanInfo->base.pTableListInfo);
} else {
diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c
index cd539cc987..ae396a4c68 100644
--- a/source/libs/executor/src/scanoperator.c
+++ b/source/libs/executor/src/scanoperator.c
@@ -1637,7 +1637,7 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) {
if (pTaskInfo->streamInfo.submit.msgStr != NULL) {
if (pInfo->tqReader->msg2.msgStr == NULL) {
SPackedData submit = pTaskInfo->streamInfo.submit;
- if (tqReaderSetSubmitReq2(pInfo->tqReader, submit.msgStr, submit.msgLen, submit.ver) < 0) {
+ if (tqReaderSetSubmitMsg(pInfo->tqReader, submit.msgStr, submit.msgLen, submit.ver) < 0) {
qError("submit msg messed up when initing stream submit block %p", submit.msgStr);
return NULL;
}
@@ -1646,7 +1646,7 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) {
blockDataCleanup(pInfo->pRes);
SDataBlockInfo* pBlockInfo = &pInfo->pRes->info;
- while (tqNextDataBlock2(pInfo->tqReader)) {
+ while (tqNextDataBlock(pInfo->tqReader)) {
SSDataBlock block = {0};
int32_t code = tqRetrieveDataBlock2(&block, pInfo->tqReader, NULL);
@@ -1812,7 +1812,6 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) {
/*resetTableScanInfo(pTSInfo, pWin);*/
tsdbReaderClose(pTSInfo->base.dataReader);
- qDebug("4");
pTSInfo->base.dataReader = NULL;
pInfo->pTableScanOp->status = OP_OPENED;
@@ -1895,7 +1894,6 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) {
pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__NONE;
STableScanInfo* pTSInfo = pInfo->pTableScanOp->info;
tsdbReaderClose(pTSInfo->base.dataReader);
- qDebug("5");
pTSInfo->base.dataReader = NULL;
@@ -1922,6 +1920,7 @@ FETCH_NEXT_BLOCK:
if (pBlock->info.parTbName[0]) {
streamStatePutParName(pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, pBlock->info.parTbName);
}
+
// TODO move into scan
pBlock->info.calWin.skey = INT64_MIN;
pBlock->info.calWin.ekey = INT64_MAX;
@@ -2064,7 +2063,7 @@ FETCH_NEXT_BLOCK:
int32_t current = pInfo->validBlockIndex++;
SPackedData* pSubmit = taosArrayGet(pInfo->pBlockLists, current);
- if (tqReaderSetSubmitReq2(pInfo->tqReader, pSubmit->msgStr, pSubmit->msgLen, pSubmit->ver) < 0) {
+ if (tqReaderSetSubmitMsg(pInfo->tqReader, pSubmit->msgStr, pSubmit->msgLen, pSubmit->ver) < 0) {
qError("submit msg messed up when initing stream submit block %p, current %d, total %d", pSubmit, current,
totBlockNum);
continue;
@@ -2073,7 +2072,7 @@ FETCH_NEXT_BLOCK:
blockDataCleanup(pInfo->pRes);
- while (tqNextDataBlock2(pInfo->tqReader)) {
+ while (tqNextDataBlock(pInfo->tqReader)) {
SSDataBlock block = {0};
int32_t code = tqRetrieveDataBlock2(&block, pInfo->tqReader, NULL);
diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c
index b42b9c467e..007a6f63d1 100644
--- a/source/libs/executor/src/timewindowoperator.c
+++ b/source/libs/executor/src/timewindowoperator.c
@@ -2333,9 +2333,15 @@ static int32_t getNextQualifiedFinalWindow(SInterval* pInterval, STimeWindow* pN
return startPos;
}
+static void setStreamDataVersion(SExecTaskInfo* pTaskInfo, int64_t version, int64_t ckId) {  // record the given data version and checkpoint id in the task's streamInfo
+ pTaskInfo->streamInfo.dataVersion = version;  // callers in this patch pass the operator's pInfo->dataVersion
+ pTaskInfo->streamInfo.checkPointId = ckId;  // callers in this patch pass pInfo->pState->checkPointId
+}
+
static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBlock, uint64_t groupId,
SSHashObj* pUpdatedMap) {
SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)pOperatorInfo->info;
+ pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version);
SResultRowInfo* pResultRowInfo = &(pInfo->binfo.resultRowInfo);
SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo;
@@ -2432,10 +2438,8 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* p
updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &nextWin, true);
applyAggFunctionOnPartialTuples(pTaskInfo, pSup->pCtx, &pInfo->twAggSup.timeWindowData, startPos, forwardRows,
pSDataBlock->info.rows, numOfOutput);
- SWinKey key = {
- .ts = nextWin.skey,
- .groupId = groupId,
- };
+
+ SWinKey key = { .ts = nextWin.skey, .groupId = groupId };
saveOutputBuf(pInfo->pState, &key, pResult, pInfo->aggSup.resultRowSize);
releaseOutputBuf(pInfo->pState, &key, pResult);
if (pInfo->delKey.ts > key.ts) {
@@ -2503,6 +2507,7 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) {
clearFunctionContext(&pOperator->exprSupp);
// semi interval operator clear disk buffer
clearStreamIntervalOperator(pInfo);
+ setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId);
qDebug("===stream===clear semi operator");
} else {
deleteIntervalDiscBuf(pInfo->pState, pInfo->pPullDataMap, pInfo->twAggSup.maxTs - pInfo->twAggSup.deleteMark,
@@ -2776,6 +2781,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream,
pInfo->numOfDatapack = 0;
pInfo->pUpdated = NULL;
pInfo->pUpdatedMap = NULL;
+ pInfo->dataVersion = 0;
pOperator->operatorType = pPhyNode->type;
pOperator->blocking = true;
@@ -3126,6 +3132,8 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData
int32_t rows = pSDataBlock->info.rows;
int32_t winRows = 0;
+ pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version);
+
SColumnInfoData* pStartTsCol = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex);
TSKEY* startTsCols = (int64_t*)pStartTsCol->pData;
SColumnInfoData* pEndTsCol = NULL;
@@ -3589,6 +3597,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh
pInfo->ignoreExpiredDataSaved = false;
pInfo->pUpdated = NULL;
pInfo->pStUpdated = NULL;
+ pInfo->dataVersion = 0;
setOperatorInfo(pOperator, "StreamSessionWindowAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION, true,
OP_NOT_OPENED, pInfo, pTaskInfo);
@@ -3899,6 +3908,9 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl
TSKEY* tsCols = NULL;
SResultRow* pResult = NULL;
int32_t winRows = 0;
+
+ pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version);
+
if (pSDataBlock->pDataBlock != NULL) {
SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex);
tsCols = (int64_t*)pColDataInfo->pData;
@@ -4115,6 +4127,7 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys
pInfo->ignoreExpiredDataSaved = false;
pInfo->pUpdated = NULL;
pInfo->pSeUpdated = NULL;
+ pInfo->dataVersion = 0;
setOperatorInfo(pOperator, "StreamStateAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE, true, OP_NOT_OPENED,
pInfo, pTaskInfo);
@@ -4750,6 +4763,7 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) {
&pInfo->delKey);
setOperatorCompleted(pOperator);
streamStateCommit(pTaskInfo->streamInfo.pState);
+ setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId);
return NULL;
}
@@ -4771,6 +4785,7 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) {
pInfo->numOfDatapack = 0;
break;
}
+
pInfo->numOfDatapack++;
printDataBlock(pBlock, "single interval recv");
diff --git a/source/libs/stream/CMakeLists.txt b/source/libs/stream/CMakeLists.txt
index ceddf4f215..790547bb61 100644
--- a/source/libs/stream/CMakeLists.txt
+++ b/source/libs/stream/CMakeLists.txt
@@ -9,7 +9,7 @@ target_include_directories(
target_link_libraries(
stream
PUBLIC tdb
- PRIVATE os util transport qcom executor
+ PRIVATE os util transport qcom executor wal
)
if(${BUILD_TEST})
diff --git a/source/libs/stream/inc/streamInc.h b/source/libs/stream/inc/streamInc.h
index 66496f11f8..876b80697a 100644
--- a/source/libs/stream/inc/streamInc.h
+++ b/source/libs/stream/inc/streamInc.h
@@ -44,7 +44,7 @@ int32_t streamDispatchOneCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq*
int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId,
SEpSet* pEpSet);
-SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* elem);
+SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem);
#ifdef __cplusplus
}
diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c
index 361cd2cacc..7171b52912 100644
--- a/source/libs/stream/src/stream.c
+++ b/source/libs/stream/src/stream.c
@@ -16,6 +16,8 @@
#include "streamInc.h"
#include "ttimer.h"
+#define STREAM_TASK_INPUT_QUEUEU_CAPACITY 2000
+
int32_t streamInit() {
int8_t old;
while (1) {
@@ -50,7 +52,7 @@ void streamCleanUp() {
void streamSchedByTimer(void* param, void* tmrId) {
SStreamTask* pTask = (void*)param;
- if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) {
+ if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
streamMetaReleaseTask(NULL, pTask);
return;
}
@@ -64,15 +66,16 @@ void streamSchedByTimer(void* param, void* tmrId) {
taosFreeQitem(trigger);
return;
}
- trigger->pBlock->info.type = STREAM_GET_ALL;
+ trigger->pBlock->info.type = STREAM_GET_ALL;
atomic_store_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE);
- if (tAppendDataForStream(pTask, (SStreamQueueItem*)trigger) < 0) {
+ if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)trigger) < 0) {
taosFreeQitem(trigger);
taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->timer);
return;
}
+
streamSchedExec(pTask);
}
@@ -91,31 +94,33 @@ int32_t streamSetupTrigger(SStreamTask* pTask) {
int32_t streamSchedExec(SStreamTask* pTask) {
int8_t schedStatus =
- atomic_val_compare_exchange_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE, TASK_SCHED_STATUS__WAITING);
+ atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE, TASK_SCHED_STATUS__WAITING);  // try INACTIVE -> WAITING; presumably returns the previous value, so only one caller enters the branch below
if (schedStatus == TASK_SCHED_STATUS__INACTIVE) {
SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
if (pRunReq == NULL) {
- atomic_store_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE);
+ terrno = TSDB_CODE_OUT_OF_MEMORY;
+ atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);  // roll the status back so a later call can schedule again
return -1;
}
pRunReq->head.vgId = pTask->nodeId;
- pRunReq->streamId = pTask->streamId;
- pRunReq->taskId = pTask->taskId;
+ pRunReq->streamId = pTask->id.streamId;
+ pRunReq->taskId = pTask->id.taskId;
SRpcMsg msg = { .msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq) };
tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &msg);
+ qDebug("trigger to run s-task:%s", pTask->id.idStr);  // NOTE(review): tmsgPutToQueue's result is not checked; if it can fail, schedStatus stays WAITING and this is still logged -- confirm
}
return 0;
}
-int32_t streamTaskEnqueue(SStreamTask* pTask, const SStreamDispatchReq* pReq, SRpcMsg* pRsp) {
+int32_t streamTaskEnqueueBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq, SRpcMsg* pRsp) {
SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0);
int8_t status;
- // enqueue
+ // enqueue data block
if (pData != NULL) {
pData->type = STREAM_INPUT__DATA_BLOCK;
pData->srcVgId = pReq->dataSrcVgId;
@@ -123,10 +128,10 @@ int32_t streamTaskEnqueue(SStreamTask* pTask, const SStreamDispatchReq* pReq, SR
/*pData->blocks = pReq->data;*/
/*pBlock->sourceVer = pReq->sourceVer;*/
streamDispatchReqToData(pReq, pData);
- if (tAppendDataForStream(pTask, (SStreamQueueItem*)pData) == 0) {
+ if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pData) == 0) {
status = TASK_INPUT_STATUS__NORMAL;
- } else {
- status = TASK_INPUT_STATUS__FAILED;
+ } else { // input queue is full, upstream is blocked now
+ status = TASK_INPUT_STATUS__BLOCKED;
}
} else {
streamTaskInputFail(pTask);
@@ -142,10 +147,12 @@ int32_t streamTaskEnqueue(SStreamTask* pTask, const SStreamDispatchReq* pReq, SR
pCont->upstreamNodeId = htonl(pReq->upstreamNodeId);
pCont->upstreamTaskId = htonl(pReq->upstreamTaskId);
pCont->downstreamNodeId = htonl(pTask->nodeId);
- pCont->downstreamTaskId = htonl(pTask->taskId);
+ pCont->downstreamTaskId = htonl(pTask->id.taskId);
pRsp->pCont = buf;
+
pRsp->contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp);
tmsgSendRsp(pRsp);
+
return status == TASK_INPUT_STATUS__NORMAL ? 0 : -1;
}
@@ -155,7 +162,7 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq,
// enqueue
if (pData != NULL) {
- qDebug("task %d(child %d) recv retrieve req from task %d, reqId %" PRId64, pTask->taskId, pTask->selfChildId,
+ qDebug("task %d(child %d) recv retrieve req from task %d, reqId %" PRId64, pTask->id.taskId, pTask->selfChildId,
pReq->srcTaskId, pReq->reqId);
pData->type = STREAM_INPUT__DATA_RETRIEVE;
@@ -164,7 +171,7 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq,
/*pData->blocks = pReq->data;*/
/*pBlock->sourceVer = pReq->sourceVer;*/
streamRetrieveReqToData(pReq, pData);
- if (tAppendDataForStream(pTask, (SStreamQueueItem*)pData) == 0) {
+ if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pData) == 0) {
status = TASK_INPUT_STATUS__NORMAL;
} else {
status = TASK_INPUT_STATUS__FAILED;
@@ -205,10 +212,10 @@ int32_t streamTaskOutput(SStreamTask* pTask, SStreamDataBlock* pBlock) {
}
int32_t streamProcessDispatchReq(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) {
- qDebug("task %d receive dispatch req from node %d task %d", pTask->taskId, pReq->upstreamNodeId,
+ qDebug("vgId:%d s-task:%s receive dispatch req from taskId:%d", pReq->upstreamNodeId, pTask->id.idStr,
pReq->upstreamTaskId);
- streamTaskEnqueue(pTask, pReq, pRsp);
+ streamTaskEnqueueBlocks(pTask, pReq, pRsp);
tDeleteStreamDispatchReq(pReq);
if (exec) {
@@ -228,13 +235,14 @@ int32_t streamProcessDispatchReq(SStreamTask* pTask, SStreamDispatchReq* pReq, S
int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code) {
ASSERT(pRsp->inputStatus == TASK_OUTPUT_STATUS__NORMAL || pRsp->inputStatus == TASK_OUTPUT_STATUS__BLOCKED);
-
- qDebug("task %d receive dispatch rsp, code: %x", pTask->taskId, code);
+ qDebug("s-task:%s receive dispatch rsp, code: %x", pTask->id.idStr, code);
if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
int32_t leftRsp = atomic_sub_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1);
- qDebug("task %d is shuffle, left waiting rsp %d", pTask->taskId, leftRsp);
- if (leftRsp > 0) return 0;
+ qDebug("task %d is shuffle, left waiting rsp %d", pTask->id.taskId, leftRsp);
+ if (leftRsp > 0) {
+ return 0;
+ }
}
int8_t old = atomic_exchange_8(&pTask->outputStatus, pRsp->inputStatus);
@@ -261,7 +269,7 @@ int32_t streamProcessRunReq(SStreamTask* pTask) {
}
int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pRsp) {
- qDebug("task %d receive retrieve req from node %d task %d", pTask->taskId, pReq->srcNodeId, pReq->srcTaskId);
+ qDebug("task %d receive retrieve req from node %d task %d", pTask->id.taskId, pReq->srcNodeId, pReq->srcTaskId);
streamTaskEnqueueRetrieve(pTask, pReq, pRsp);
@@ -275,26 +283,43 @@ int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, S
return 0;
}
-int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem) {
+bool tInputQueueIsFull(const SStreamTask* pTask) {  // true once the task's input queue holds at least STREAM_TASK_INPUT_QUEUEU_CAPACITY items
+ return taosQueueItemSize((pTask->inputQueue->queue)) >= STREAM_TASK_INPUT_QUEUEU_CAPACITY;
+}
+
+int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) {
int8_t type = pItem->type;
if (type == STREAM_INPUT__DATA_SUBMIT) {
SStreamDataSubmit2* pSubmitBlock = streamSubmitBlockClone((SStreamDataSubmit2*)pItem);
if (pSubmitBlock == NULL) {
- qDebug("task %d %p submit enqueue failed since out of memory", pTask->taskId, pTask);
+ qDebug("task %d %p submit enqueue failed since out of memory", pTask->id.taskId, pTask);
terrno = TSDB_CODE_OUT_OF_MEMORY;
atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__FAILED);
return -1;
}
int32_t total = taosQueueItemSize(pTask->inputQueue->queue) + 1;
- qDebug("stream task:%d %p submit enqueue %p %p %p msgLen:%d ver:%" PRId64 ", total in queue:%d", pTask->taskId,
- pTask, pItem, pSubmitBlock, pSubmitBlock->submit.msgStr, pSubmitBlock->submit.msgLen,
+ qDebug("s-task:%s submit enqueue %p %p msgLen:%d ver:%" PRId64 ", total in queue:%d", pTask->id.idStr,
+ pItem, pSubmitBlock->submit.msgStr, pSubmitBlock->submit.msgLen,
pSubmitBlock->submit.ver, total);
+ if ((pTask->taskLevel == TASK_LEVEL__SOURCE) && total > STREAM_TASK_INPUT_QUEUEU_CAPACITY) {
+ qError("s-task:%s input queue is full, capacity:%d, abort", pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY);
+ streamDataSubmitDestroy(pSubmitBlock);
+ return -1;
+ }
+
taosWriteQitem(pTask->inputQueue->queue, pSubmitBlock);
} else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE ||
type == STREAM_INPUT__REF_DATA_BLOCK) {
+ int32_t total = taosQueueItemSize(pTask->inputQueue->queue) + 1;
+ if ((pTask->taskLevel == TASK_LEVEL__SOURCE) && total > STREAM_TASK_INPUT_QUEUEU_CAPACITY) {
+ qError("s-task:%s input queue is full, capacity:%d, abort", pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY);
+ return -1;
+ }
+
+ qDebug("s-task:%s data block enqueue, total in queue:%d", pTask->id.idStr, total);
taosWriteQitem(pTask->inputQueue->queue, pItem);
} else if (type == STREAM_INPUT__CHECKPOINT) {
taosWriteQitem(pTask->inputQueue->queue, pItem);
@@ -307,7 +332,6 @@ int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem) {
}
#if 0
- // TODO: back pressure
atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__NORMAL);
#endif
diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c
index 3fba1cb556..ae616260f3 100644
--- a/source/libs/stream/src/streamData.c
+++ b/source/libs/stream/src/streamData.c
@@ -67,9 +67,8 @@ int32_t streamRetrieveReqToData(const SStreamRetrieveReq* pReq, SStreamDataBlock
return 0;
}
-SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit) {
+SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit, int32_t type) {
SStreamDataSubmit2* pDataSubmit = (SStreamDataSubmit2*)taosAllocateQitem(sizeof(SStreamDataSubmit2), DEF_QITEM, 0);
-
if (pDataSubmit == NULL) {
return NULL;
}
@@ -82,7 +81,7 @@ SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit) {
pDataSubmit->submit = submit;
*pDataSubmit->dataRef = 1; // initialize the reference count to be 1
- pDataSubmit->type = STREAM_INPUT__DATA_SUBMIT;
+ pDataSubmit->type = type;
return pDataSubmit;
}
@@ -139,28 +138,27 @@ SStreamDataSubmit2* streamSubmitBlockClone(SStreamDataSubmit2* pSubmit) {
return pSubmitClone;
}
-SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* elem) {
- ASSERT(elem);
- if (dst->type == STREAM_INPUT__DATA_BLOCK && elem->type == STREAM_INPUT__DATA_BLOCK) {
+SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem) {
+ if (dst->type == STREAM_INPUT__DATA_BLOCK && pElem->type == STREAM_INPUT__DATA_BLOCK) {
SStreamDataBlock* pBlock = (SStreamDataBlock*)dst;
- SStreamDataBlock* pBlockSrc = (SStreamDataBlock*)elem;
+ SStreamDataBlock* pBlockSrc = (SStreamDataBlock*)pElem;
taosArrayAddAll(pBlock->blocks, pBlockSrc->blocks);
taosArrayDestroy(pBlockSrc->blocks);
- taosFreeQitem(elem);
+ taosFreeQitem(pElem);
return dst;
- } else if (dst->type == STREAM_INPUT__MERGED_SUBMIT && elem->type == STREAM_INPUT__DATA_SUBMIT) {
+ } else if (dst->type == STREAM_INPUT__MERGED_SUBMIT && pElem->type == STREAM_INPUT__DATA_SUBMIT) {
SStreamMergedSubmit2* pMerged = (SStreamMergedSubmit2*)dst;
- SStreamDataSubmit2* pBlockSrc = (SStreamDataSubmit2*)elem;
+ SStreamDataSubmit2* pBlockSrc = (SStreamDataSubmit2*)pElem;
streamMergeSubmit(pMerged, pBlockSrc);
- taosFreeQitem(elem);
+ taosFreeQitem(pElem);
return dst;
- } else if (dst->type == STREAM_INPUT__DATA_SUBMIT && elem->type == STREAM_INPUT__DATA_SUBMIT) {
+ } else if (dst->type == STREAM_INPUT__DATA_SUBMIT && pElem->type == STREAM_INPUT__DATA_SUBMIT) {
SStreamMergedSubmit2* pMerged = streamMergedSubmitNew();
ASSERT(pMerged);
streamMergeSubmit(pMerged, (SStreamDataSubmit2*)dst);
- streamMergeSubmit(pMerged, (SStreamDataSubmit2*)elem);
+ streamMergeSubmit(pMerged, (SStreamDataSubmit2*)pElem);
taosFreeQitem(dst);
- taosFreeQitem(elem);
+ taosFreeQitem(pElem);
return (SStreamQueueItem*)pMerged;
} else {
return NULL;
diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c
index 7e7c23f98a..a9f6d29bf5 100644
--- a/source/libs/stream/src/streamDispatch.c
+++ b/source/libs/stream/src/streamDispatch.c
@@ -121,9 +121,9 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock)
int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols);
SStreamRetrieveReq req = {
- .streamId = pTask->streamId,
+ .streamId = pTask->id.streamId,
.srcNodeId = pTask->nodeId,
- .srcTaskId = pTask->taskId,
+ .srcTaskId = pTask->id.taskId,
.pRetrieve = pRetrieve,
.retrieveLen = dataStrLen,
};
@@ -168,7 +168,7 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock)
}
buf = NULL;
- qDebug("task %d(child %d) send retrieve req to task %d at node %d, reqId %" PRId64, pTask->taskId,
+ qDebug("s-task:%s (child %d) send retrieve req to task %d at node %d, reqId %" PRId64, pTask->id.idStr,
pTask->selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req.reqId);
}
code = 0;
@@ -238,7 +238,8 @@ int32_t streamDispatchOneCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq*
msg.pCont = buf;
msg.msgType = TDMT_STREAM_TASK_CHECK;
- qDebug("dispatch from task %d to task %d node %d: check msg", pTask->taskId, pReq->downstreamTaskId, nodeId);
+ qDebug("dispatch from s-task:%s to downstream s-task:%"PRIx64":%d node %d: check msg", pTask->id.idStr,
+ pReq->streamId, pReq->downstreamTaskId, nodeId);
tmsgSendReq(pEpSet, &msg);
@@ -282,7 +283,7 @@ int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecov
tmsgSendReq(pEpSet, &msg);
- qDebug("dispatch from task %d to task %d node %d: recover finish msg", pTask->taskId, pReq->taskId, vgId);
+ qDebug("dispatch from task %d to task %d node %d: recover finish msg", pTask->id.taskId, pReq->taskId, vgId);
return 0;
FAIL:
@@ -319,8 +320,7 @@ int32_t streamDispatchOneDataReq(SStreamTask* pTask, const SStreamDispatchReq* p
msg.pCont = buf;
msg.msgType = pTask->dispatchMsgType;
- qDebug("dispatch from task %d to task %d node %d: data msg", pTask->taskId, pReq->taskId, vgId);
-
+ qDebug("dispatch from s-task:%s to taskId:%d vgId:%d data msg", pTask->id.idStr, pReq->taskId, vgId);
tmsgSendReq(pEpSet, &msg);
code = 0;
@@ -382,9 +382,9 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
SStreamDispatchReq req = {
- .streamId = pTask->streamId,
+ .streamId = pTask->id.streamId,
.dataSrcVgId = pData->srcVgId,
- .upstreamTaskId = pTask->taskId,
+ .upstreamTaskId = pTask->id.taskId,
.upstreamChildId = pTask->selfChildId,
.upstreamNodeId = pTask->nodeId,
.blockNum = blockNum,
@@ -402,14 +402,15 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
goto FAIL_FIXED_DISPATCH;
}
}
+
int32_t vgId = pTask->fixedEpDispatcher.nodeId;
SEpSet* pEpSet = &pTask->fixedEpDispatcher.epSet;
int32_t downstreamTaskId = pTask->fixedEpDispatcher.taskId;
req.taskId = downstreamTaskId;
- qDebug("dispatch from task %d (child id %d) to down stream task %d in vnode %d", pTask->taskId, pTask->selfChildId,
- downstreamTaskId, vgId);
+ qDebug("s-task:%s (child taskId:%d) dispatch blocks:%d to down stream s-task:%d in vgId:%d", pTask->id.idStr,
+ pTask->selfChildId, blockNum, downstreamTaskId, vgId);
if (streamDispatchOneDataReq(pTask, &req, vgId, pEpSet) < 0) {
goto FAIL_FIXED_DISPATCH;
@@ -432,9 +433,9 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
}
for (int32_t i = 0; i < vgSz; i++) {
- pReqs[i].streamId = pTask->streamId;
+ pReqs[i].streamId = pTask->id.streamId;
pReqs[i].dataSrcVgId = pData->srcVgId;
- pReqs[i].upstreamTaskId = pTask->taskId;
+ pReqs[i].upstreamTaskId = pTask->id.taskId;
pReqs[i].upstreamChildId = pTask->selfChildId;
pReqs[i].upstreamNodeId = pTask->nodeId;
pReqs[i].blockNum = 0;
@@ -494,6 +495,8 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
int32_t streamDispatch(SStreamTask* pTask) {
ASSERT(pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH);
+ qDebug("s-task:%s try to dispatch intermediate result block to downstream, numofBlocks in outputQ:%d", pTask->id.idStr,
+ taosQueueItemSize(pTask->outputQueue->queue));
int8_t old =
atomic_val_compare_exchange_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT);
@@ -503,13 +506,12 @@ int32_t streamDispatch(SStreamTask* pTask) {
SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputQueue);
if (pBlock == NULL) {
- qDebug("stream stop dispatching since no output: task %d", pTask->taskId);
+ qDebug("s-task:%s stream stop dispatching since no output in output queue", pTask->id.idStr);
atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL);
return 0;
}
- ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK);
- qDebug("stream dispatching: task %d", pTask->taskId);
+ ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK);
int32_t code = 0;
if (streamDispatchAllBlocks(pTask, pBlock) < 0) {
@@ -518,6 +520,7 @@ int32_t streamDispatch(SStreamTask* pTask) {
atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL);
goto FREE;
}
+
FREE:
taosArrayDestroyEx(pBlock->blocks, (FDelete)blockDataFreeRes);
taosFreeQitem(pBlock);
diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c
index 6ef327049c..3d896c08ac 100644
--- a/source/libs/stream/src/streamExec.c
+++ b/source/libs/stream/src/streamExec.c
@@ -18,70 +18,82 @@
#define STREAM_EXEC_MAX_BATCH_NUM 100
static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* pRes) {
- int32_t code;
- void* exec = pTask->exec.executor;
- while(pTask->taskLevel == TASK_LEVEL__SOURCE && atomic_load_8(&pTask->taskStatus) != TASK_STATUS__NORMAL) {
- qError("stream task wait for the end of fill history");
- taosMsleep(2);
- continue;
+ int32_t code = TSDB_CODE_SUCCESS;
+ void* pExecutor = pTask->exec.pExecutor;
+
+ while (pTask->taskLevel == TASK_LEVEL__SOURCE) {  // source-level tasks poll until the task status is NORMAL or RESTORE
+ int8_t status = atomic_load_8(&pTask->status.taskStatus);
+ if (status != TASK_STATUS__NORMAL && status != TASK_STATUS__RESTORE) {
+ qError("stream task wait for the end of fill history, s-task:%s, status:%d", pTask->id.idStr,
+ status);  // log the value that was actually tested; a second atomic load could observe a different status
+ taosMsleep(2);
+ } else {
+ break;
+ }
+ }
// set input
const SStreamQueueItem* pItem = (const SStreamQueueItem*)data;
if (pItem->type == STREAM_INPUT__GET_RES) {
const SStreamTrigger* pTrigger = (const SStreamTrigger*)data;
- qSetMultiStreamInput(exec, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK);
+ qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK);
} else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) {
ASSERT(pTask->taskLevel == TASK_LEVEL__SOURCE);
const SStreamDataSubmit2* pSubmit = (const SStreamDataSubmit2*)data;
- qDebug("stream task:%d %p set submit input %p %p %d %" PRId64, pTask->taskId, pTask, pSubmit, pSubmit->submit.msgStr,
+ qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT);
+ qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, pTask->id.idStr, pSubmit, pSubmit->submit.msgStr,
pSubmit->submit.msgLen, pSubmit->submit.ver);
- qSetMultiStreamInput(exec, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT);
} else if (pItem->type == STREAM_INPUT__DATA_BLOCK || pItem->type == STREAM_INPUT__DATA_RETRIEVE) {
const SStreamDataBlock* pBlock = (const SStreamDataBlock*)data;
- SArray* blocks = pBlock->blocks;
- qDebug("task %d %p set ssdata input", pTask->taskId, pTask);
- qSetMultiStreamInput(exec, blocks->pData, blocks->size, STREAM_INPUT__DATA_BLOCK);
+
+ SArray* pBlockList = pBlock->blocks;
+ int32_t numOfBlocks = taosArrayGetSize(pBlockList);
+ qDebug("s-task:%s set sdata blocks as input num:%d, ver:%"PRId64, pTask->id.idStr, numOfBlocks, pBlock->sourceVer);
+ qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__DATA_BLOCK);
} else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) {
const SStreamMergedSubmit2* pMerged = (const SStreamMergedSubmit2*)data;
- SArray* blocks = pMerged->submits;
- qDebug("task %d %p set submit input (merged), batch num: %d", pTask->taskId, pTask, (int32_t)blocks->size);
- qSetMultiStreamInput(exec, blocks->pData, blocks->size, STREAM_INPUT__MERGED_SUBMIT);
+
+ SArray* pBlockList = pMerged->submits;
+ int32_t numOfBlocks = taosArrayGetSize(pBlockList);
+ qDebug("st-task:%s %p set submit input (merged), batch num:%d", pTask->id.idStr, pTask, numOfBlocks);
+ qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__MERGED_SUBMIT);
} else if (pItem->type == STREAM_INPUT__REF_DATA_BLOCK) {
const SStreamRefDataBlock* pRefBlock = (const SStreamRefDataBlock*)data;
- qSetMultiStreamInput(exec, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK);
+ qSetMultiStreamInput(pExecutor, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK);
} else {
ASSERT(0);
}
- // exec
+ // execute the task and collect its output blocks
while (1) {
- if (pTask->taskStatus == TASK_STATUS__DROPPING) {
+ if (pTask->status.taskStatus == TASK_STATUS__DROPPING) {
return 0;
}
SSDataBlock* output = NULL;
uint64_t ts = 0;
- if ((code = qExecTask(exec, &output, &ts)) < 0) {
+ if ((code = qExecTask(pExecutor, &output, &ts)) < 0) {
if (code == TSDB_CODE_QRY_IN_EXEC) {
- resetTaskInfo(exec);
+ resetTaskInfo(pExecutor);
}
- /*ASSERT(false);*/
- qError("unexpected stream execution, stream %" PRId64 " task: %d, since %s", pTask->streamId, pTask->taskId,
- terrstr());
+
+ qError("unexpected stream execution, s-task:%s since %s", pTask->id.idStr, terrstr());
continue;
}
+
if (output == NULL) {
if (pItem->type == STREAM_INPUT__DATA_RETRIEVE) {
- SSDataBlock block = {0};
+ SSDataBlock block = {0};
+
const SStreamDataBlock* pRetrieveBlock = (const SStreamDataBlock*)data;
ASSERT(taosArrayGetSize(pRetrieveBlock->blocks) == 1);
+
assignOneDataBlock(&block, taosArrayGet(pRetrieveBlock->blocks, 0));
block.info.type = STREAM_PULL_OVER;
block.info.childId = pTask->selfChildId;
taosArrayPush(pRes, &block);
- qDebug("task %d(child %d) processed retrieve, reqId %" PRId64, pTask->taskId, pTask->selfChildId,
+ qDebug("task %d(child %d) processed retrieve, reqId %" PRId64, pTask->id.taskId, pTask->selfChildId,
pRetrieveBlock->reqId);
}
break;
@@ -94,20 +106,21 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray*
continue;
}
- qDebug("task %d(child %d) executed and get block", pTask->taskId, pTask->selfChildId);
+ qDebug("task %d(child %d) executed and get block", pTask->id.taskId, pTask->selfChildId);
SSDataBlock block = {0};
assignOneDataBlock(&block, output);
block.info.childId = pTask->selfChildId;
taosArrayPush(pRes, &block);
}
+
return 0;
}
int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) {
ASSERT(pTask->taskLevel == TASK_LEVEL__SOURCE);
- void* exec = pTask->exec.executor;
+ void* exec = pTask->exec.pExecutor;
qSetStreamOpOpen(exec);
bool finished = false;
@@ -121,7 +134,7 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) {
int32_t batchCnt = 0;
while (1) {
- if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) {
+ if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
taosArrayDestroy(pRes);
return 0;
}
@@ -147,17 +160,17 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) {
batchCnt++;
- qDebug("task %d scan exec block num %d, block limit %d", pTask->taskId, batchCnt, batchSz);
+ qDebug("task %d scan exec block num %d, block limit %d", pTask->id.taskId, batchCnt, batchSz);
if (batchCnt >= batchSz) break;
}
if (taosArrayGetSize(pRes) == 0) {
if (finished) {
taosArrayDestroy(pRes);
- qDebug("task %d finish recover exec task ", pTask->taskId);
+ qDebug("task %d finish recover exec task ", pTask->id.taskId);
break;
} else {
- qDebug("task %d continue recover exec task ", pTask->taskId);
+ qDebug("task %d continue recover exec task ", pTask->id.taskId);
continue;
}
}
@@ -173,7 +186,7 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) {
streamTaskOutput(pTask, qRes);
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
- qDebug("task %d scan exec dispatch block num %d", pTask->taskId, batchCnt);
+ qDebug("task %d scan exec dispatch block num %d", pTask->id.taskId, batchCnt);
streamDispatch(pTask);
}
if (finished) break;
@@ -186,7 +199,7 @@ int32_t streamBatchExec(SStreamTask* pTask, int32_t batchLimit) {
// fetch all queue item, merge according to batchLimit
int32_t numOfItems = taosReadAllQitems(pTask->inputQueue1, pTask->inputQall);
if (numOfItems == 0) {
- qDebug("task: %d, stream task exec over, queue empty", pTask->taskId);
+ qDebug("task: %d, stream task exec over, queue empty", pTask->id.taskId);
return 0;
}
SStreamQueueItem* pMerged = NULL;
@@ -221,106 +234,148 @@ int32_t streamBatchExec(SStreamTask* pTask, int32_t batchLimit) {
+// Drain the input queue of pTask: queued items are merged into one batch, the batch is run through
+// the executor (sink-level tasks pass it straight to streamTaskOutput instead), the checkpoint is
+// persisted when the executor reports a newer data version, and result blocks go to streamTaskOutput.
+// Returns 0 once the queue is empty or the task is being dropped, -1 on failure.
int32_t streamExecForAll(SStreamTask* pTask) {
while (1) {
- int32_t batchCnt = 1;
- void* input = NULL;
+ int32_t batchSize = 1;
+ void* pInput = NULL;
+
+ // merge multiple input data if possible in the input queue.
while (1) {
SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue);
if (qItem == NULL) {
- qDebug("stream task exec over, queue empty, task: %d", pTask->taskId);
+// qDebug("s-task:%s extract data from input queue, queue is empty, abort", pTask->id.idStr);
break;
}
- if (input == NULL) {
- input = qItem;
+
+ if (pInput == NULL) {
+ pInput = qItem;
streamQueueProcessSuccess(pTask->inputQueue);
if (pTask->taskLevel == TASK_LEVEL__SINK) {
break;
}
} else {
- void* newRet;
- if ((newRet = streamMergeQueueItem(input, qItem)) == NULL) {
+ void* newRet = NULL;
+ if ((newRet = streamMergeQueueItem(pInput, qItem)) == NULL) {
streamQueueProcessFail(pTask->inputQueue);
break;
} else {
- batchCnt++;
- input = newRet;
+ batchSize++;
+ pInput = newRet;
streamQueueProcessSuccess(pTask->inputQueue);
- if (batchCnt > STREAM_EXEC_MAX_BATCH_NUM) {
+ if (batchSize > STREAM_EXEC_MAX_BATCH_NUM) {
break;
}
}
}
}
- if (pTask->taskStatus == TASK_STATUS__DROPPING) {
- if (input) streamFreeQitem(input);
+ if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {  // atomic read, matching the atomic stores to taskStatus
+ if (pInput) {
+ streamFreeQitem(pInput);
+ }
return 0;
}
- if (input == NULL) {
+ if (pInput == NULL) {
break;
}
if (pTask->taskLevel == TASK_LEVEL__SINK) {
- ASSERT(((SStreamQueueItem*)input)->type == STREAM_INPUT__DATA_BLOCK);
- streamTaskOutput(pTask, input);
+ ASSERT(((SStreamQueueItem*)pInput)->type == STREAM_INPUT__DATA_BLOCK);
+ qDebug("s-task:%s sink node start to sink result. numOfBlocks:%d", pTask->id.idStr, batchSize);
+ streamTaskOutput(pTask, pInput);
continue;
}
SArray* pRes = taosArrayInit(0, sizeof(SSDataBlock));
+ qDebug("s-task:%s exec begin, numOfBlocks:%d", pTask->id.idStr, batchSize);
- qDebug("stream task:%d exec begin, msg batch: %d", pTask->taskId, batchCnt);
- streamTaskExecImpl(pTask, input, pRes);
+ streamTaskExecImpl(pTask, pInput, pRes);
- qDebug("stream task:%d exec end", pTask->taskId);
+ int64_t ckId = 0;
+ int64_t dataVer = 0;
+ qGetCheckpointVersion(pTask->exec.pExecutor, &dataVer, &ckId);  // fills dataVer/ckId from the executor
+ if (dataVer > pTask->chkInfo.version) { // save it since the checkpoint is updated
+ qDebug("s-task:%s exec end, start to update check point, ver from %" PRId64 " to %" PRId64
+ ", checkPoint id:%" PRId64 " -> %" PRId64,
+ pTask->id.idStr, pTask->chkInfo.version, dataVer, pTask->chkInfo.id, ckId);
+
+ pTask->chkInfo = (SCheckpointInfo) {.version = dataVer, .id = ckId};
+
+ taosWLockLatch(&pTask->pMeta->lock);  // save + commit of the task meta run under the meta write latch
+ streamMetaSaveTask(pTask->pMeta, pTask);
+ if (streamMetaCommit(pTask->pMeta) < 0) {
+ taosWUnLockLatch(&pTask->pMeta->lock);
+ qError("s-task:%s failed to commit stream meta, since %s", pTask->id.idStr, terrstr());
+ // free the results and the input item here too, like the other failure paths of this function
+ taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes);
+ streamFreeQitem(pInput);
+ return -1;
+ } else {
+ taosWUnLockLatch(&pTask->pMeta->lock);
+ qDebug("s-task:%s update checkpoint ver succeed", pTask->id.idStr);
+ }
+ } else {
+ qDebug("s-task:%s exec end", pTask->id.idStr);
+ }
if (taosArrayGetSize(pRes) != 0) {
SStreamDataBlock* qRes = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0);
if (qRes == NULL) {
taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes);
- streamFreeQitem(input);
+ streamFreeQitem(pInput);
return -1;
}
+
qRes->type = STREAM_INPUT__DATA_BLOCK;
qRes->blocks = pRes;
- if (((SStreamQueueItem*)input)->type == STREAM_INPUT__DATA_SUBMIT) {
- SStreamDataSubmit2* pSubmit = (SStreamDataSubmit2*)input;
+ if (((SStreamQueueItem*)pInput)->type == STREAM_INPUT__DATA_SUBMIT) {
+ SStreamDataSubmit2* pSubmit = (SStreamDataSubmit2*)pInput;
qRes->childId = pTask->selfChildId;
qRes->sourceVer = pSubmit->ver;
- } else if (((SStreamQueueItem*)input)->type == STREAM_INPUT__MERGED_SUBMIT) {
- SStreamMergedSubmit2* pMerged = (SStreamMergedSubmit2*)input;
+ } else if (((SStreamQueueItem*)pInput)->type == STREAM_INPUT__MERGED_SUBMIT) {
+ SStreamMergedSubmit2* pMerged = (SStreamMergedSubmit2*)pInput;
qRes->childId = pTask->selfChildId;
qRes->sourceVer = pMerged->ver;
}
if (streamTaskOutput(pTask, qRes) < 0) {
taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes);
- streamFreeQitem(input);
+ streamFreeQitem(pInput);
taosFreeQitem(qRes);
return -1;
}
} else {
taosArrayDestroy(pRes);
}
- streamFreeQitem(input);
+ streamFreeQitem(pInput);
}
return 0;
}
int32_t streamTryExec(SStreamTask* pTask) {
+ // this function may be executed by multiple threads, so the sched status is checked and switched in one atomic step.
int8_t schedStatus =
- atomic_val_compare_exchange_8(&pTask->schedStatus, TASK_SCHED_STATUS__WAITING, TASK_SCHED_STATUS__ACTIVE);
+ atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__WAITING, TASK_SCHED_STATUS__ACTIVE);  // WAITING -> ACTIVE
+
if (schedStatus == TASK_SCHED_STATUS__WAITING) {
int32_t code = streamExecForAll(pTask);
if (code < 0) {
- atomic_store_8(&pTask->schedStatus, TASK_SCHED_STATUS__FAILED);
+ atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__FAILED);  // execution failed: leave the task marked FAILED
return -1;
}
- atomic_store_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE);
+
+ // todo the task should be committed here
+ atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
+ qDebug("s-task:%s exec completed", pTask->id.idStr);
if (!taosQueueEmpty(pTask->inputQueue->queue)) {
streamSchedExec(pTask);
}
}
+
return 0;
}
diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c
index 03391c0ba2..51cc315780 100644
--- a/source/libs/stream/src/streamMeta.c
+++ b/source/libs/stream/src/streamMeta.c
@@ -24,6 +24,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
+
int32_t len = strlen(path) + 20;
char* streamPath = taosMemoryCalloc(1, len);
sprintf(streamPath, "%s/%s", path, "stream");
@@ -50,7 +51,8 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
goto _err;
}
- pMeta->pTasks = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
+ _hash_fn_t fp = taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT);
+ pMeta->pTasks = taosHashInit(64, fp, true, HASH_ENTRY_LOCK);
if (pMeta->pTasks == NULL) {
goto _err;
}
@@ -59,9 +61,10 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
goto _err;
}
+ pMeta->vgId = vgId;
pMeta->ahandle = ahandle;
pMeta->expandFunc = expandFunc;
-
+ taosInitRWLatch(&pMeta->lock);
return pMeta;
_err:
@@ -81,19 +84,28 @@ void streamMetaClose(SStreamMeta* pMeta) {
tdbClose(pMeta->db);
void* pIter = NULL;
+// while(pMeta->walScan) {
+// qDebug("wait stream daemon quit");
+// taosMsleep(100);
+// }
+
while (1) {
pIter = taosHashIterate(pMeta->pTasks, pIter);
- if (pIter == NULL) break;
+ if (pIter == NULL) {
+ break;
+ }
+
SStreamTask* pTask = *(SStreamTask**)pIter;
if (pTask->timer) {
taosTmrStop(pTask->timer);
pTask->timer = NULL;
}
- tFreeSStreamTask(pTask);
+
+ tFreeStreamTask(pTask);
/*streamMetaReleaseTask(pMeta, pTask);*/
}
+
taosHashCleanup(pMeta->pTasks);
- taosHashCleanup(pMeta->pRecoverStatus);
taosMemoryFree(pMeta->path);
taosMemoryFree(pMeta);
}
@@ -106,7 +118,7 @@ int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t ver, char* msg,
}
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
- if (tDecodeSStreamTask(&decoder, pTask) < 0) {
+ if (tDecodeStreamTask(&decoder, pTask) < 0) {
tDecoderClear(&decoder);
goto FAIL;
}
@@ -117,12 +129,12 @@ int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t ver, char* msg,
goto FAIL;
}
- if (taosHashPut(pMeta->pTasks, &pTask->taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) {
+ if (taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) {
goto FAIL;
}
- if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), msg, msgLen, pMeta->txn) < 0) {
- taosHashRemove(pMeta->pTasks, &pTask->taskId, sizeof(int32_t));
+ if (tdbTbUpsert(pMeta->pTaskDb, &pTask->id.taskId, sizeof(int32_t), msg, msgLen, pMeta->txn) < 0) {
+ taosHashRemove(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t));
ASSERT(0);
goto FAIL;
}
@@ -130,7 +142,7 @@ int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t ver, char* msg,
return 0;
FAIL:
- if (pTask) tFreeSStreamTask(pTask);
+ if (pTask) tFreeStreamTask(pTask);
return -1;
}
#endif
@@ -139,7 +151,7 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) {
void* buf = NULL;
int32_t len;
int32_t code;
- tEncodeSize(tEncodeSStreamTask, pTask, len, code);
+ tEncodeSize(tEncodeStreamTask, pTask, len, code);
if (code < 0) {
return -1;
}
@@ -150,10 +162,10 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) {
SEncoder encoder = {0};
tEncoderInit(&encoder, buf, len);
- tEncodeSStreamTask(&encoder, pTask);
+ tEncodeStreamTask(&encoder, pTask);
tEncoderClear(&encoder);
- if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), buf, len, pMeta->txn) < 0) {
+ if (tdbTbUpsert(pMeta->pTaskDb, &pTask->id.taskId, sizeof(int32_t), buf, len, pMeta->txn) < 0) {
return -1;
}
@@ -161,8 +173,8 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) {
return 0;
}
-#if 1
-int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) {
+// add to the ready tasks hash map, not the restored tasks hash map
+int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) {
if (pMeta->expandFunc(pMeta->ahandle, pTask, ver) < 0) {
return -1;
}
@@ -171,39 +183,24 @@ int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) {
return -1;
}
- taosHashPut(pMeta->pTasks, &pTask->taskId, sizeof(int32_t), &pTask, sizeof(void*));
-
+ taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, POINTER_BYTES);
return 0;
}
-#endif
-#if 0
-SStreamTask* streamMetaGetTask(SStreamMeta* pMeta, int32_t taskId) {
- SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t));
- if (ppTask) {
- ASSERT((*ppTask)->taskId == taskId);
- return *ppTask;
- } else {
- return NULL;
- }
+int32_t streamMetaGetNumOfTasks(const SStreamMeta* pMeta) {  // number of entries currently in pMeta->pTasks
+ return (int32_t) taosHashGetSize(pMeta->pTasks);  // NOTE(review): read without taking pMeta->lock -- confirm callers tolerate that
+}
-#endif
SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId) {
taosRLockLatch(&pMeta->lock);
SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t));
- if (ppTask) {
- SStreamTask* pTask = *ppTask;
- if (atomic_load_8(&pTask->taskStatus) != TASK_STATUS__DROPPING) {
- atomic_add_fetch_32(&pTask->refCnt, 1);
- taosRUnLockLatch(&pMeta->lock);
- return pTask;
- } else {
- taosRUnLockLatch(&pMeta->lock);
- return NULL;
- }
+ if (ppTask != NULL && (atomic_load_8(&((*ppTask)->status.taskStatus)) != TASK_STATUS__DROPPING)) {  // task exists and is not being dropped
+ SStreamTask* pTask = *ppTask;  // copy the task pointer while the read latch is still held
+ atomic_add_fetch_32(&pTask->refCnt, 1);  // reference handed to the caller; given back via streamMetaReleaseTask
+ taosRUnLockLatch(&pMeta->lock);
+ return pTask;  // ppTask (presumably a slot inside the hash table) is not dereferenced after the unlock
}
taosRUnLockLatch(&pMeta->lock);
return NULL;
}
@@ -212,8 +209,8 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) {
int32_t left = atomic_sub_fetch_32(&pTask->refCnt, 1);
ASSERT(left >= 0);
if (left == 0) {
- ASSERT(atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING);
- tFreeSStreamTask(pTask);
+ ASSERT(atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING);
+ tFreeStreamTask(pTask);
}
}
@@ -227,7 +224,7 @@ void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) {
* taosTmrStop(pTask->timer);*/
/*pTask->timer = NULL;*/
/*}*/
- atomic_store_8(&pTask->taskStatus, TASK_STATUS__DROPPING);
+ atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING);
taosWLockLatch(&pMeta->lock);
streamMetaReleaseTask(pMeta, pTask);
@@ -245,9 +242,12 @@ int32_t streamMetaBegin(SStreamMeta* pMeta) {
int32_t streamMetaCommit(SStreamMeta* pMeta) {
if (tdbCommit(pMeta->db, pMeta->txn) < 0) {
+ ASSERT(0);
return -1;
}
+
if (tdbPostCommit(pMeta->db, pMeta->txn) < 0) {
+ ASSERT(0);
return -1;
}
@@ -293,25 +293,27 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) {
return -1;
}
tDecoderInit(&decoder, (uint8_t*)pVal, vLen);
- tDecodeSStreamTask(&decoder, pTask);
+ tDecodeStreamTask(&decoder, pTask);
tDecoderClear(&decoder);
- if (pMeta->expandFunc(pMeta->ahandle, pTask, -1) < 0) {
+ // todo set correct initial version.
+ if (pMeta->expandFunc(pMeta->ahandle, pTask, 0) < 0) {
tdbFree(pKey);
tdbFree(pVal);
tdbTbcClose(pCur);
return -1;
}
- if (taosHashPut(pMeta->pTasks, &pTask->taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) {
+ if (taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) {
tdbFree(pKey);
tdbFree(pVal);
tdbTbcClose(pCur);
return -1;
}
- /*pTask->taskStatus = TASK_STATUS__NORMAL;*/
+
+ /*pTask->status.taskStatus = TASK_STATUS__NORMAL;*/
if (pTask->fillHistory) {
- pTask->taskStatus = TASK_STATUS__WAIT_DOWNSTREAM;
+ pTask->status.taskStatus = TASK_STATUS__WAIT_DOWNSTREAM;
streamTaskCheckDownstream(pTask, ver);
}
}
diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c
index 87058bf490..03afc0692d 100644
--- a/source/libs/stream/src/streamRecover.c
+++ b/source/libs/stream/src/streamRecover.c
@@ -16,9 +16,10 @@
#include "streamInc.h"
int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) {
- qDebug("task %d at node %d launch recover", pTask->taskId, pTask->nodeId);
+ qDebug("s-task:%s at node %d launch recover", pTask->id.idStr, pTask->nodeId);
+
if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
- atomic_store_8(&pTask->taskStatus, TASK_STATUS__RECOVER_PREPARE);
+ atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__RECOVER_PREPARE);
streamSetParamForRecover(pTask);
streamSourceRecoverPrepareStep1(pTask, version);
@@ -33,34 +34,31 @@ int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) {
memcpy(serializedReq, &req, len);
- SRpcMsg rpcMsg = {
- .contLen = len,
- .pCont = serializedReq,
- .msgType = TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE,
- };
-
+ SRpcMsg rpcMsg = { .contLen = len, .pCont = serializedReq, .msgType = TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE };
if (tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &rpcMsg) < 0) {
/*ASSERT(0);*/
}
} else if (pTask->taskLevel == TASK_LEVEL__AGG) {
- atomic_store_8(&pTask->taskStatus, TASK_STATUS__NORMAL);
+ atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL);
streamSetParamForRecover(pTask);
streamAggRecoverPrepare(pTask);
} else if (pTask->taskLevel == TASK_LEVEL__SINK) {
- atomic_store_8(&pTask->taskStatus, TASK_STATUS__NORMAL);
+ atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL);
}
+
return 0;
}
// checkstatus
int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) {
SStreamTaskCheckReq req = {
- .streamId = pTask->streamId,
- .upstreamTaskId = pTask->taskId,
+ .streamId = pTask->id.streamId,
+ .upstreamTaskId = pTask->id.taskId,
.upstreamNodeId = pTask->nodeId,
.childId = pTask->selfChildId,
};
+
// serialize
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
req.reqId = tGenIdPI64();
@@ -68,7 +66,7 @@ int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) {
req.downstreamTaskId = pTask->fixedEpDispatcher.taskId;
pTask->checkReqId = req.reqId;
- qDebug("task %d at node %d check downstream task %d at node %d", pTask->taskId, pTask->nodeId, req.downstreamTaskId,
+ qDebug("task %d at node %d check downstream task %d at node %d", pTask->id.taskId, pTask->nodeId, req.downstreamTaskId,
req.downstreamNodeId);
streamDispatchOneCheckReq(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet);
} else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
@@ -83,12 +81,12 @@ int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) {
taosArrayPush(pTask->checkReqIds, &req.reqId);
req.downstreamNodeId = pVgInfo->vgId;
req.downstreamTaskId = pVgInfo->taskId;
- qDebug("task %d at node %d check downstream task %d at node %d (shuffle)", pTask->taskId, pTask->nodeId,
+ qDebug("task %d at node %d check downstream task %d at node %d (shuffle)", pTask->id.taskId, pTask->nodeId,
req.downstreamTaskId, req.downstreamNodeId);
streamDispatchOneCheckReq(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
}
} else {
- qDebug("task %d at node %d direct launch recover since no downstream", pTask->taskId, pTask->nodeId);
+ qDebug("task %d at node %d direct launch recover since no downstream", pTask->id.taskId, pTask->nodeId);
streamTaskLaunchRecover(pTask, version);
}
return 0;
@@ -104,7 +102,7 @@ int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp
.downstreamNodeId = pRsp->downstreamNodeId,
.childId = pRsp->childId,
};
- qDebug("task %d at node %d check downstream task %d at node %d (recheck)", pTask->taskId, pTask->nodeId,
+ qDebug("task %d at node %d check downstream task %d at node %d (recheck)", pTask->id.taskId, pTask->nodeId,
req.downstreamTaskId, req.downstreamNodeId);
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
streamDispatchOneCheckReq(pTask, &req, pRsp->downstreamNodeId, &pTask->fixedEpDispatcher.epSet);
@@ -122,12 +120,13 @@ int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp
}
int32_t streamProcessTaskCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* pReq) {
- return atomic_load_8(&pTask->taskStatus) == TASK_STATUS__NORMAL;
+ return atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL;  // 1 when the task status is NORMAL, 0 otherwise; pReq is not read
}
int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp, int64_t version) {
qDebug("task %d at node %d recv check rsp from task %d at node %d: status %d", pRsp->upstreamTaskId,
pRsp->upstreamNodeId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->status);
+
if (pRsp->status == 1) {
if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
bool found = false;
@@ -138,7 +137,11 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp*
break;
}
}
- if (!found) return -1;
+
+ if (!found) {
+ return -1;
+ }
+
int32_t left = atomic_sub_fetch_32(&pTask->recoverTryingDownstream, 1);
ASSERT(left >= 0);
if (left == 0) {
@@ -147,7 +150,10 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp*
streamTaskLaunchRecover(pTask, version);
}
} else if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
- if (pRsp->reqId != pTask->checkReqId) return -1;
+ if (pRsp->reqId != pTask->checkReqId) {
+ return -1;
+ }
+
streamTaskLaunchRecover(pTask, version);
} else {
ASSERT(0);
@@ -160,28 +166,29 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp*
// common
int32_t streamSetParamForRecover(SStreamTask* pTask) {
- void* exec = pTask->exec.executor;
+ void* exec = pTask->exec.pExecutor;  // executor handle of the task; the result of qStreamSetParamForRecover is returned as-is
return qStreamSetParamForRecover(exec);
}
int32_t streamRestoreParam(SStreamTask* pTask) {
- void* exec = pTask->exec.executor;
+ void* exec = pTask->exec.pExecutor;  // executor handle of the task; the result of qStreamRestoreParam is returned as-is
return qStreamRestoreParam(exec);
}
+
int32_t streamSetStatusNormal(SStreamTask* pTask) {
- atomic_store_8(&pTask->taskStatus, TASK_STATUS__NORMAL);
+ atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL);  // atomically switch the task to NORMAL; always returns 0
return 0;
}
// source
int32_t streamSourceRecoverPrepareStep1(SStreamTask* pTask, int64_t ver) {
- void* exec = pTask->exec.executor;
+ void* exec = pTask->exec.pExecutor;  // executor handle of the task; ver is forwarded unchanged to qStreamSourceRecoverStep1
return qStreamSourceRecoverStep1(exec, ver);
}
int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamRecoverStep1Req* pReq) {
pReq->msgHead.vgId = pTask->nodeId;
- pReq->streamId = pTask->streamId;
- pReq->taskId = pTask->taskId;
+ pReq->streamId = pTask->id.streamId;  // request carries the ids of this task (node id goes into msgHead.vgId)
+ pReq->taskId = pTask->id.taskId;  // no other field of pReq is written; always returns 0
return 0;
}
@@ -192,13 +199,13 @@ int32_t streamSourceRecoverScanStep1(SStreamTask* pTask) {
int32_t streamBuildSourceRecover2Req(SStreamTask* pTask, SStreamRecoverStep2Req* pReq) {
pReq->msgHead.vgId = pTask->nodeId;
- pReq->streamId = pTask->streamId;
- pReq->taskId = pTask->taskId;
+ pReq->streamId = pTask->id.streamId;  // request carries the ids of this task (node id goes into msgHead.vgId)
+ pReq->taskId = pTask->id.taskId;  // no other field of pReq is written; always returns 0
return 0;
}
int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver) {
- void* exec = pTask->exec.executor;
+ void* exec = pTask->exec.pExecutor;
if (qStreamSourceRecoverStep2(exec, ver) < 0) {
}
return streamScanExec(pTask, 100);
@@ -206,7 +213,7 @@ int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver) {
int32_t streamDispatchRecoverFinishReq(SStreamTask* pTask) {
SStreamRecoverFinishReq req = {
- .streamId = pTask->streamId,
+ .streamId = pTask->id.streamId,
.childId = pTask->selfChildId,
};
// serialize
@@ -227,13 +234,13 @@ int32_t streamDispatchRecoverFinishReq(SStreamTask* pTask) {
// agg
int32_t streamAggRecoverPrepare(SStreamTask* pTask) {
- void* exec = pTask->exec.executor;
pTask->recoverWaitingUpstream = taosArrayGetSize(pTask->childEpInfo);
+ qDebug("s-task:%s wait for %d upstreams", pTask->id.idStr, pTask->recoverWaitingUpstream);  // counter is decremented in streamProcessRecoverFinishReq
return 0;
}
int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask) {
- void* exec = pTask->exec.executor;
+ void* exec = pTask->exec.pExecutor;
if (qStreamRestoreParam(exec) < 0) {
return -1;
}
@@ -247,6 +254,7 @@ int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask) {
int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t childId) {
if (pTask->taskLevel == TASK_LEVEL__AGG) {
int32_t left = atomic_sub_fetch_32(&pTask->recoverWaitingUpstream, 1);
+ qDebug("s-task:%s remain unfinished child tasks:%d", pTask->id.idStr, left);
ASSERT(left >= 0);
if (left == 0) {
streamAggChildrenRecoverFinish(pTask);
diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c
index 411726075e..7bea989e3a 100644
--- a/source/libs/stream/src/streamState.c
+++ b/source/libs/stream/src/streamState.c
@@ -121,7 +121,7 @@ SStreamState* streamStateOpen(char* path, SStreamTask* pTask, bool specPath, int
char statePath[1024];
if (!specPath) {
- sprintf(statePath, "%s/%d", path, pTask->taskId);
+ sprintf(statePath, "%s/%d", path, pTask->id.taskId);
} else {
memset(statePath, 0, 1024);
tstrncpy(statePath, path, 1024);
@@ -193,6 +193,7 @@ SStreamState* streamStateOpen(char* path, SStreamTask* pTask, bool specPath, int
}
pState->pTdbState->pOwner = pTask;
+ pState->checkPointId = 0;
return pState;
@@ -243,6 +244,7 @@ int32_t streamStateCommit(SStreamState* pState) {
TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
return -1;
}
+ pState->checkPointId++;
return 0;
}
diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c
index e9aba0bc39..67c60008fd 100644
--- a/source/libs/stream/src/streamTask.c
+++ b/source/libs/stream/src/streamTask.c
@@ -15,15 +15,22 @@
#include "executor.h"
#include "tstream.h"
+#include "wal.h"
-SStreamTask* tNewSStreamTask(int64_t streamId) {
+SStreamTask* tNewStreamTask(int64_t streamId) {
SStreamTask* pTask = (SStreamTask*)taosMemoryCalloc(1, sizeof(SStreamTask));
if (pTask == NULL) {
return NULL;
}
- pTask->taskId = tGenIdPI32();
- pTask->streamId = streamId;
- pTask->schedStatus = TASK_SCHED_STATUS__INACTIVE;
+
+ pTask->id.taskId = tGenIdPI32();
+ pTask->id.streamId = streamId;
+
+ char buf[128] = {0};
+ sprintf(buf, "0x%"PRIx64"-%d", pTask->id.streamId, pTask->id.taskId);
+
+ pTask->id.idStr = taosStrdup(buf);
+ pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
pTask->inputStatus = TASK_INPUT_STATUS__NORMAL;
pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL;
@@ -48,24 +55,24 @@ int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo) {
return 0;
}
-int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) {
+int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) {
if (tStartEncode(pEncoder) < 0) return -1;
- if (tEncodeI64(pEncoder, pTask->streamId) < 0) return -1;
- if (tEncodeI32(pEncoder, pTask->taskId) < 0) return -1;
+ if (tEncodeI64(pEncoder, pTask->id.streamId) < 0) return -1;
+ if (tEncodeI32(pEncoder, pTask->id.taskId) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->totalLevel) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->taskLevel) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->outputType) < 0) return -1;
if (tEncodeI16(pEncoder, pTask->dispatchMsgType) < 0) return -1;
- if (tEncodeI8(pEncoder, pTask->taskStatus) < 0) return -1;
- if (tEncodeI8(pEncoder, pTask->schedStatus) < 0) return -1;
+ if (tEncodeI8(pEncoder, pTask->status.taskStatus) < 0) return -1;
+ if (tEncodeI8(pEncoder, pTask->status.schedStatus) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->selfChildId) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->nodeId) < 0) return -1;
if (tEncodeSEpSet(pEncoder, &pTask->epSet) < 0) return -1;
- if (tEncodeI64(pEncoder, pTask->recoverSnapVer) < 0) return -1;
- if (tEncodeI64(pEncoder, pTask->startVer) < 0) return -1;
+ if (tEncodeI64(pEncoder, pTask->chkInfo.id) < 0) return -1;
+ if (tEncodeI64(pEncoder, pTask->chkInfo.version) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->fillHistory) < 0) return -1;
int32_t epSz = taosArrayGetSize(pTask->childEpInfo);
@@ -101,24 +108,24 @@ int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) {
return pEncoder->pos;
}
-int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask) {
+int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) {
if (tStartDecode(pDecoder) < 0) return -1;
- if (tDecodeI64(pDecoder, &pTask->streamId) < 0) return -1;
- if (tDecodeI32(pDecoder, &pTask->taskId) < 0) return -1;
+ if (tDecodeI64(pDecoder, &pTask->id.streamId) < 0) return -1;
+ if (tDecodeI32(pDecoder, &pTask->id.taskId) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->totalLevel) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->taskLevel) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->outputType) < 0) return -1;
if (tDecodeI16(pDecoder, &pTask->dispatchMsgType) < 0) return -1;
- if (tDecodeI8(pDecoder, &pTask->taskStatus) < 0) return -1;
- if (tDecodeI8(pDecoder, &pTask->schedStatus) < 0) return -1;
+ if (tDecodeI8(pDecoder, &pTask->status.taskStatus) < 0) return -1;
+ if (tDecodeI8(pDecoder, &pTask->status.schedStatus) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->selfChildId) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->nodeId) < 0) return -1;
if (tDecodeSEpSet(pDecoder, &pTask->epSet) < 0) return -1;
- if (tDecodeI64(pDecoder, &pTask->recoverSnapVer) < 0) return -1;
- if (tDecodeI64(pDecoder, &pTask->startVer) < 0) return -1;
+ if (tDecodeI64(pDecoder, &pTask->chkInfo.id) < 0) return -1;
+ if (tDecodeI64(pDecoder, &pTask->chkInfo.version) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->fillHistory) < 0) return -1;
int32_t epSz;
@@ -162,24 +169,47 @@ int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask) {
return 0;
}
-void tFreeSStreamTask(SStreamTask* pTask) {
- qDebug("free stream task %d", pTask->taskId);
- if (pTask->inputQueue) streamQueueClose(pTask->inputQueue);
- if (pTask->outputQueue) streamQueueClose(pTask->outputQueue);
- if (pTask->exec.qmsg) taosMemoryFree(pTask->exec.qmsg);
- if (pTask->exec.executor) qDestroyTask(pTask->exec.executor);
+void tFreeStreamTask(SStreamTask* pTask) {  // releases the resources attached to pTask, then pTask itself
+ qDebug("free s-task:%s", pTask->id.idStr);  // NOTE(review): idStr is only NULL-checked further down -- confirm it is always set here
+
+ if (pTask->inputQueue) {
+ streamQueueClose(pTask->inputQueue);
+ }
+ if (pTask->outputQueue) {
+ streamQueueClose(pTask->outputQueue);
+ }
+ if (pTask->exec.qmsg) {
+ taosMemoryFree(pTask->exec.qmsg);
+ }
+
+ if (pTask->exec.pExecutor) {
+ qDestroyTask(pTask->exec.pExecutor);
+ pTask->exec.pExecutor = NULL;
+ }
+
+ if (pTask->exec.pWalReader != NULL) {  // close the WAL reader attached to the task, if there is one
+ walCloseReader(pTask->exec.pWalReader);
+ }
+
taosArrayDestroyP(pTask->childEpInfo, taosMemoryFree);
if (pTask->outputType == TASK_OUTPUT__TABLE) {
tDeleteSSchemaWrapper(pTask->tbSink.pSchemaWrapper);
taosMemoryFree(pTask->tbSink.pTSchema);
}
+
if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
taosArrayDestroy(pTask->shuffleDispatcher.dbInfo.pVgroupInfos);
taosArrayDestroy(pTask->checkReqIds);
pTask->checkReqIds = NULL;
}
- if (pTask->pState) streamStateClose(pTask->pState);
+ if (pTask->pState) {
+ streamStateClose(pTask->pState);
+ }
+
+ if (pTask->id.idStr != NULL) {
+ taosMemoryFree((void*)pTask->id.idStr);  // the cast suggests idStr is declared const -- TODO confirm
+ }
taosMemoryFree(pTask);
}
diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c
index db4e3a4759..dc3ff3e6de 100644
--- a/source/libs/wal/src/walRead.c
+++ b/source/libs/wal/src/walRead.c
@@ -100,6 +100,8 @@ int32_t walNextValidMsg(SWalReader *pReader) {
return -1;
}
+int64_t walReaderGetCurrentVer(const SWalReader *pReader) { return pReader->curVersion; }  // accessor: returns the reader's curVersion field
+
static int64_t walReadSeekFilePos(SWalReader *pReader, int64_t fileFirstVer, int64_t ver) {
int64_t ret = 0;
diff --git a/source/util/src/tworker.c b/source/util/src/tworker.c
index 631bcb443e..a49ff0cd5b 100644
--- a/source/util/src/tworker.c
+++ b/source/util/src/tworker.c
@@ -218,7 +218,7 @@ STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem
int32_t queueNum = taosGetQueueNumber(pool->qset);
int32_t curWorkerNum = taosArrayGetSize(pool->workers);
int32_t dstWorkerNum = ceil(queueNum * pool->ratio);
- if (dstWorkerNum < 1) dstWorkerNum = 1;
+ if (dstWorkerNum < 2) dstWorkerNum = 2;
// spawn a thread to process queue
while (curWorkerNum < dstWorkerNum) {
@@ -248,7 +248,8 @@ STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem
}
taosThreadAttrDestroy(&thAttr);
- uInfo("worker:%s:%d is launched, total:%d", pool->name, worker->id, (int32_t)taosArrayGetSize(pool->workers));
+ int32_t numOfThreads = taosArrayGetSize(pool->workers);
+ uInfo("worker:%s:%d is launched, total:%d, expect:%d", pool->name, worker->id, numOfThreads, dstWorkerNum);
curWorkerNum++;
}
diff --git a/tests/script/tsim/stream/basic1.sim b/tests/script/tsim/stream/basic1.sim
index e69875d69f..15ca6bf7c9 100644
--- a/tests/script/tsim/stream/basic1.sim
+++ b/tests/script/tsim/stream/basic1.sim
@@ -37,7 +37,7 @@ if $loop_count == 20 then
endi
if $rows != 4 then
- print =====rows=$rows
+ print =====rows=$rows, expect 4
goto loop0
endi
@@ -53,7 +53,7 @@ if $data02 != 2 then
endi
if $data03 != 5 then
- print =====data03=$data03
+ print =====data03=$data03, expect:5
goto loop0
endi