diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index fcb3160f64..34372dc2ff 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -26,6 +26,7 @@ extern "C" { typedef void* qTaskInfo_t; typedef void* DataSinkHandle; + struct SRpcMsg; struct SSubplan; @@ -91,6 +92,9 @@ void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId); int32_t qSetStreamOpOpen(qTaskInfo_t tinfo); +// todo refactor +void qGetCheckpointVersion(qTaskInfo_t tinfo, int64_t* dataVer, int64_t* ckId); + /** * Set multiple input data blocks for the stream scan. * @param tinfo @@ -119,7 +123,7 @@ int32_t qSetSMAInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, * @param isAdd * @return */ -int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd); +int32_t qUpdateTableListForStreamScanner(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd); /** * Create the exec task object according to task json @@ -163,6 +167,7 @@ void qCleanExecTaskBlockBuf(qTaskInfo_t tinfo); * @return */ int32_t qAsyncKillTask(qTaskInfo_t tinfo, int32_t rspCode); + int32_t qKillTask(qTaskInfo_t tinfo, int32_t rspCode); bool qTaskIsExecuting(qTaskInfo_t qinfo); @@ -182,21 +187,11 @@ int32_t qSerializeTaskStatus(qTaskInfo_t tinfo, char** pOutput, int32_t* len); int32_t qDeserializeTaskStatus(qTaskInfo_t tinfo, const char* pInput, int32_t len); STimeWindow getAlignQueryTimeWindow(SInterval* pInterval, int32_t precision, int64_t key); -/** - * return the scan info, in the form of tuple of two items, including table uid and current timestamp - * @param tinfo - * @param uid - * @param ts - * @return - */ -int32_t qGetStreamScanStatus(qTaskInfo_t tinfo, uint64_t* uid, int64_t* ts); -int32_t qStreamPrepareTsdbScan(qTaskInfo_t tinfo, uint64_t uid, int64_t ts); +SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo); int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subType); -// int32_t qStreamScanMemData(qTaskInfo_t tinfo, const SSubmitReq* pReq, int64_t ver); -// int32_t qStreamSetScanMemData(qTaskInfo_t tinfo, SPackedData submit); void qStreamSetOpen(qTaskInfo_t tinfo); diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index b6ada5a0c7..cfc6ef2025 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -194,6 +194,7 @@ typedef struct SRequestConnInfo { typedef void (*__freeFunc)(void* param); +// todo add creator/destroyer function typedef struct SMsgSendInfo { __async_send_cb_fn_t fp; // async callback function STargetInfo target; // for update epset diff --git a/include/libs/stream/streamState.h b/include/libs/stream/streamState.h index fd5cec2931..42a7261f38 100644 --- a/include/libs/stream/streamState.h +++ b/include/libs/stream/streamState.h @@ -42,6 +42,7 @@ typedef struct STdbState { typedef struct { STdbState* pTdbState; int32_t number; + int64_t checkPointId; } SStreamState; SStreamState* streamStateOpen(char* path, struct SStreamTask* pTask, bool specPath, int32_t szPage, int32_t pages); diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 5b1d1fa1bc..103f807191 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -13,8 +13,8 @@ * along with this program. If not, see . */ -#include "executor.h" #include "os.h" +#include "executor.h" #include "query.h" #include "streamState.h" #include "tdatablock.h" @@ -50,6 +50,7 @@ enum { TASK_STATUS__RECOVER_PREPARE, TASK_STATUS__RECOVER1, TASK_STATUS__RECOVER2, + TASK_STATUS__RESTORE, // only available for source task to replay WAL from the checkpoint }; enum { @@ -103,21 +104,8 @@ typedef struct { int8_t type; } SStreamQueueItem; -#if 0 -typedef struct { - int8_t type; - int64_t ver; - int32_t* dataRef; - SSubmitReq* data; -} SStreamDataSubmit; - -typedef struct { - int8_t type; - int64_t ver; - SArray* dataRefs; // SArray - SArray* reqs; // SArray -} SStreamMergedSubmit; -#endif +typedef void FTbSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data); +typedef int32_t FTaskExpand(void* ahandle, SStreamTask* pTask, int64_t ver); typedef struct { int8_t type; @@ -219,21 +207,20 @@ static FORCE_INLINE void streamQueueProcessFail(SStreamQueue* queue) { } static FORCE_INLINE void* streamQueueCurItem(SStreamQueue* queue) { - // return queue->qItem; } void* streamQueueNextItem(SStreamQueue* queue); -SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit); +SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit, int32_t type); void streamDataSubmitDestroy(SStreamDataSubmit2* pDataSubmit); SStreamDataSubmit2* streamSubmitBlockClone(SStreamDataSubmit2* pSubmit); typedef struct { - char* qmsg; - // followings are not applicable to encoder and decoder - void* executor; + char* qmsg; + void* pExecutor; // not applicable to encoder and decoder + struct SWalReader* pWalReader; // not applicable to encoder and decoder } STaskExec; typedef struct { @@ -248,16 +235,13 @@ typedef struct { SUseDbRsp dbInfo; } STaskDispatcherShuffle; -typedef void FTbSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data); - typedef struct { int64_t stbUid; char stbFullName[TSDB_TABLE_FNAME_LEN]; SSchemaWrapper* pSchemaWrapper; - // not applicable to encoder and decoder - void* vnode; - FTbSink* tbSinkFunc; - STSchema* pTSchema; + void* vnode; // not available to encoder and decoder + FTbSink* tbSinkFunc; + STSchema* pTSchema; } STaskSinkTb; typedef void FSmaSink(void* vnode, int64_t smaId, const SArray* data); @@ -280,24 +264,34 @@ typedef struct { SEpSet epSet; } SStreamChildEpInfo; -struct SStreamTask { - int64_t streamId; - int32_t taskId; - int32_t totalLevel; - int8_t taskLevel; - int8_t outputType; - int16_t dispatchMsgType; +typedef struct SStreamId { + int64_t streamId; + int32_t taskId; + const char* idStr; +} SStreamId; +typedef struct SCheckpointInfo { + int64_t id; + int64_t version; // offset in WAL +} SCheckpointInfo; + +typedef struct SStreamStatus { int8_t taskStatus; int8_t schedStatus; +} SStreamStatus; - // node info - int32_t selfChildId; - int32_t nodeId; - SEpSet epSet; - - int64_t recoverSnapVer; - int64_t startVer; +struct SStreamTask { + SStreamId id; + int32_t totalLevel; + int8_t taskLevel; + int8_t outputType; + int16_t dispatchMsgType; + SStreamStatus status; + int32_t selfChildId; + int32_t nodeId; + SEpSet epSet; + SCheckpointInfo chkInfo; + STaskExec exec; // fill history int8_t fillHistory; @@ -307,9 +301,6 @@ struct SStreamTask { int32_t nextCheckId; SArray* checkpointInfo; // SArray - // exec - STaskExec exec; - // output union { STaskDispatcherFixedEp fixedEpDispatcher; @@ -319,44 +310,54 @@ struct SStreamTask { STaskSinkFetch fetchSink; }; - int8_t inputStatus; - int8_t outputStatus; - - // STaosQueue* inputQueue1; - // STaosQall* inputQall; + int8_t inputStatus; + int8_t outputStatus; SStreamQueue* inputQueue; SStreamQueue* outputQueue; // trigger - int8_t triggerStatus; - int64_t triggerParam; - void* timer; + int8_t triggerStatus; + int64_t triggerParam; + void* timer; + SMsgCb* pMsgCb; // msg handle + SStreamState* pState; // state backend - // msg handle - SMsgCb* pMsgCb; - - // state backend - SStreamState* pState; - - // do not serialize - int32_t recoverTryingDownstream; - int32_t recoverWaitingUpstream; - int64_t checkReqId; - SArray* checkReqIds; // shuffle - int32_t refCnt; - - int64_t checkpointingId; - int32_t checkpointAlignCnt; + // the followings attributes don't be serialized + int32_t recoverTryingDownstream; + int32_t recoverWaitingUpstream; + int64_t checkReqId; + SArray* checkReqIds; // shuffle + int32_t refCnt; + int64_t checkpointingId; + int32_t checkpointAlignCnt; + struct SStreamMeta* pMeta; }; +// meta +typedef struct SStreamMeta { + char* path; + TDB* db; + TTB* pTaskDb; + TTB* pCheckpointDb; + SHashObj* pTasks; + void* ahandle; + TXN* txn; + FTaskExpand* expandFunc; + int32_t vgId; + SRWLatch lock; + int8_t walScan; + bool quit; +} SStreamMeta; + int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo); -SStreamTask* tNewSStreamTask(int64_t streamId); -int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask); -int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask); -void tFreeSStreamTask(SStreamTask* pTask); -int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem); +SStreamTask* tNewStreamTask(int64_t streamId); +int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask); +int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask); +void tFreeStreamTask(SStreamTask* pTask); +int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem); +bool tInputQueueIsFull(const SStreamTask* pTask); static FORCE_INLINE void streamTaskInputFail(SStreamTask* pTask) { atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__FAILED); @@ -564,40 +565,22 @@ int32_t streamAggRecoverPrepare(SStreamTask* pTask); // int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask); int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t childId); -// expand and deploy -typedef int32_t FTaskExpand(void* ahandle, SStreamTask* pTask, int64_t ver); - -// meta -typedef struct SStreamMeta { - char* path; - TDB* db; - TTB* pTaskDb; - TTB* pCheckpointDb; - SHashObj* pTasks; - SHashObj* pRecoverStatus; - void* ahandle; - TXN* txn; - FTaskExpand* expandFunc; - int32_t vgId; - SRWLatch lock; -} SStreamMeta; - SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId); void streamMetaClose(SStreamMeta* streamMeta); int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); -int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask); -int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t startVer, char* msg, int32_t msgLen); -// SStreamTask* streamMetaGetTask(SStreamMeta* pMeta, int32_t taskId); +int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask); +int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t checkpointVer, char* msg, int32_t msgLen); +int32_t streamMetaGetNumOfTasks(const SStreamMeta* pMeta); SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId); void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); -int32_t streamMetaBegin(SStreamMeta* pMeta); -int32_t streamMetaCommit(SStreamMeta* pMeta); -int32_t streamMetaRollBack(SStreamMeta* pMeta); -int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver); +int32_t streamMetaBegin(SStreamMeta* pMeta); +int32_t streamMetaCommit(SStreamMeta* pMeta); +int32_t streamMetaRollBack(SStreamMeta* pMeta); +int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index ccbc53fa5d..b51289de5e 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -138,7 +138,8 @@ typedef struct { int8_t enableRef; } SWalFilterCond; -typedef struct { +// todo hide this struct +typedef struct SWalReader { SWal *pWal; int64_t readerId; TdFilePtr pLogFile; @@ -196,6 +197,7 @@ void walReadReset(SWalReader *pReader); int32_t walReadVer(SWalReader *pRead, int64_t ver); int32_t walReadSeekVer(SWalReader *pRead, int64_t ver); int32_t walNextValidMsg(SWalReader *pRead); +int64_t walReaderGetCurrentVer(const SWalReader* pReader); // only for tq usage void walSetReaderCapacity(SWalReader *pRead, int32_t capacity); diff --git a/source/client/inc/clientInt.h b/source/client/inc/clientInt.h index 86db35b412..41f87379a9 100644 --- a/source/client/inc/clientInt.h +++ b/source/client/inc/clientInt.h @@ -36,14 +36,6 @@ extern "C" { #include "tconfig.h" -#define CHECK_CODE_GOTO(expr, label) \ - do { \ - code = expr; \ - if (TSDB_CODE_SUCCESS != code) { \ - goto label; \ - } \ - } while (0) - #define ERROR_MSG_BUF_DEFAULT_SIZE 512 #define HEARTBEAT_INTERVAL 1500 // ms @@ -286,28 +278,7 @@ static FORCE_INLINE SReqResultInfo* tmqGetCurResInfo(TAOS_RES* res) { return (SReqResultInfo*)&msg->resInfo; } -static FORCE_INLINE SReqResultInfo* tmqGetNextResInfo(TAOS_RES* res, bool convertUcs4) { - SMqRspObj* pRspObj = (SMqRspObj*)res; - pRspObj->resIter++; - - if (pRspObj->resIter < pRspObj->rsp.blockNum) { - SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)taosArrayGetP(pRspObj->rsp.blockData, pRspObj->resIter); - if (pRspObj->rsp.withSchema) { - SSchemaWrapper* pSW = (SSchemaWrapper*)taosArrayGetP(pRspObj->rsp.blockSchema, pRspObj->resIter); - setResSchemaInfo(&pRspObj->resInfo, pSW->pSchema, pSW->nCols); - taosMemoryFreeClear(pRspObj->resInfo.row); - taosMemoryFreeClear(pRspObj->resInfo.pCol); - taosMemoryFreeClear(pRspObj->resInfo.length); - taosMemoryFreeClear(pRspObj->resInfo.convertBuf); - taosMemoryFreeClear(pRspObj->resInfo.convertJson); - } - - setQueryResultFromRsp(&pRspObj->resInfo, pRetrieve, convertUcs4, false); - return &pRspObj->resInfo; - } - - return NULL; -} +SReqResultInfo* tmqGetNextResInfo(TAOS_RES* res, bool convertUcs4); static FORCE_INLINE SReqResultInfo* tscGetCurResInfo(TAOS_RES* res) { if (TD_RES_QUERY(res)) return &(((SRequestObj*)res)->body.resInfo); @@ -320,7 +291,6 @@ extern int32_t clientConnRefPool; extern int32_t timestampDeltaLimit; extern int64_t lastClusterId; - __async_send_cb_fn_t getMsgRspHandle(int32_t msgType); SMsgSendInfo* buildMsgInfoImpl(SRequestObj* pReqObj); @@ -373,7 +343,6 @@ void taos_close_internal(void* taos); // global, called by mgmt int hbMgrInit(); void hbMgrCleanUp(); -int hbHandleRsp(SClientHbBatchRsp* hbRsp); // cluster level SAppHbMgr* appHbMgrInit(SAppInstInfo* pAppInstInfo, char* key); @@ -386,9 +355,6 @@ void stopAllRequests(SHashObj* pRequests); int hbRegisterConn(SAppHbMgr* pAppHbMgr, int64_t tscRefId, int64_t clusterId, int8_t connType); void hbDeregisterConn(SAppHbMgr* pAppHbMgr, SClientHbKey connKey); -// --- mq -void hbMgrInitMqHbRspHandle(); - typedef struct SSqlCallbackWrapper { SParseContext* pParseCtx; SCatalogReq* pCatalogReq; diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index dac44bd9c4..ce174744ef 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1039,8 +1039,7 @@ static int32_t asyncExecSchQuery(SRequestObj* pRequest, SQuery* pQuery, SMetaDat .sysInfo = pRequest->pTscObj->sysInfo, .allocatorId = pRequest->allocatorRefId}; - SAppInstInfo* pAppInfo = getAppInfo(pRequest); - SQueryPlan* pDag = NULL; + SQueryPlan* pDag = NULL; int64_t st = taosGetTimestampUs(); int32_t code = qCreateQueryPlan(&cxt, &pDag, pMnodeList); @@ -1052,7 +1051,6 @@ static int32_t asyncExecSchQuery(SRequestObj* pRequest, SQuery* pQuery, SMetaDat } pRequest->metric.execStart = taosGetTimestampUs(); - pRequest->metric.planCostUs = pRequest->metric.execStart - st; if (TSDB_CODE_SUCCESS == code && !pRequest->validateOnly) { diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 8c70622318..ceca06e309 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -210,6 +210,11 @@ typedef struct { tmq_t* pTmq; } SMqCommitCbParam; +typedef struct SSyncCommitInfo { + tsem_t sem; + int32_t code; +} SSyncCommitInfo; + static int32_t doAskEp(tmq_t* tmq); static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg); static int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet); @@ -521,11 +526,7 @@ static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicN return TSDB_CODE_OUT_OF_MEMORY; } - pMsgSendInfo->msgInfo = (SDataBuf){ - .pData = buf, - .len = sizeof(SMsgHead) + len, - .handle = NULL, - }; + pMsgSendInfo->msgInfo = (SDataBuf) { .pData = buf, .len = sizeof(SMsgHead) + len, .handle = NULL }; pMsgSendInfo->requestId = generateRequestId(); pMsgSendInfo->requestObjRefId = 0; @@ -786,11 +787,7 @@ void tmqSendHbReq(void* param, void* tmrId) { goto OVER; } - sendInfo->msgInfo = (SDataBuf){ - .pData = pReq, - .len = tlen, - .handle = NULL, - }; + sendInfo->msgInfo = (SDataBuf){ .pData = pReq, .len = tlen, .handle = NULL }; sendInfo->requestId = generateRequestId(); sendInfo->requestObjRefId = 0; @@ -2126,13 +2123,8 @@ void tmq_commit_async(tmq_t* tmq, const TAOS_RES* pRes, tmq_commit_cb* cb, void* } } -typedef struct SSyncCommitInfo { - tsem_t sem; - int32_t code; -} SSyncCommitInfo; - -static void commitCallBackFn(tmq_t* pTmq, int32_t code, void* param) { - SSyncCommitInfo* pInfo = (SSyncCommitInfo*)param; +static void commitCallBackFn(tmq_t *pTmq, int32_t code, void* param) { + SSyncCommitInfo* pInfo = (SSyncCommitInfo*) param; pInfo->code = code; tsem_post(&pInfo->sem); } @@ -2309,3 +2301,26 @@ void commitRspCountDown(SMqCommitCbParamSet* pParamSet, int64_t consumerId, cons waitingRspNum); } } + +SReqResultInfo* tmqGetNextResInfo(TAOS_RES* res, bool convertUcs4) { + SMqRspObj* pRspObj = (SMqRspObj*)res; + pRspObj->resIter++; + + if (pRspObj->resIter < pRspObj->rsp.blockNum) { + SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)taosArrayGetP(pRspObj->rsp.blockData, pRspObj->resIter); + if (pRspObj->rsp.withSchema) { + SSchemaWrapper* pSW = (SSchemaWrapper*)taosArrayGetP(pRspObj->rsp.blockSchema, pRspObj->resIter); + setResSchemaInfo(&pRspObj->resInfo, pSW->pSchema, pSW->nCols); + taosMemoryFreeClear(pRspObj->resInfo.row); + taosMemoryFreeClear(pRspObj->resInfo.pCol); + taosMemoryFreeClear(pRspObj->resInfo.length); + taosMemoryFreeClear(pRspObj->resInfo.convertBuf); + taosMemoryFreeClear(pRspObj->resInfo.convertJson); + } + + setQueryResultFromRsp(&pRspObj->resInfo, pRetrieve, convertUcs4, false); + return &pRspObj->resInfo; + } + + return NULL; +} \ No newline at end of file diff --git a/source/dnode/mgmt/mgmt_snode/src/smInt.c b/source/dnode/mgmt/mgmt_snode/src/smInt.c index 28097311ac..e222349767 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smInt.c +++ b/source/dnode/mgmt/mgmt_snode/src/smInt.c @@ -55,6 +55,7 @@ int32_t smOpen(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) { smClose(pMgmt); return -1; } + tmsgReportStartup("snode-impl", "initialized"); if (smStartWorker(pMgmt) != 0) { diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index fb81a764f1..c69f08eb6b 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -70,7 +70,7 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) { if (tEncodeI32(pEncoder, innerSz) < 0) return -1; for (int32_t j = 0; j < innerSz; j++) { SStreamTask *pTask = taosArrayGetP(pArray, j); - if (tEncodeSStreamTask(pEncoder, pTask) < 0) return -1; + if (tEncodeStreamTask(pEncoder, pTask) < 0) return -1; } } @@ -130,7 +130,7 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) { taosArrayDestroy(pArray); return -1; } - if (tDecodeSStreamTask(pDecoder, pTask) < 0) { + if (tDecodeStreamTask(pDecoder, pTask) < 0) { taosMemoryFree(pTask); taosArrayDestroy(pArray); return -1; @@ -158,7 +158,10 @@ void tFreeStreamObj(SStreamObj *pStream) { taosMemoryFree(pStream->sql); taosMemoryFree(pStream->ast); taosMemoryFree(pStream->physicalPlan); - if (pStream->outputSchema.nCols) taosMemoryFree(pStream->outputSchema.pSchema); + + if (pStream->outputSchema.nCols) { + taosMemoryFree(pStream->outputSchema.pSchema); + } int32_t sz = taosArrayGetSize(pStream->tasks); for (int32_t i = 0; i < sz; i++) { @@ -166,11 +169,14 @@ void tFreeStreamObj(SStreamObj *pStream) { int32_t taskSz = taosArrayGetSize(pLevel); for (int32_t j = 0; j < taskSz; j++) { SStreamTask *pTask = taosArrayGetP(pLevel, j); - tFreeSStreamTask(pTask); + tFreeStreamTask(pTask); } + taosArrayDestroy(pLevel); } + taosArrayDestroy(pStream->tasks); + // tagSchema.pSchema if (pStream->tagSchema.nCols > 0) { taosMemoryFree(pStream->tagSchema.pSchema); diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index d1671aa12a..734f624be0 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -138,7 +138,7 @@ int32_t mndAddDispatcherToInnerTask(SMnode* pMnode, SStreamObj* pStream, SStream for (int32_t j = 0; j < sinkLvSize; j++) { SStreamTask* pLastLevelTask = taosArrayGetP(sinkLv, j); if (pLastLevelTask->nodeId == pVgInfo->vgId) { - pVgInfo->taskId = pLastLevelTask->taskId; + pVgInfo->taskId = pLastLevelTask->id.taskId; break; } } @@ -149,7 +149,7 @@ int32_t mndAddDispatcherToInnerTask(SMnode* pMnode, SStreamObj* pStream, SStream SArray* pArray = taosArrayGetP(pStream->tasks, 0); // one sink only SStreamTask* lastLevelTask = taosArrayGetP(pArray, 0); - pTask->fixedEpDispatcher.taskId = lastLevelTask->taskId; + pTask->fixedEpDispatcher.taskId = lastLevelTask->id.taskId; pTask->fixedEpDispatcher.nodeId = lastLevelTask->nodeId; pTask->fixedEpDispatcher.epSet = lastLevelTask->epSet; } @@ -224,7 +224,7 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SStreamObj* pStream) { continue; } - SStreamTask* pTask = tNewSStreamTask(pStream->uid); + SStreamTask* pTask = tNewStreamTask(pStream->uid); if (pTask == NULL) { sdbRelease(pSdb, pVgroup); terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -260,7 +260,7 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SStreamObj* pStream) { int32_t mndAddFixedSinkTaskToStream(SMnode* pMnode, SStreamObj* pStream) { SArray* tasks = taosArrayGetP(pStream->tasks, 0); - SStreamTask* pTask = tNewSStreamTask(pStream->uid); + SStreamTask* pTask = tNewStreamTask(pStream->uid); if (pTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; @@ -350,12 +350,13 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) { return -1; } - pInnerTask = tNewSStreamTask(pStream->uid); + pInnerTask = tNewStreamTask(pStream->uid); if (pInnerTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; qDestroyQueryPlan(pPlan); return -1; } + pInnerTask->fillHistory = pStream->fillHistory; mndAddTaskToTaskSet(taskInnerLevel, pInnerTask); @@ -421,7 +422,7 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) { continue; } - SStreamTask* pTask = tNewSStreamTask(pStream->uid); + SStreamTask* pTask = tNewStreamTask(pStream->uid); if (pTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; sdbRelease(pSdb, pVgroup); @@ -440,7 +441,7 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) { pTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH; pTask->outputType = TASK_OUTPUT__FIXED_DISPATCH; - pTask->fixedEpDispatcher.taskId = pInnerTask->taskId; + pTask->fixedEpDispatcher.taskId = pInnerTask->id.taskId; pTask->fixedEpDispatcher.nodeId = pInnerTask->nodeId; pTask->fixedEpDispatcher.epSet = pInnerTask->epSet; @@ -460,7 +461,7 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) { pEpInfo->childId = pTask->selfChildId; pEpInfo->epSet = pTask->epSet; pEpInfo->nodeId = pTask->nodeId; - pEpInfo->taskId = pTask->taskId; + pEpInfo->taskId = pTask->id.taskId; taosArrayPush(pInnerTask->childEpInfo, &pEpInfo); sdbRelease(pSdb, pVgroup); } @@ -491,7 +492,7 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) { continue; } - SStreamTask* pTask = tNewSStreamTask(pStream->uid); + SStreamTask* pTask = tNewStreamTask(pStream->uid); if (pTask == NULL) { sdbRelease(pSdb, pVgroup); qDestroyQueryPlan(pPlan); diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index ff759f5e78..76bb144fcb 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -35,12 +35,12 @@ static int32_t mndStreamActionInsert(SSdb *pSdb, SStreamObj *pStream); static int32_t mndStreamActionDelete(SSdb *pSdb, SStreamObj *pStream); -static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pStream, SStreamObj *pNewStream); +static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pOldStream, SStreamObj *pNewStream); static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq); static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq); -// static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); -/*static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq);*/ +static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); +static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq); static int32_t mndProcessStreamMetaReq(SRpcMsg *pReq); static int32_t mndGetStreamMeta(SRpcMsg *pReq, SShowObj *pShow, STableMetaRsp *pMeta); static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); @@ -418,7 +418,7 @@ FAIL: int32_t mndPersistTaskDeployReq(STrans *pTrans, const SStreamTask *pTask) { SEncoder encoder; tEncoderInit(&encoder, NULL, 0); - tEncodeSStreamTask(&encoder, pTask); + tEncodeStreamTask(&encoder, pTask); int32_t size = encoder.pos; int32_t tlen = sizeof(SMsgHead) + size; tEncoderClear(&encoder); @@ -430,7 +430,7 @@ int32_t mndPersistTaskDeployReq(STrans *pTrans, const SStreamTask *pTask) { ((SMsgHead *)buf)->vgId = htonl(pTask->nodeId); void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); tEncoderInit(&encoder, abuf, size); - tEncodeSStreamTask(&encoder, pTask); + tEncodeStreamTask(&encoder, pTask); tEncoderClear(&encoder); STransAction action = {0}; @@ -601,7 +601,7 @@ static int32_t mndPersistTaskDropReq(STrans *pTrans, SStreamTask *pTask) { return -1; } pReq->head.vgId = htonl(pTask->nodeId); - pReq->taskId = pTask->taskId; + pReq->taskId = pTask->id.taskId; STransAction action = {0}; memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet)); action.pCont = pReq; @@ -1209,7 +1209,7 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock // task id pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char *)&pTask->taskId, false); + colDataSetVal(pColInfo, numOfRows, (const char *)&pTask->id.taskId, false); // node type char nodeType[20 + VARSTR_HEADER_SIZE] = {0}; diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 3d1b356f8c..cefc4fa63e 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -32,6 +32,7 @@ void sndEnqueueStreamDispatch(SSnode *pSnode, SRpcMsg *pMsg) { tDecoderClear(&decoder); goto FAIL; } + tDecoderClear(&decoder); int32_t taskId = req.taskId; @@ -65,7 +66,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { ASSERT(taosArrayGetSize(pTask->childEpInfo) != 0); pTask->refCnt = 1; - pTask->schedStatus = TASK_SCHED_STATUS__INACTIVE; + pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; pTask->inputQueue = streamQueueOpen(); pTask->outputQueue = streamQueueOpen(); @@ -76,21 +77,19 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; pTask->pMsgCb = &pSnode->msgCb; - pTask->startVer = ver; + pTask->chkInfo.version = ver; + pTask->pMeta = pSnode->pMeta; pTask->pState = streamStateOpen(pSnode->path, pTask, false, -1, -1); if (pTask->pState == NULL) { return -1; } - SReadHandle mgHandle = { - .vnode = NULL, - .numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo), - .pStateBackend = pTask->pState, - }; + int32_t numOfChildEp = taosArrayGetSize(pTask->childEpInfo); + SReadHandle mgHandle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState }; - pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &mgHandle, 0); - ASSERT(pTask->exec.executor); + pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &mgHandle, 0); + ASSERT(pTask->exec.pExecutor); streamSetupTrigger(pTask); return 0; @@ -140,9 +139,10 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { if (pTask == NULL) { return -1; } + SDecoder decoder; tDecoderInit(&decoder, (uint8_t *)msg, msgLen); - code = tDecodeSStreamTask(&decoder, pTask); + code = tDecodeStreamTask(&decoder, pTask); if (code < 0) { tDecoderClear(&decoder); taosMemoryFree(pTask); @@ -153,7 +153,7 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { ASSERT(pTask->taskLevel == TASK_LEVEL__AGG); // 2.save task - code = streamMetaAddTask(pSnode->pMeta, -1, pTask); + code = streamMetaAddDeployedTask(pSnode->pMeta, -1, pTask); if (code < 0) { return -1; } diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt index 9911752f8e..c713d1e247 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -57,6 +57,7 @@ target_sources( # tq "src/tq/tq.c" + "src/tq/tqUtil.c" "src/tq/tqScan.c" "src/tq/tqMeta.c" "src/tq/tqRead.c" @@ -64,6 +65,7 @@ target_sources( "src/tq/tqPush.c" "src/tq/tqSink.c" "src/tq/tqCommit.c" + "src/tq/tqRestore.c" "src/tq/tqSnapshot.c" "src/tq/tqOffsetSnapshot.c" ) diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 7ecfadf728..fb2c2f4be3 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -256,15 +256,16 @@ void tqCloseReader(STqReader *); void tqReaderSetColIdList(STqReader *pReader, SArray *pColIdList); int32_t tqReaderSetTbUidList(STqReader *pReader, const SArray *tbUidList); -int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *tbUidList); +int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *pTableUidList); int32_t tqReaderRemoveTbUidList(STqReader *pReader, const SArray *tbUidList); int32_t tqSeekVer(STqReader *pReader, int64_t ver, const char *id); -void tqNextBlock(STqReader *pReader, SFetchRet *ret); +void tqNextBlock(STqReader *pReader, SFetchRet *ret); +int32_t extractSubmitMsgFromWal(SWalReader *pReader, SPackedData *pPackedData); -int32_t tqReaderSetSubmitReq2(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver); +int32_t tqReaderSetSubmitMsg(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver); // int32_t tqReaderSetDataMsg(STqReader *pReader, const SSubmitReq *pMsg, int64_t ver); -bool tqNextDataBlock2(STqReader *pReader); +bool tqNextDataBlock(STqReader *pReader); bool tqNextDataBlockFilterOut2(STqReader *pReader, SHashObj *filterOutUids); int32_t tqRetrieveDataBlock2(SSDataBlock *pBlock, STqReader *pReader, SSubmitTbData **pSubmitTbDataRet); int32_t tqRetrieveTaosxBlock2(STqReader *pReader, SArray *blocks, SArray *schemas, SSubmitTbData **pSubmitTbDataRet); diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 9037644602..c007f84790 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -80,7 +80,7 @@ typedef struct { typedef struct { int8_t subType; - STqReader* pExecReader; + STqReader* pTqReader; qTaskInfo_t task; union { STqExecCol execCol; @@ -128,6 +128,10 @@ typedef struct { tmr_h timer; } STqMgmt; +typedef struct { + int32_t size; +} STqOffsetHead; + static STqMgmt tqMgmt = {0}; int32_t tEncodeSTqHandle(SEncoder* pEncoder, const STqHandle* pHandle); @@ -154,10 +158,6 @@ int32_t tqMetaSaveCheckInfo(STQ* pTq, const char* key, const void* value, int32_ int32_t tqMetaDeleteCheckInfo(STQ* pTq, const char* key); int32_t tqMetaRestoreCheckInfo(STQ* pTq); -typedef struct { - int32_t size; -} STqOffsetHead; - STqOffsetStore* tqOffsetOpen(STQ* pTq); void tqOffsetClose(STqOffsetStore*); STqOffset* tqOffsetRead(STqOffsetStore* pStore, const char* subscribeKey); @@ -176,6 +176,18 @@ int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname); // tqStream int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver); +int32_t tqStreamTasksScanWal(STQ* pTq); + +// tq util +void createStreamTaskOffsetKey(char* dst, uint64_t streamId, uint32_t taskId); +int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem, int64_t ver); +int32_t launchTaskForWalBlock(SStreamTask* pTask, SFetchRet* pRet, STqOffset* pOffset); +int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg); + +void doSaveTaskOffset(STqOffsetStore* pOffsetStore, const char* pKey, int64_t ver); +void saveOffsetForAllTasks(STQ* pTq, int64_t ver); +void initOffsetForAllRestoreTasks(STQ* pTq); +int32_t transferToWalReadTask(SStreamMeta* pStreamMeta, SArray* pTaskList); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 253d5aebce..81f7c3d52a 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -192,9 +192,10 @@ void tqCleanUp(); STQ* tqOpen(const char* path, SVnode* pVnode); void tqClose(STQ*); int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver); -int tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp, +int tqRegisterPushHandle(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp, int32_t type); -int tqUnregisterPushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer); +int tqUnregisterPushHandle(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer); +int tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed. int tqCommit(STQ*); int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd); diff --git a/source/dnode/vnode/src/meta/metaCache.c b/source/dnode/vnode/src/meta/metaCache.c index 9501bf4b8e..795f281ab2 100644 --- a/source/dnode/vnode/src/meta/metaCache.c +++ b/source/dnode/vnode/src/meta/metaCache.c @@ -531,10 +531,11 @@ static void freePayload(const void* key, size_t keyLen, void* value) { return; } - SHashObj* pHashObj = (SHashObj*)p[0]; + SHashObj* pHashObj = (SHashObj*)p[0]; + STagFilterResEntry** pEntry = taosHashGet(pHashObj, &p[1], sizeof(uint64_t)); - { + if (pEntry != NULL && (*pEntry) != NULL) { int64_t st = taosGetTimestampUs(); SListIter iter = {0}; @@ -547,9 +548,9 @@ static void freePayload(const void* key, size_t keyLen, void* value) { void* tmp = tdListPopNode(&((*pEntry)->list), pNode); taosMemoryFree(tmp); - int64_t et = taosGetTimestampUs(); - metaInfo("clear items in cache, remain cached item:%d, elapsed time:%.2fms", listNEles(&((*pEntry)->list)), - (et - st) / 1000.0); + double el = (taosGetTimestampUs() - st) / 1000.0; + metaInfo("clear items in meta-cache, remain cached item:%d, elapsed time:%.2fms", listNEles(&((*pEntry)->list)), + el); break; } } diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index c75c675ec3..ce987ca88e 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -168,7 +168,7 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids, for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pRSmaInfo->taskInfo[i]) { - if ((terrno = qUpdateQualifiedTableId(pRSmaInfo->taskInfo[i], tbUids, isAdd)) < 0) { + if ((terrno = qUpdateTableListForStreamScanner(pRSmaInfo->taskInfo[i], tbUids, isAdd)) < 0) { tdReleaseRSmaInfo(pSma, pRSmaInfo); smaError("vgId:%d, update tbUidList failed for uid:%" PRIi64 " level %d since %s", SMA_VID(pSma), *suid, i, terrstr()); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index abc8a26369..1230a352d9 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -18,6 +18,7 @@ // 0: not init // 1: already inited // 2: wait to be inited or cleaup +#define WAL_READ_TASKS_ID (-1) int32_t tqInit() { int8_t old; @@ -61,12 +62,12 @@ static void destroyTqHandle(void* data) { if (pData->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { taosMemoryFreeClear(pData->execHandle.execCol.qmsg); } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__DB) { - tqCloseReader(pData->execHandle.pExecReader); + tqCloseReader(pData->execHandle.pTqReader); walCloseReader(pData->pWalReader); taosHashCleanup(pData->execHandle.execDb.pFilterOutTbUid); } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__TABLE) { walCloseReader(pData->pWalReader); - tqCloseReader(pData->execHandle.pExecReader); + tqCloseReader(pData->execHandle.pTqReader); } } @@ -82,12 +83,18 @@ static void tqPushEntryFree(void* data) { taosMemoryFree(p); } +static bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) { + return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG && + pLeft->val.version <= pRight->val.version; +} + STQ* tqOpen(const char* path, SVnode* pVnode) { STQ* pTq = taosMemoryCalloc(1, sizeof(STQ)); if (pTq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } + pTq->path = taosStrdup(path); pTq->pVnode = pVnode; pTq->walLogLastVer = pVnode->pWal->vers.lastVer; @@ -138,44 +145,6 @@ void tqClose(STQ* pTq) { taosMemoryFree(pTq); } -int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp) { - int32_t len = 0; - int32_t code = 0; - tEncodeSize(tEncodeSMqMetaRsp, pRsp, len, code); - if (code < 0) { - return -1; - } - int32_t tlen = sizeof(SMqRspHead) + len; - void* buf = rpcMallocCont(tlen); - if (buf == NULL) { - return -1; - } - - ((SMqRspHead*)buf)->mqMsgType = TMQ_MSG_TYPE__POLL_META_RSP; - ((SMqRspHead*)buf)->epoch = pReq->epoch; - ((SMqRspHead*)buf)->consumerId = pReq->consumerId; - - void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); - - SEncoder encoder = {0}; - tEncoderInit(&encoder, abuf, len); - tEncodeSMqMetaRsp(&encoder, pRsp); - tEncoderClear(&encoder); - - SRpcMsg resp = { - .info = pMsg->info, - .pCont = buf, - .contLen = tlen, - .code = 0, - }; - tmsgSendRsp(&resp); - - tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) send rsp, res msg type %d, offset type:%d", - TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->resMsgType, pRsp->rspOffset.type); - - return 0; -} - static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, int32_t type) { int32_t len = 0; @@ -253,11 +222,6 @@ int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, con return 0; } -static FORCE_INLINE bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) { - return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG && - pLeft->val.version <= pRight->val.version; -} - int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { STqOffset offset = {0}; int32_t vgId = TD_VID(pTq->pVnode); @@ -330,318 +294,6 @@ int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) { return 0; } -static int32_t tqInitDataRsp(SMqDataRsp* pRsp, const SMqPollReq* pReq, int8_t subType) { - pRsp->reqOffset = pReq->reqOffset; - - pRsp->blockData = taosArrayInit(0, sizeof(void*)); - pRsp->blockDataLen = taosArrayInit(0, sizeof(int32_t)); - - if (pRsp->blockData == NULL || pRsp->blockDataLen == NULL) { - return -1; - } - - pRsp->withTbName = 0; - pRsp->withSchema = false; - return 0; -} - -static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, const SMqPollReq* pReq) { - pRsp->reqOffset = pReq->reqOffset; - - pRsp->withTbName = 1; - pRsp->withSchema = 1; - pRsp->blockData = taosArrayInit(0, sizeof(void*)); - pRsp->blockDataLen = taosArrayInit(0, sizeof(int32_t)); - pRsp->blockTbName = taosArrayInit(0, sizeof(void*)); - pRsp->blockSchema = taosArrayInit(0, sizeof(void*)); - - if (pRsp->blockData == NULL || pRsp->blockDataLen == NULL || pRsp->blockTbName == NULL || pRsp->blockSchema == NULL) { - return -1; - } - - return 0; -} - -static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, - SRpcMsg* pMsg, bool* pBlockReturned) { - uint64_t consumerId = pRequest->consumerId; - STqOffsetVal reqOffset = pRequest->reqOffset; - STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, pRequest->subKey); - int32_t vgId = TD_VID(pTq->pVnode); - - *pBlockReturned = false; - - // In this vnode, data has been polled by consumer for this topic, so let's continue from the last offset value. - if (pOffset != NULL) { - *pOffsetVal = pOffset->val; - - char formatBuf[80]; - tFormatOffset(formatBuf, 80, pOffsetVal); - tqDebug("tmq poll: consumer:0x%" PRIx64 - ", subkey %s, vgId:%d, existed offset found, offset reset to %s and continue. reqId:0x%" PRIx64, - consumerId, pHandle->subKey, vgId, formatBuf, pRequest->reqId); - return 0; - } else { - // no poll occurs in this vnode for this topic, let's seek to the right offset value. - if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEAST) { - if (pRequest->useSnapshot) { - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey:%s, vgId:%d, (earliest) set offset to be snapshot", - consumerId, pHandle->subKey, vgId); - - if (pHandle->fetchMeta) { - tqOffsetResetToMeta(pOffsetVal, 0); - } else { - tqOffsetResetToData(pOffsetVal, 0, 0); - } - } else { - pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef); - if (pHandle->pRef == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - // offset set to previous version when init - tqOffsetResetToLog(pOffsetVal, pHandle->pRef->refVer - 1); - } - } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { - if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - SMqDataRsp dataRsp = {0}; - tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); - - tqOffsetResetToLog(&dataRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, (latest) offset reset to %" PRId64, consumerId, - pHandle->subKey, vgId, dataRsp.rspOffset.version); - int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); - tDeleteSMqDataRsp(&dataRsp); - - *pBlockReturned = true; - return code; - } else { - STaosxRsp taosxRsp = {0}; - tqInitTaosxRsp(&taosxRsp, pRequest); - tqOffsetResetToLog(&taosxRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); - int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); - tDeleteSTaosxRsp(&taosxRsp); - - *pBlockReturned = true; - return code; - } - } else if (reqOffset.type == TMQ_OFFSET__RESET_NONE) { - tqError("tmq poll: subkey:%s, no offset committed for consumer:0x%" PRIx64 - " in vg %d, subkey %s, reset none failed", - pHandle->subKey, consumerId, vgId, pRequest->subKey); - terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET; - return -1; - } - } - - return 0; -} - -#define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0) - -static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, - SRpcMsg* pMsg, STqOffsetVal* pOffset) { - uint64_t consumerId = pRequest->consumerId; - int32_t vgId = TD_VID(pTq->pVnode); - - SMqDataRsp dataRsp = {0}; - tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); - - // lock - taosWLockLatch(&pTq->lock); - - qSetTaskId(pHandle->execHandle.task, consumerId, pRequest->reqId); - int code = tqScanData(pTq, pHandle, &dataRsp, pOffset); - if (code != 0) { - goto end; - } - - // till now, all data has been transferred to consumer, new data needs to push client once arrived. - if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && - dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { - code = tqRegisterPushEntry(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); - taosWUnLockLatch(&pTq->lock); - return code; - } - - code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_RSP); - - // NOTE: this pHandle->consumerId may have been changed already. - -end : { - char buf[80] = {0}; - tFormatOffset(buf, 80, &dataRsp.rspOffset); - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, rsp offset type:%s, reqId:0x%" PRIx64 - " code:%d", - consumerId, pHandle->subKey, vgId, dataRsp.blockNum, buf, pRequest->reqId, code); - taosWUnLockLatch(&pTq->lock); - tDeleteSMqDataRsp(&dataRsp); -} - return code; -} - -static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, - SRpcMsg* pMsg, STqOffsetVal* offset) { - int code = 0; - int32_t vgId = TD_VID(pTq->pVnode); - SWalCkHead* pCkHead = NULL; - SMqMetaRsp metaRsp = {0}; - STaosxRsp taosxRsp = {0}; - tqInitTaosxRsp(&taosxRsp, pRequest); - - if (offset->type != TMQ_OFFSET__LOG) { - if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, offset) < 0) { - return -1; - } - - if (metaRsp.metaRspLen > 0) { - code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp); - tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send meta offset type:%d,uid:%" PRId64 - ",ts:%" PRId64, - pRequest->consumerId, pHandle->subKey, vgId, metaRsp.rspOffset.type, metaRsp.rspOffset.uid, - metaRsp.rspOffset.ts); - taosMemoryFree(metaRsp.metaRsp); - tDeleteSTaosxRsp(&taosxRsp); - return code; - } - - tqDebug("taosx poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send data blockNum:%d, offset type:%d,uid:%" PRId64 - ",ts:%" PRId64, - pRequest->consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type, - taosxRsp.rspOffset.uid, taosxRsp.rspOffset.ts); - if (taosxRsp.blockNum > 0) { - code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); - tDeleteSTaosxRsp(&taosxRsp); - return code; - } else { - *offset = taosxRsp.rspOffset; - } - } - - if (offset->type == TMQ_OFFSET__LOG) { - int64_t fetchVer = offset->version + 1; - pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048); - if (pCkHead == NULL) { - tDeleteSTaosxRsp(&taosxRsp); - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - walSetReaderCapacity(pHandle->pWalReader, 2048); - int totalRows = 0; - while (1) { - int32_t savedEpoch = atomic_load_32(&pHandle->epoch); - if (savedEpoch > pRequest->epoch) { - tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey:%s vgId:%d offset %" PRId64 - ", found new consumer epoch %d, discard req epoch %d", - pRequest->consumerId, pRequest->epoch, pHandle->subKey, vgId, fetchVer, savedEpoch, pRequest->epoch); - break; - } - - if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead, pRequest->reqId) < 0) { - tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); - tDeleteSTaosxRsp(&taosxRsp); - taosMemoryFreeClear(pCkHead); - return code; - } - - SWalCont* pHead = &pCkHead->head; - tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d", - pRequest->consumerId, pRequest->epoch, vgId, fetchVer, pHead->msgType); - - // process meta - if (pHead->msgType != TDMT_VND_SUBMIT) { - if (totalRows > 0) { - tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer - 1); - code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); - tDeleteSTaosxRsp(&taosxRsp); - taosMemoryFreeClear(pCkHead); - return code; - } - - tqDebug("fetch meta msg, ver:%" PRId64 ", type:%s", pHead->version, TMSG_INFO(pHead->msgType)); - tqOffsetResetToLog(&metaRsp.rspOffset, fetchVer); - metaRsp.resMsgType = pHead->msgType; - metaRsp.metaRspLen = pHead->bodyLen; - metaRsp.metaRsp = pHead->body; - if (tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp) < 0) { - code = -1; - taosMemoryFreeClear(pCkHead); - tDeleteSTaosxRsp(&taosxRsp); - return code; - } - code = 0; - taosMemoryFreeClear(pCkHead); - tDeleteSTaosxRsp(&taosxRsp); - return code; - } - - // process data - SPackedData submit = { - .msgStr = POINTER_SHIFT(pHead->body, sizeof(SSubmitReq2Msg)), - .msgLen = pHead->bodyLen - sizeof(SSubmitReq2Msg), - .ver = pHead->version, - }; - - if (tqTaosxScanLog(pTq, pHandle, submit, &taosxRsp, &totalRows) < 0) { - tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", pRequest->consumerId, vgId, - pRequest->subKey); - taosMemoryFreeClear(pCkHead); - tDeleteSTaosxRsp(&taosxRsp); - return -1; - } - - if (totalRows >= 4096 || taosxRsp.createTableNum > 0) { - tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); - tDeleteSTaosxRsp(&taosxRsp); - taosMemoryFreeClear(pCkHead); - return code; - } else { - fetchVer++; - } - } - } - - tDeleteSTaosxRsp(&taosxRsp); - taosMemoryFreeClear(pCkHead); - return 0; -} - -static int32_t doPollDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg) { - int32_t code = -1; - STqOffsetVal offset = {0}; - STqOffsetVal reqOffset = pRequest->reqOffset; - - // 1. reset the offset if needed - if (IS_OFFSET_RESET_TYPE(reqOffset.type)) { - // handle the reset offset cases, according to the consumer's choice. - bool blockReturned = false; - code = extractResetOffsetVal(&offset, pTq, pHandle, pRequest, pMsg, &blockReturned); - if (code != 0) { - return code; - } - - // empty block returned, quit - if (blockReturned) { - return 0; - } - } else { // use the consumer specified offset - // the offset value can not be monotonious increase?? - offset = reqOffset; - } - - // this is a normal subscribe requirement - if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - return extractDataAndRspForNormalSubscribe(pTq, pHandle, pRequest, pMsg, &offset); - } - - // todo handle the case where re-balance occurs. - // for taosx - return extractDataAndRspForDbStbSubscribe(pTq, pHandle, pRequest, pMsg, &offset); -} - int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { SMqPollReq req = {0}; if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) { @@ -689,7 +341,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64, consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId); - return doPollDataForMq(pTq, pHandle, &req, pMsg); + return tqExtractDataForMq(pTq, pHandle, &req, pMsg); } int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { @@ -815,10 +467,10 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg &pHandle->execHandle.numOfCols, req.newConsumerId); void* scanner = NULL; qExtractStreamScanner(pHandle->execHandle.task, &scanner); - pHandle->execHandle.pExecReader = qExtractReaderFromStreamScanner(scanner); + pHandle->execHandle.pTqReader = qExtractReaderFromStreamScanner(scanner); } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__DB) { pHandle->pWalReader = walOpenReader(pVnode->pWal, NULL); - pHandle->execHandle.pExecReader = tqOpenReader(pVnode); + pHandle->execHandle.pTqReader = tqOpenReader(pVnode); pHandle->execHandle.execDb.pFilterOutTbUid = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); @@ -837,8 +489,8 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg int64_t tbUid = *(int64_t*)taosArrayGet(tbUidList, i); tqDebug("vgId:%d, idx %d, uid:%" PRId64, vgId, i, tbUid); } - pHandle->execHandle.pExecReader = tqOpenReader(pVnode); - tqReaderSetTbUidList(pHandle->execHandle.pExecReader, tbUidList); + pHandle->execHandle.pTqReader = tqOpenReader(pVnode); + tqReaderSetTbUidList(pHandle->execHandle.pTqReader, tbUidList); taosArrayDestroy(tbUidList); buildSnapContext(handle.meta, handle.version, req.suid, pHandle->execHandle.subType, pHandle->fetchMeta, @@ -874,7 +526,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg atomic_store_32(&pHandle->epoch, -1); // remove if it has been register in the push manager, and return one empty block to consumer - tqUnregisterPushEntry(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true); + tqUnregisterPushHandle(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true); atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_add_fetch_32(&pHandle->epoch, 1); @@ -896,16 +548,14 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { -#if 0 - if (pTask->taskLevel == TASK_LEVEL__AGG) { - A(taosArrayGetSize(pTask->childEpInfo) != 0); - } -#endif + // todo extract method + char buf[128] = {0}; + sprintf(buf, "0x%"PRIx64"-%d", pTask->id.streamId, pTask->id.taskId); int32_t vgId = TD_VID(pTq->pVnode); + pTask->id.idStr = taosStrdup(buf); pTask->refCnt = 1; - pTask->schedStatus = TASK_SCHED_STATUS__INACTIVE; - + pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; pTask->inputQueue = streamQueueOpen(); pTask->outputQueue = streamQueueOpen(); @@ -916,11 +566,14 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; pTask->pMsgCb = &pTq->pVnode->msgCb; - pTask->startVer = ver; + pTask->pMeta = pTq->pStreamMeta; + pTask->chkInfo.version = ver; // expand executor if (pTask->fillHistory) { - pTask->taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; + pTask->status.taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; + } else { + pTask->status.taskStatus = TASK_STATUS__RESTORE; } if (pTask->taskLevel == TASK_LEVEL__SOURCE) { @@ -930,14 +583,10 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { } SReadHandle handle = { - .meta = pTq->pVnode->pMeta, - .vnode = pTq->pVnode, - .initTqReader = 1, - .pStateBackend = pTask->pState, - }; + .meta = pTq->pVnode->pMeta, .vnode = pTq->pVnode, .initTqReader = 1, .pStateBackend = pTask->pState}; - pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId); - if (pTask->exec.executor == NULL) { + pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId); + if (pTask->exec.pExecutor == NULL) { return -1; } @@ -946,14 +595,12 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { if (pTask->pState == NULL) { return -1; } - SReadHandle mgHandle = { - .vnode = NULL, - .numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo), - .pStateBackend = pTask->pState, - }; - pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &mgHandle, vgId); - if (pTask->exec.executor == NULL) { + int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo); + SReadHandle mgHandle = { .vnode = NULL, .numOfVgroups = numOfVgroups, .pStateBackend = pTask->pState}; + + pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &mgHandle, vgId); + if (pTask->exec.pExecutor == NULL) { return -1; } } @@ -974,16 +621,20 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { ver1 = info.skmVer; } - pTask->tbSink.pTSchema = - tBuildTSchema(pTask->tbSink.pSchemaWrapper->pSchema, pTask->tbSink.pSchemaWrapper->nCols, ver1); - if (pTask->tbSink.pTSchema == NULL) { + SSchemaWrapper* pschemaWrapper = pTask->tbSink.pSchemaWrapper; + pTask->tbSink.pTSchema = tBuildTSchema(pschemaWrapper->pSchema, pschemaWrapper->nCols, ver1); + if(pTask->tbSink.pTSchema == NULL) { return -1; } } + if (pTask->taskLevel == TASK_LEVEL__SOURCE) { + pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); + } + streamSetupTrigger(pTask); - tqInfo("expand stream task on vg %d, task id %d, child id %d, level %d", vgId, pTask->taskId, pTask->selfChildId, - pTask->taskLevel); + tqInfo("vgId:%d expand stream task, s-task:%s, ver:%" PRId64 " child id:%d, level:%d", vgId, pTask->id.idStr, + pTask->chkInfo.version, pTask->selfChildId, pTask->taskLevel); return 0; } @@ -1006,18 +657,24 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { .upstreamNodeId = req.upstreamNodeId, .upstreamTaskId = req.upstreamTaskId, }; + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); - if (pTask && atomic_load_8(&pTask->taskStatus) == TASK_STATUS__NORMAL) { - rsp.status = 1; + if (pTask) { + rsp.status = (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL) ? 1 : 0; + streamMetaReleaseTask(pTq->pStreamMeta, pTask); + + tqDebug("tq recv task check req(reqId:0x%" PRIx64 + ") %d at node %d task status:%d, check req from task %d at node %d, rsp status %d", + rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, pTask->status.taskStatus, rsp.upstreamTaskId, + rsp.upstreamNodeId, rsp.status); } else { rsp.status = 0; + tqDebug("tq recv task check(taskId:%d not built yet) req(reqId:0x%" PRIx64 + ") %d at node %d, check req from task %d at node %d, rsp status %d", + taskId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, + rsp.status); } - if (pTask) streamMetaReleaseTask(pTq->pStreamMeta, pTask); - - tqDebug("tq recv task check req(reqId:0x%" PRIx64 ") %d at node %d check req from task %d at node %d, status %d", - rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); - SEncoder encoder; int32_t code; int32_t len; @@ -1035,13 +692,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { tEncodeSStreamTaskCheckRsp(&encoder, &rsp); tEncoderClear(&encoder); - SRpcMsg rspMsg = { - .code = 0, - .pCont = buf, - .contLen = sizeof(SMsgHead) + len, - .info = pMsg->info, - }; - + SRpcMsg rspMsg = { .code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info }; tmsgSendRsp(&rspMsg); return 0; } @@ -1057,8 +708,8 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, char* msg, int32 tDecoderClear(&decoder); return -1; } - tDecoderClear(&decoder); + tDecoderClear(&decoder); tqDebug("tq recv task check rsp(reqId:0x%" PRIx64 ") %d at node %d check req from task %d at node %d, status %d", rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); @@ -1090,17 +741,20 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msg, msgLen); - code = tDecodeSStreamTask(&decoder, pTask); + code = tDecodeStreamTask(&decoder, pTask); if (code < 0) { tDecoderClear(&decoder); taosMemoryFree(pTask); return -1; } + tDecoderClear(&decoder); // 2.save task - code = streamMetaAddTask(pTq->pStreamMeta, sversion, pTask); + code = streamMetaAddDeployedTask(pTq->pStreamMeta, sversion, pTask); if (code < 0) { + tqError("vgId:%d failed to add s-task:%s, total:%d", TD_VID(pTq->pVnode), pTask->id.idStr, + streamMetaGetNumOfTasks(pTq->pStreamMeta)); return -1; } @@ -1109,6 +763,8 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms streamTaskCheckDownstream(pTask, sversion); } + tqDebug("vgId:%d s-task:%s is deployed and add meta from mnd, status:%d, total:%d", TD_VID(pTq->pVnode), + pTask->id.idStr, pTask->status.taskStatus, streamMetaGetNumOfTasks(pTq->pStreamMeta)); return 0; } @@ -1124,7 +780,7 @@ int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) { } // check param - int64_t fillVer1 = pTask->startVer; + int64_t fillVer1 = pTask->chkInfo.version; if (fillVer1 <= 0) { streamMetaReleaseTask(pTq->pStreamMeta, pTask); return -1; @@ -1133,7 +789,7 @@ int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) { // do recovery step 1 streamSourceRecoverScanStep1(pTask); - if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) { + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; } @@ -1148,7 +804,7 @@ int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) { streamMetaReleaseTask(pTq->pStreamMeta, pTask); - if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) { + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { return 0; } @@ -1190,7 +846,7 @@ int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t return -1; } - if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) { + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; } @@ -1309,7 +965,7 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { SStreamTask* pTask = *(SStreamTask**)pIter; if (pTask->taskLevel != TASK_LEVEL__SOURCE) continue; - qDebug("delete req enqueue stream task: %d, ver: %" PRId64, pTask->taskId, ver); + qDebug("delete req enqueue stream task: %d, ver: %" PRId64, pTask->id.taskId, ver); if (!failed) { SStreamRefDataBlock* pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); @@ -1318,8 +974,8 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { pRefBlock->dataRef = pRef; atomic_add_fetch_32(pRefBlock->dataRef, 1); - if (tAppendDataForStream(pTask, (SStreamQueueItem*)pRefBlock) < 0) { - qError("stream task input del failed, task id %d", pTask->taskId); + if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pRefBlock) < 0) { + qError("stream task input del failed, task id %d", pTask->id.taskId); atomic_sub_fetch_32(pRef, 1); taosFreeQitem(pRefBlock); @@ -1327,7 +983,7 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { } if (streamSchedExec(pTask) < 0) { - qError("stream task launch failed, task id %d", pTask->taskId); + qError("stream task launch failed, task id %d", pTask->id.taskId); continue; } @@ -1353,13 +1009,13 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { taosArrayPush(pStreamBlock->blocks, &block); if (!failed) { - if (tAppendDataForStream(pTask, (SStreamQueueItem*)pStreamBlock) < 0) { - qError("stream task input del failed, task id %d", pTask->taskId); + if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pStreamBlock) < 0) { + qError("stream task input del failed, task id %d", pTask->id.taskId); continue; } if (streamSchedExec(pTask) < 0) { - qError("stream task launch failed, task id %d", pTask->taskId); + qError("stream task launch failed, task id %d", pTask->id.taskId); continue; } } else { @@ -1372,17 +1028,32 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { return 0; } -int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { - void* pIter = NULL; - bool succ = true; +static int32_t addSubmitBlockNLaunchTask(STqOffsetStore* pOffsetStore, SStreamTask* pTask, SStreamDataSubmit2* pSubmit, + const char* key, int64_t ver) { + doSaveTaskOffset(pOffsetStore, key, ver); + int32_t code = tqAddInputBlockNLaunchTask(pTask, (SStreamQueueItem*)pSubmit, ver); - SStreamDataSubmit2* pSubmit = streamDataSubmitNew(submit); + // remove the offset, if all functions are completed successfully. + if (code == TSDB_CODE_SUCCESS) { + tqOffsetDelete(pOffsetStore, key); + } + + return code; +} + +int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { +#if 0 + void* pIter = NULL; + SStreamDataSubmit2* pSubmit = streamDataSubmitNew(submit, STREAM_INPUT__DATA_SUBMIT); if (pSubmit == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; tqError("failed to create data submit for stream since out of memory"); - succ = false; + saveOffsetForAllTasks(pTq, submit.ver); + return -1; } + SArray* pInputQueueFullTasks = taosArrayInit(4, POINTER_BYTES); + while (1) { pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); if (pIter == NULL) { @@ -1394,47 +1065,75 @@ int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { continue; } - if (pTask->taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { - tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->taskId, pTask->taskStatus); + if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { + tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId, + pTask->status.taskStatus); continue; } - tqDebug("data submit enqueue stream task:%d, ver: %" PRId64, pTask->taskId, submit.ver); - if (succ) { - int32_t code = tAppendDataForStream(pTask, (SStreamQueueItem*)pSubmit); - if (code < 0) { - // let's handle the back pressure + // check if offset value exists + char key[128] = {0}; + createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId); - tqError("stream task:%d failed to put into queue for, too many", pTask->taskId); - continue; + if (tInputQueueIsFull(pTask)) { + STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key); + + int64_t ver = submit.ver; + if (pOffset == NULL) { + doSaveTaskOffset(pTq->pOffsetStore, key, submit.ver); + } else { + ver = pOffset->val.version; } - if (streamSchedExec(pTask) < 0) { - tqError("stream task:%d launch failed, code:%s", pTask->taskId, tstrerror(terrno)); - continue; - } - } else { - streamTaskInputFail(pTask); + tqDebug("s-task:%s input queue is full, discard submit block, ver:%" PRId64, pTask->id.idStr, ver); + taosArrayPush(pInputQueueFullTasks, &pTask); + continue; } + + // check if offset value exists + STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key); + ASSERT(pOffset == NULL); + + addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver); } - if (pSubmit != NULL) { - streamDataSubmitDestroy(pSubmit); - taosFreeQitem(pSubmit); - } + streamDataSubmitDestroy(pSubmit); + taosFreeQitem(pSubmit); +#endif - return succ ? 0 : -1; + tqStartStreamTasks(pTq); + return 0; } int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { SStreamTaskRunReq* pReq = pMsg->pCont; - int32_t taskId = pReq->taskId; - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); - if (pTask) { - streamProcessRunReq(pTask); + + int32_t taskId = pReq->taskId; + int32_t vgId = TD_VID(pTq->pVnode); + + if (taskId == WAL_READ_TASKS_ID) { // all tasks are extracted submit data from the wal + tqStreamTasksScanWal(pTq); + return 0; + } + + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); + if (pTask != NULL) { + if (pTask->status.taskStatus == TASK_STATUS__NORMAL) { + tqDebug("vgId:%d s-task:%s start to process run req", vgId, pTask->id.idStr); + streamProcessRunReq(pTask); + } else if (pTask->status.taskStatus == TASK_STATUS__RESTORE) { + tqDebug("vgId:%d s-task:%s start to process block from wal, last chk point:%" PRId64, vgId, + pTask->id.idStr, pTask->chkInfo.version); + streamProcessRunReq(pTask); + } else { + tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr); + } + streamMetaReleaseTask(pTq->pStreamMeta, pTask); + tqStartStreamTasks(pTq); return 0; } else { + tqError("vgId:%d failed to found s-task, taskId:%d", vgId, taskId); return -1; } } @@ -1447,14 +1146,10 @@ int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) { SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); tDecodeStreamDispatchReq(&decoder, &req); - int32_t taskId = req.taskId; - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId); if (pTask) { - SRpcMsg rsp = { - .info = pMsg->info, - .code = 0, - }; + SRpcMsg rsp = { .info = pMsg->info, .code = 0 }; streamProcessDispatchReq(pTask, &req, &rsp, exec); streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; @@ -1467,7 +1162,7 @@ int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) { SStreamDispatchRsp* pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t taskId = ntohl(pRsp->upstreamTaskId); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); - tqDebug("recv dispatch rsp, code: %x", pMsg->code); + tqDebug("recv dispatch rsp, code:%x", pMsg->code); if (pTask) { streamProcessDispatchRsp(pTask, pRsp, pMsg->code); streamMetaReleaseTask(pTq->pStreamMeta, pTask); @@ -1495,10 +1190,7 @@ int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) { int32_t taskId = req.dstTaskId; SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); if (pTask) { - SRpcMsg rsp = { - .info = pMsg->info, - .code = 0, - }; + SRpcMsg rsp = { .info = pMsg->info, .code = 0 }; streamProcessRetrieveReq(pTask, &req, &rsp); streamMetaReleaseTask(pTq->pStreamMeta, pTask); tDeleteStreamRetrieveReq(&req); @@ -1534,10 +1226,7 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); if (pTask) { - SRpcMsg rsp = { - .info = pMsg->info, - .code = 0, - }; + SRpcMsg rsp = { .info = pMsg->info, .code = 0 }; streamProcessDispatchReq(pTask, &req, &rsp, false); streamMetaReleaseTask(pTq->pStreamMeta, pTask); rpcFreeCont(pMsg->pCont); @@ -1554,10 +1243,7 @@ FAIL: SMsgHead* pRspHead = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp)); if (pRspHead == NULL) { - SRpcMsg rsp = { - .code = TSDB_CODE_OUT_OF_MEMORY, - .info = pMsg->info, - }; + SRpcMsg rsp = { .code = TSDB_CODE_OUT_OF_MEMORY, .info = pMsg->info }; tqDebug("send dispatch error rsp, code: %x", code); tmsgSendRsp(&rsp); rpcFreeCont(pMsg->pCont); @@ -1575,11 +1261,7 @@ FAIL: pRsp->inputStatus = TASK_OUTPUT_STATUS__NORMAL; SRpcMsg rsp = { - .code = code, - .info = pMsg->info, - .contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp), - .pCont = pRspHead, - }; + .code = code, .info = pMsg->info, .contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp), .pCont = pRspHead}; tqDebug("send dispatch error rsp, code: %x", code); tmsgSendRsp(&rsp); rpcFreeCont(pMsg->pCont); @@ -1588,3 +1270,40 @@ FAIL: } int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; } + +int32_t tqStartStreamTasks(STQ* pTq) { + int32_t vgId = TD_VID(pTq->pVnode); + + SStreamMeta* pMeta = pTq->pStreamMeta; + taosWLockLatch(&pMeta->lock); + pMeta->walScan += 1; + + if (pMeta->walScan > 1) { + tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScan); + taosWUnLockLatch(&pTq->pStreamMeta->lock); + return 0; + } + + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); + if (pRunReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("vgId:%d failed restore stream tasks, code:%s", vgId, terrstr(terrno)); + taosWUnLockLatch(&pTq->pStreamMeta->lock); + return -1; + } + + int32_t numOfTasks = taosHashGetSize(pTq->pStreamMeta->pTasks); + + tqInfo("vgId:%d start wal scan stream tasks, tasks:%d", vgId, numOfTasks); + initOffsetForAllRestoreTasks(pTq); + + pRunReq->head.vgId = vgId; + pRunReq->streamId = 0; + pRunReq->taskId = WAL_READ_TASKS_ID; + + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; + tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); + taosWUnLockLatch(&pTq->pStreamMeta->lock); + + return 0; +} diff --git a/source/dnode/vnode/src/tq/tqCommit.c b/source/dnode/vnode/src/tq/tqCommit.c index 7fc66c4919..0f5daa31ad 100644 --- a/source/dnode/vnode/src/tq/tqCommit.c +++ b/source/dnode/vnode/src/tq/tqCommit.c @@ -16,10 +16,13 @@ #include "tq.h" int tqCommit(STQ* pTq) { +#if 0 + // stream meta commit does not be aligned to the vnode commit if (streamMetaCommit(pTq->pStreamMeta) < 0) { tqError("vgId:%d, failed to commit stream meta since %s", TD_VID(pTq->pVnode), terrstr()); return -1; } +#endif return tqOffsetCommitFile(pTq->pOffsetStore); } diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index 7b0cdab2f8..cd8cefb307 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -320,15 +320,15 @@ int32_t tqMetaRestoreHandle(STQ* pTq) { code = -1; goto end; } - handle.execHandle.pExecReader = qExtractReaderFromStreamScanner(scanner); - if (handle.execHandle.pExecReader == NULL) { + handle.execHandle.pTqReader = qExtractReaderFromStreamScanner(scanner); + if (handle.execHandle.pTqReader == NULL) { tqError("cannot extract exec reader for %s", handle.subKey); code = -1; goto end; } } else if (handle.execHandle.subType == TOPIC_SUB_TYPE__DB) { handle.pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); - handle.execHandle.pExecReader = tqOpenReader(pTq->pVnode); + handle.execHandle.pTqReader = tqOpenReader(pTq->pVnode); buildSnapContext(reader.meta, reader.version, 0, handle.execHandle.subType, handle.fetchMeta, (SSnapContext**)(&reader.sContext)); @@ -343,8 +343,8 @@ int32_t tqMetaRestoreHandle(STQ* pTq) { int64_t tbUid = *(int64_t*)taosArrayGet(tbUidList, i); tqDebug("vgId:%d, idx %d, uid:%" PRId64, vgId, i, tbUid); } - handle.execHandle.pExecReader = tqOpenReader(pTq->pVnode); - tqReaderSetTbUidList(handle.execHandle.pExecReader, tbUidList); + handle.execHandle.pTqReader = tqOpenReader(pTq->pVnode); + tqReaderSetTbUidList(handle.execHandle.pTqReader, tbUidList); taosArrayDestroy(tbUidList); buildSnapContext(reader.meta, reader.version, handle.execHandle.execTb.suid, handle.execHandle.subType, diff --git a/source/dnode/vnode/src/tq/tqOffset.c b/source/dnode/vnode/src/tq/tqOffset.c index 66d1ac2c7e..e8051a1406 100644 --- a/source/dnode/vnode/src/tq/tqOffset.c +++ b/source/dnode/vnode/src/tq/tqOffset.c @@ -128,31 +128,35 @@ int32_t tqOffsetDelete(STqOffsetStore* pStore, const char* subscribeKey) { } int32_t tqOffsetCommitFile(STqOffsetStore* pStore) { - if (!pStore->needCommit) return 0; + if (!pStore->needCommit) { + return 0; + } + // TODO file name should be with a newer version char* fname = tqOffsetBuildFName(pStore->pTq->path, 0); TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); if (pFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); - - int32_t err = terrno; - const char* errStr = tstrerror(err); - int32_t sysErr = errno; - const char* sysErrStr = strerror(errno); - tqError("vgId:%d, cannot open file %s when commit offset since %s", pStore->pTq->pVnode->config.vgId, fname, - sysErrStr); + const char* err = strerror(errno); + tqError("vgId:%d, failed to open offset file %s, since %s", TD_VID(pStore->pTq->pVnode), fname, err); taosMemoryFree(fname); return -1; } + taosMemoryFree(fname); + void* pIter = NULL; while (1) { pIter = taosHashIterate(pStore->pHash, pIter); - if (pIter == NULL) break; + if (pIter == NULL) { + break; + } + STqOffset* pOffset = (STqOffset*)pIter; int32_t bodyLen; int32_t code; tEncodeSize(tEncodeSTqOffset, pOffset, bodyLen, code); + if (code < 0) { taosHashCancelIterate(pStore->pHash, pIter); return -1; @@ -166,6 +170,7 @@ int32_t tqOffsetCommitFile(STqOffsetStore* pStore) { SEncoder encoder; tEncoderInit(&encoder, abuf, bodyLen); tEncodeSTqOffset(&encoder, pOffset); + // write file int64_t writeLen; if ((writeLen = taosWriteFile(pFile, buf, totLen)) != totLen) { @@ -174,8 +179,10 @@ int32_t tqOffsetCommitFile(STqOffsetStore* pStore) { taosMemoryFree(buf); return -1; } + taosMemoryFree(buf); } + // close and rename file taosCloseFile(&pFile); pStore->needCommit = 0; diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index a406a793dc..7a1a6b7454 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -323,15 +323,22 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v taosWUnLockLatch(&pTq->lock); } - // push data for stream processing - if (!tsDisableStream && vnodeIsRoleLeader(pTq->pVnode)) { + tqDebug("handle submit, restore:%d, size:%d", pTq->pVnode->restored, (int)taosHashGetSize(pTq->pStreamMeta->pTasks)); + + // push data for stream processing: + // 1. the vnode has already been restored. + // 2. the vnode should be the leader. + // 3. the stream is not suspended yet. + if (!tsDisableStream && vnodeIsRoleLeader(pTq->pVnode) && pTq->pVnode->restored) { if (taosHashGetSize(pTq->pStreamMeta->pTasks) == 0) { return 0; } if (msgType == TDMT_VND_SUBMIT) { +#if 0 void* data = taosMemoryMalloc(len); if (data == NULL) { + // todo: for all stream in this vnode, keep this offset in the offset files, and wait for a moment, and then retry terrno = TSDB_CODE_OUT_OF_MEMORY; tqError("vgId:%d, failed to copy submit data for stream processing, since out of memory", vgId); return -1; @@ -340,7 +347,10 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v memcpy(data, pReq, len); SPackedData submit = {.msgStr = data, .msgLen = len, .ver = ver}; - tqDebug("tq copy write msg %p %d %" PRId64 " from %p", data, len, ver, pReq); + tqDebug("vgId:%d tq copy submit msg:%p len:%d ver:%" PRId64 " from %p for stream", vgId, data, len, ver, pReq); + tqProcessSubmitReq(pTq, submit); +#endif + SPackedData submit = {0}; tqProcessSubmitReq(pTq, submit); } @@ -352,7 +362,7 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v return 0; } -int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp, +int32_t tqRegisterPushHandle(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp, int32_t type) { uint64_t consumerId = pRequest->consumerId; int32_t vgId = TD_VID(pTq->pVnode); @@ -389,7 +399,7 @@ int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, return 0; } -int32_t tqUnregisterPushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer) { +int32_t tqUnregisterPushHandle(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer) { int32_t vgId = TD_VID(pTq->pVnode); STqPushEntry** pEntry = taosHashGet(pTq->pPushMgr, pKey, keyLen); diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 72b478e6bf..25ab7209d2 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -113,7 +113,7 @@ bool isValValidForTable(STqHandle* pHandle, SWalCont* pHead) { } SMetaReader mr = {0}; - metaReaderInit(&mr, pHandle->execHandle.pExecReader->pVnodeMeta, 0); + metaReaderInit(&mr, pHandle->execHandle.pTqReader->pVnodeMeta, 0); if (metaGetTableEntryByName(&mr, req.tbName) < 0) { metaReaderClear(&mr); @@ -262,8 +262,6 @@ STqReader* tqOpenReader(SVnode* pVnode) { } pReader->pVnodeMeta = pVnode->pMeta; - /*pReader->pMsg = NULL;*/ -// pReader->ver = -1; pReader->pColIdList = NULL; pReader->cachedSchemaVer = 0; pReader->cachedSchemaSuid = 0; @@ -298,7 +296,29 @@ int32_t tqSeekVer(STqReader* pReader, int64_t ver, const char* id) { if (walReadSeekVer(pReader->pWalReader, ver) < 0) { return -1; } - tqDebug("tmq poll: wal reader seek to ver success ver:%"PRId64" %s", ver, id); + tqDebug("wal reader seek to ver:%"PRId64" %s", ver, id); + return 0; +} + +int32_t extractSubmitMsgFromWal(SWalReader* pReader, SPackedData* pPackedData) { + if (walNextValidMsg(pReader) < 0) { + return -1; + } + + void* pBody = POINTER_SHIFT(pReader->pHead->head.body, sizeof(SSubmitReq2Msg)); + int32_t len = pReader->pHead->head.bodyLen - sizeof(SSubmitReq2Msg); + int64_t ver = pReader->pHead->head.version; + + void* data = taosMemoryMalloc(len); + if (data == NULL) { + // todo: for all stream in this vnode, keep this offset in the offset files, and wait for a moment, and then retry + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("vgId:%d, failed to copy submit data for stream processing, since out of memory", 0); + return -1; + } + + memcpy(data, pBody, len); + *pPackedData = (SPackedData){.ver = ver, .msgLen = len, .msgStr = data}; return 0; } @@ -309,26 +329,28 @@ void tqNextBlock(STqReader* pReader, SFetchRet* ret) { ret->fetchType = FETCH_TYPE__NONE; return; } - void* body = POINTER_SHIFT(pReader->pWalReader->pHead->head.body, sizeof(SSubmitReq2Msg)); + + void* pBody = POINTER_SHIFT(pReader->pWalReader->pHead->head.body, sizeof(SSubmitReq2Msg)); int32_t bodyLen = pReader->pWalReader->pHead->head.bodyLen - sizeof(SSubmitReq2Msg); int64_t ver = pReader->pWalReader->pHead->head.version; - tqReaderSetSubmitReq2(pReader, body, bodyLen, ver); + tqReaderSetSubmitMsg(pReader, pBody, bodyLen, ver); } - while (tqNextDataBlock2(pReader)) { + while (tqNextDataBlock(pReader)) { memset(&ret->data, 0, sizeof(SSDataBlock)); int32_t code = tqRetrieveDataBlock2(&ret->data, pReader, NULL); if (code != 0 || ret->data.info.rows == 0) { continue; } + ret->fetchType = FETCH_TYPE__DATA; return; } } } -int32_t tqReaderSetSubmitReq2(STqReader* pReader, void* msgStr, int32_t msgLen, int64_t ver) { +int32_t tqReaderSetSubmitMsg(STqReader* pReader, void* msgStr, int32_t msgLen, int64_t ver) { pReader->msg2.msgStr = msgStr; pReader->msg2.msgLen = msgLen; pReader->msg2.ver = ver; @@ -345,7 +367,7 @@ int32_t tqReaderSetSubmitReq2(STqReader* pReader, void* msgStr, int32_t msgLen, return 0; } -bool tqNextDataBlock2(STqReader* pReader) { +bool tqNextDataBlock(STqReader* pReader) { if (pReader->msg2.msgStr == NULL) { return false; } @@ -354,13 +376,20 @@ bool tqNextDataBlock2(STqReader* pReader) { while (pReader->nextBlk < blockSz) { tqDebug("tq reader next data block %p, %d %" PRId64 " %d", pReader->msg2.msgStr, pReader->msg2.msgLen, pReader->msg2.ver, pReader->nextBlk); + SSubmitTbData* pSubmitTbData = taosArrayGet(pReader->submit.aSubmitTbData, pReader->nextBlk); - if (pReader->tbIdHash == NULL) return true; + if (pReader->tbIdHash == NULL) { + return true; + } void* ret = taosHashGet(pReader->tbIdHash, &pSubmitTbData->uid, sizeof(int64_t)); if (ret != NULL) { + tqDebug("tq reader block found, ver:%"PRId64", uid:%"PRId64, pReader->msg2.ver, pSubmitTbData->uid); return true; + } else { + tqDebug("tq reader discard block, uid:%"PRId64", continue", pSubmitTbData->uid); } + pReader->nextBlk++; } @@ -427,7 +456,10 @@ int32_t tqRetrieveDataBlock2(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbD SSubmitTbData* pSubmitTbData = taosArrayGet(pReader->submit.aSubmitTbData, pReader->nextBlk); pReader->nextBlk++; - if (pSubmitTbDataRet) *pSubmitTbDataRet = pSubmitTbData; + if (pSubmitTbDataRet) { + *pSubmitTbDataRet = pSubmitTbData; + } + int32_t sversion = pSubmitTbData->sver; int64_t suid = pSubmitTbData->suid; int64_t uid = pSubmitTbData->uid; @@ -900,7 +932,7 @@ int tqReaderSetTbUidList(STqReader* pReader, const SArray* tbUidList) { return 0; } -int tqReaderAddTbUidList(STqReader* pReader, const SArray* tbUidList) { +int tqReaderAddTbUidList(STqReader* pReader, const SArray* pTableUidList) { if (pReader->tbIdHash == NULL) { pReader->tbIdHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_ENTRY_LOCK); if (pReader->tbIdHash == NULL) { @@ -909,8 +941,9 @@ int tqReaderAddTbUidList(STqReader* pReader, const SArray* tbUidList) { } } - for (int i = 0; i < taosArrayGetSize(tbUidList); i++) { - int64_t* pKey = (int64_t*)taosArrayGet(tbUidList, i); + int32_t numOfTables = taosArrayGetSize(pTableUidList); + for (int i = 0; i < numOfTables; i++) { + int64_t* pKey = (int64_t*)taosArrayGet(pTableUidList, i); taosHashPut(pReader->tbIdHash, pKey, sizeof(int64_t), NULL, 0); } @@ -926,30 +959,34 @@ int tqReaderRemoveTbUidList(STqReader* pReader, const SArray* tbUidList) { return 0; } +// todo update the table list in wal reader int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { - void* pIter = NULL; + void* pIter = NULL; + int32_t vgId = TD_VID(pTq->pVnode); + + // update the table list for each consumer handle while (1) { pIter = taosHashIterate(pTq->pHandle, pIter); if (pIter == NULL) { break; } - STqHandle* pExec = (STqHandle*)pIter; - if (pExec->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - int32_t code = qUpdateQualifiedTableId(pExec->execHandle.task, tbUidList, isAdd); + STqHandle* pTqHandle = (STqHandle*)pIter; + if (pTqHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { + int32_t code = qUpdateTableListForStreamScanner(pTqHandle->execHandle.task, tbUidList, isAdd); if (code != 0) { - tqError("update qualified table error for %s", pExec->subKey); + tqError("update qualified table error for %s", pTqHandle->subKey); continue; } - } else if (pExec->execHandle.subType == TOPIC_SUB_TYPE__DB) { + } else if (pTqHandle->execHandle.subType == TOPIC_SUB_TYPE__DB) { if (!isAdd) { int32_t sz = taosArrayGetSize(tbUidList); for (int32_t i = 0; i < sz; i++) { int64_t tbUid = *(int64_t*)taosArrayGet(tbUidList, i); - taosHashPut(pExec->execHandle.execDb.pFilterOutTbUid, &tbUid, sizeof(int64_t), NULL, 0); + taosHashPut(pTqHandle->execHandle.execDb.pFilterOutTbUid, &tbUid, sizeof(int64_t), NULL, 0); } } - } else if (pExec->execHandle.subType == TOPIC_SUB_TYPE__TABLE) { + } else if (pTqHandle->execHandle.subType == TOPIC_SUB_TYPE__TABLE) { if (isAdd) { SArray* qa = taosArrayInit(4, sizeof(tb_uid_t)); SMetaReader mr = {0}; @@ -964,35 +1001,43 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { } tDecoderClear(&mr.coder); - - if (mr.me.type != TSDB_CHILD_TABLE || mr.me.ctbEntry.suid != pExec->execHandle.execTb.suid) { + if (mr.me.type != TSDB_CHILD_TABLE || mr.me.ctbEntry.suid != pTqHandle->execHandle.execTb.suid) { tqDebug("table uid %" PRId64 " does not add to tq handle", *id); continue; } + tqDebug("table uid %" PRId64 " add to tq handle", *id); taosArrayPush(qa, id); } + metaReaderClear(&mr); if (taosArrayGetSize(qa) > 0) { - tqReaderAddTbUidList(pExec->execHandle.pExecReader, qa); + tqReaderAddTbUidList(pTqHandle->execHandle.pTqReader, qa); } + taosArrayDestroy(qa); } else { - tqReaderRemoveTbUidList(pExec->execHandle.pExecReader, tbUidList); + tqReaderRemoveTbUidList(pTqHandle->execHandle.pTqReader, tbUidList); } } } + + // update the table list handle for each stream scanner/wal reader while (1) { pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); - if (pIter == NULL) break; + if (pIter == NULL) { + break; + } + SStreamTask* pTask = *(SStreamTask**)pIter; if (pTask->taskLevel == TASK_LEVEL__SOURCE) { - int32_t code = qUpdateQualifiedTableId(pTask->exec.executor, tbUidList, isAdd); + int32_t code = qUpdateTableListForStreamScanner(pTask->exec.pExecutor, tbUidList, isAdd); if (code != 0) { - tqError("update qualified table error for stream task %d", pTask->taskId); + tqError("vgId:%d, s-task:%s update qualified table error for stream task", vgId, pTask->id.idStr); continue; } } } + return 0; } diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c new file mode 100644 index 0000000000..6ed74ddcc3 --- /dev/null +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tq.h" + +static int32_t streamTaskReplayWal(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, bool* pScanIdle); +static int32_t transferToNormalTask(SStreamMeta* pStreamMeta, SArray* pTaskList); + +// this function should be executed by stream threads. +// there is a case that the WAL increases more fast than the restore procedure, and this restore procedure +// will not stop eventually. +int tqStreamTasksScanWal(STQ* pTq) { + int32_t vgId = TD_VID(pTq->pVnode); + SStreamMeta* pMeta = pTq->pStreamMeta; + int64_t st = taosGetTimestampMs(); + + while (1) { + tqInfo("vgId:%d continue check if data in wal are available", vgId); + + // check all restore tasks + bool allFull = true; + streamTaskReplayWal(pTq->pStreamMeta, pTq->pOffsetStore, &allFull); + + int32_t times = 0; + + if (allFull) { + taosWLockLatch(&pMeta->lock); + pMeta->walScan -= 1; + times = pMeta->walScan; + + if (pMeta->walScan <= 0) { + taosWUnLockLatch(&pMeta->lock); + break; + } + + taosWUnLockLatch(&pMeta->lock); + tqInfo("vgId:%d scan wal for stream tasks for %d times", vgId, times); + } + } + + double el = (taosGetTimestampMs() - st) / 1000.0; + tqInfo("vgId:%d scan wal for stream tasks completed, elapsed time:%.2f sec", vgId, el); + + // restore wal scan flag +// atomic_store_8(&pTq->pStreamMeta->walScan, 0); + return 0; +} + +//int32_t transferToNormalTask(SStreamMeta* pStreamMeta, SArray* pTaskList) { +// int32_t numOfTask = taosArrayGetSize(pTaskList); +// if (numOfTask <= 0) { +// return TSDB_CODE_SUCCESS; +// } +// +// // todo: add lock +// for (int32_t i = 0; i < numOfTask; ++i) { +// SStreamTask* pTask = taosArrayGetP(pTaskList, i); +// tqDebug("vgId:%d transfer s-task:%s state restore -> ready, checkpoint:%" PRId64 " checkpoint id:%" PRId64, +// pStreamMeta->vgId, pTask->id.idStr, pTask->chkInfo.version, pTask->chkInfo.id); +// taosHashRemove(pStreamMeta->pWalReadTasks, &pTask->id.taskId, sizeof(pTask->id.taskId)); +// +// // NOTE: do not change the following order +// atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); +// taosHashPut(pStreamMeta->pTasks, &pTask->id.taskId, sizeof(pTask->id.taskId), &pTask, POINTER_BYTES); +// } +// +// return TSDB_CODE_SUCCESS; +//} + +int32_t streamTaskReplayWal(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, bool* pScanIdle) { + void* pIter = NULL; + int32_t vgId = pStreamMeta->vgId; + + *pScanIdle = true; + + bool allWalChecked = true; + tqDebug("vgId:%d start to check wal to extract new submit block", vgId); + + while (1) { + pIter = taosHashIterate(pStreamMeta->pTasks, pIter); + if (pIter == NULL) { + break; + } + + SStreamTask* pTask = *(SStreamTask**)pIter; + if (pTask->taskLevel != TASK_LEVEL__SOURCE) { + continue; + } + + if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || + pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { + tqDebug("s-task:%s skip push data, not ready for processing, status %d", pTask->id.idStr, + pTask->status.taskStatus); + continue; + } + + // check if offset value exists + char key[128] = {0}; + createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId); + + if (tInputQueueIsFull(pTask)) { + tqDebug("vgId:%d s-task:%s input queue is full, do nothing", vgId, pTask->id.idStr); + continue; + } + + *pScanIdle = false; + + // check if offset value exists + STqOffset* pOffset = tqOffsetRead(pOffsetStore, key); + ASSERT(pOffset != NULL); + + // seek the stored version and extract data from WAL + int32_t code = walReadSeekVer(pTask->exec.pWalReader, pOffset->val.version); + if (code != TSDB_CODE_SUCCESS) { // no data in wal, quit + continue; + } + + // append the data for the stream + tqDebug("vgId:%d wal reader seek to ver:%" PRId64 " %s", vgId, pOffset->val.version, pTask->id.idStr); + + SPackedData packData = {0}; + code = extractSubmitMsgFromWal(pTask->exec.pWalReader, &packData); + if (code != TSDB_CODE_SUCCESS) { // failed, continue + continue; + } + + SStreamDataSubmit2* p = streamDataSubmitNew(packData, STREAM_INPUT__DATA_SUBMIT); + if (p == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("%s failed to create data submit for stream since out of memory", pTask->id.idStr); + continue; + } + + allWalChecked = false; + + tqDebug("s-task:%s submit data extracted from WAL", pTask->id.idStr); + code = tqAddInputBlockNLaunchTask(pTask, (SStreamQueueItem*)p, packData.ver); + if (code == TSDB_CODE_SUCCESS) { + pOffset->val.version = walReaderGetCurrentVer(pTask->exec.pWalReader); + tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, + pOffset->val.version); + } else { + // do nothing + } + + streamDataSubmitDestroy(p); + taosFreeQitem(p); + } + + if (allWalChecked) { + *pScanIdle = true; + } + return 0; +} + diff --git a/source/dnode/vnode/src/tq/tqScan.c b/source/dnode/vnode/src/tq/tqScan.c index f01e169a53..27db66f048 100644 --- a/source/dnode/vnode/src/tq/tqScan.c +++ b/source/dnode/vnode/src/tq/tqScan.c @@ -38,7 +38,7 @@ int32_t tqAddBlockDataToRsp(const SSDataBlock* pBlock, SMqDataRsp* pRsp, int32_t } static int32_t tqAddBlockSchemaToRsp(const STqExecHandle* pExec, STaosxRsp* pRsp) { - SSchemaWrapper* pSW = tCloneSSchemaWrapper(pExec->pExecReader->pSchemaWrapper); + SSchemaWrapper* pSW = tCloneSSchemaWrapper(pExec->pTqReader->pSchemaWrapper); if (pSW == NULL) { return -1; } @@ -137,7 +137,7 @@ int32_t tqScanTaosx(STQ* pTq, const STqHandle* pHandle, STaosxRsp* pRsp, SMqMeta if (pDataBlock != NULL && pDataBlock->info.rows > 0) { if (pRsp->withTbName) { if (pOffset->type == TMQ_OFFSET__LOG) { - int64_t uid = pExec->pExecReader->lastBlkUid; + int64_t uid = pExec->pTqReader->lastBlkUid; if (tqAddTbNameToRsp(pTq, uid, pRsp, 1) < 0) { continue; } @@ -203,9 +203,9 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR SArray* pSchemas = taosArrayInit(0, sizeof(void*)); if (pExec->subType == TOPIC_SUB_TYPE__TABLE) { - STqReader* pReader = pExec->pExecReader; - tqReaderSetSubmitReq2(pReader, submit.msgStr, submit.msgLen, submit.ver); - while (tqNextDataBlock2(pReader)) { + STqReader* pReader = pExec->pTqReader; + tqReaderSetSubmitMsg(pReader, submit.msgStr, submit.msgLen, submit.ver); + while (tqNextDataBlock(pReader)) { taosArrayClear(pBlocks); taosArrayClear(pSchemas); SSubmitTbData* pSubmitTbDataRet = NULL; @@ -213,7 +213,7 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR if (terrno == TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND) continue; } if (pRsp->withTbName) { - int64_t uid = pExec->pExecReader->lastBlkUid; + int64_t uid = pExec->pTqReader->lastBlkUid; if (tqAddTbNameToRsp(pTq, uid, pRsp, taosArrayGetSize(pBlocks)) < 0) { taosArrayDestroyEx(pBlocks, (FDelete)blockDataFreeRes); taosArrayDestroyP(pSchemas, (FDelete)tDeleteSSchemaWrapper); @@ -262,8 +262,8 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR } } } else if (pExec->subType == TOPIC_SUB_TYPE__DB) { - STqReader* pReader = pExec->pExecReader; - tqReaderSetSubmitReq2(pReader, submit.msgStr, submit.msgLen, submit.ver); + STqReader* pReader = pExec->pTqReader; + tqReaderSetSubmitMsg(pReader, submit.msgStr, submit.msgLen, submit.ver); while (tqNextDataBlockFilterOut2(pReader, pExec->execDb.pFilterOutTbUid)) { taosArrayClear(pBlocks); taosArrayClear(pSchemas); @@ -272,7 +272,7 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR if (terrno == TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND) continue; } if (pRsp->withTbName) { - int64_t uid = pExec->pExecReader->lastBlkUid; + int64_t uid = pExec->pTqReader->lastBlkUid; if (tqAddTbNameToRsp(pTq, uid, pRsp, taosArrayGetSize(pBlocks)) < 0) { taosArrayDestroyEx(pBlocks, (FDelete)blockDataFreeRes); taosArrayDestroyP(pSchemas, (FDelete)tDeleteSSchemaWrapper); diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 4645df5b67..62b81305b7 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -87,7 +87,7 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d return; } - tqDebug("vgId:%d, task %d write into table, block num: %d", TD_VID(pVnode), pTask->taskId, blockSz); + tqDebug("vgId:%d, s-task:%s write into table, block num: %d", TD_VID(pVnode), pTask->id.idStr, blockSz); for (int32_t i = 0; i < blockSz; i++) { bool createTb = true; SSDataBlock* pDataBlock = taosArrayGet(pBlocks, i); @@ -382,7 +382,7 @@ void tqSinkToTablePipeline2(SStreamTask* pTask, void* vnode, int64_t ver, void* int32_t blockSz = taosArrayGetSize(pBlocks); - tqDebug("vgId:%d, task %d write into table, block num: %d", TD_VID(pVnode), pTask->taskId, blockSz); + tqDebug("vgId:%d, s-task:%s write results blocks:%d into table", TD_VID(pVnode), pTask->id.idStr, blockSz); void* pBuf = NULL; SArray* tagArray = NULL; @@ -475,11 +475,9 @@ void tqSinkToTablePipeline2(SStreamTask* pTask, void* vnode, int64_t ver, void* } for (int32_t tagId = UD_TAG_COLUMN_INDEX, step = 1; tagId < size; tagId++, step++) { SColumnInfoData* pTagData = taosArrayGet(pDataBlock->pDataBlock, tagId); - STagVal tagVal = { - .cid = pTSchema->numOfCols + step, - .type = pTagData->info.type, - }; - void* pData = colDataGetData(pTagData, rowId); + + STagVal tagVal = {.cid = pTSchema->numOfCols + step, .type = pTagData->info.type}; + void* pData = colDataGetData(pTagData, rowId); if (colDataIsNull_s(pTagData, rowId)) { continue; } else if (IS_VAR_DATA_TYPE(pTagData->info.type)) { diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c new file mode 100644 index 0000000000..4c37e1052f --- /dev/null +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -0,0 +1,462 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tq.h" + +#define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0) + +static int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp); + +// stream_task:stream_id:task_id +void createStreamTaskOffsetKey(char* dst, uint64_t streamId, uint32_t taskId) { + int32_t n = 12; + char* p = dst; + + memcpy(p, "stream_task:", n); + p += n; + + int32_t inc = tintToHex(streamId, p); + p += inc; + + *(p++) = ':'; + tintToHex(taskId, p); +} + +int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem, int64_t ver) { + int32_t code = tAppendDataToInputQueue(pTask, pQueueItem); + if (code < 0) { + tqError("s-task:%s failed to put into queue, too many, next start ver:%" PRId64, pTask->id.idStr, ver); + return -1; + } + + if (streamSchedExec(pTask) < 0) { + tqError("stream task:%d failed to be launched, code:%s", pTask->id.taskId, tstrerror(terrno)); + return -1; + } + + return TSDB_CODE_SUCCESS; +} + +void initOffsetForAllRestoreTasks(STQ* pTq) { + void* pIter = NULL; + + while(1) { + pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); + if (pIter == NULL) { + break; + } + + SStreamTask* pTask = *(SStreamTask**)pIter; + if (pTask->taskLevel != TASK_LEVEL__SOURCE) { + continue; + } + + if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { + tqDebug("s-task:%s skip push data, since not ready, status %d", pTask->id.idStr, pTask->status.taskStatus); + continue; + } + + char key[128] = {0}; + createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId); + + STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key); + if (pOffset == NULL) { + doSaveTaskOffset(pTq->pOffsetStore, key, pTask->chkInfo.version); + } + } +} + +void saveOffsetForAllTasks(STQ* pTq, int64_t ver) { + void* pIter = NULL; + + while(1) { + pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); + if (pIter == NULL) { + break; + } + + SStreamTask* pTask = *(SStreamTask**)pIter; + if (pTask->taskLevel != TASK_LEVEL__SOURCE) { + continue; + } + + if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { + tqDebug("s-task:%s skip push data, not ready for processing, status %d", pTask->id.idStr, + pTask->status.taskStatus); + continue; + } + + char key[128] = {0}; + createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId); + + STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key); + if (pOffset == NULL) { + doSaveTaskOffset(pTq->pOffsetStore, key, ver); + } + } +} + +void doSaveTaskOffset(STqOffsetStore* pOffsetStore, const char* pKey, int64_t ver) { + STqOffset offset = {0}; + tqOffsetResetToLog(&offset.val, ver); + + tstrncpy(offset.subKey, pKey, tListLen(offset.subKey)); + + // keep the offset info in the offset store + tqOffsetWrite(pOffsetStore, &offset); +} + +static int32_t tqInitDataRsp(SMqDataRsp* pRsp, const SMqPollReq* pReq, int8_t subType) { + pRsp->reqOffset = pReq->reqOffset; + + pRsp->blockData = taosArrayInit(0, sizeof(void*)); + pRsp->blockDataLen = taosArrayInit(0, sizeof(int32_t)); + + if (pRsp->blockData == NULL || pRsp->blockDataLen == NULL) { + return -1; + } + + pRsp->withTbName = 0; + pRsp->withSchema = false; + return 0; +} + +static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, const SMqPollReq* pReq) { + pRsp->reqOffset = pReq->reqOffset; + + pRsp->withTbName = 1; + pRsp->withSchema = 1; + pRsp->blockData = taosArrayInit(0, sizeof(void*)); + pRsp->blockDataLen = taosArrayInit(0, sizeof(int32_t)); + pRsp->blockTbName = taosArrayInit(0, sizeof(void*)); + pRsp->blockSchema = taosArrayInit(0, sizeof(void*)); + + if (pRsp->blockData == NULL || pRsp->blockDataLen == NULL || pRsp->blockTbName == NULL || pRsp->blockSchema == NULL) { + return -1; + } + + return 0; +} + +static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, + SRpcMsg* pMsg, bool* pBlockReturned) { + uint64_t consumerId = pRequest->consumerId; + STqOffsetVal reqOffset = pRequest->reqOffset; + STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, pRequest->subKey); + int32_t vgId = TD_VID(pTq->pVnode); + + *pBlockReturned = false; + + // In this vnode, data has been polled by consumer for this topic, so let's continue from the last offset value. + if (pOffset != NULL) { + *pOffsetVal = pOffset->val; + + char formatBuf[80]; + tFormatOffset(formatBuf, 80, pOffsetVal); + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, existed offset found, offset reset to %s and continue. reqId:0x%"PRIx64, + consumerId, pHandle->subKey, vgId, formatBuf, pRequest->reqId); + return 0; + } else { + // no poll occurs in this vnode for this topic, let's seek to the right offset value. + if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEAST) { + if (pRequest->useSnapshot) { + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey:%s, vgId:%d, (earliest) set offset to be snapshot", + consumerId, pHandle->subKey, vgId); + + if (pHandle->fetchMeta) { + tqOffsetResetToMeta(pOffsetVal, 0); + } else { + tqOffsetResetToData(pOffsetVal, 0, 0); + } + } else { + pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef); + if (pHandle->pRef == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + // offset set to previous version when init + tqOffsetResetToLog(pOffsetVal, pHandle->pRef->refVer - 1); + } + } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { + if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { + SMqDataRsp dataRsp = {0}; + tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); + + tqOffsetResetToLog(&dataRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, (latest) offset reset to %" PRId64, consumerId, + pHandle->subKey, vgId, dataRsp.rspOffset.version); + int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); + tDeleteSMqDataRsp(&dataRsp); + + *pBlockReturned = true; + return code; + } else { + STaosxRsp taosxRsp = {0}; + tqInitTaosxRsp(&taosxRsp, pRequest); + tqOffsetResetToLog(&taosxRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); + int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); + tDeleteSTaosxRsp(&taosxRsp); + + *pBlockReturned = true; + return code; + } + } else if (reqOffset.type == TMQ_OFFSET__RESET_NONE) { + tqError("tmq poll: subkey:%s, no offset committed for consumer:0x%" PRIx64 " in vg %d, subkey %s, reset none failed", + pHandle->subKey, consumerId, vgId, pRequest->subKey); + terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET; + return -1; + } + } + + return 0; +} + +static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, + SRpcMsg* pMsg, STqOffsetVal* pOffset) { + uint64_t consumerId = pRequest->consumerId; + int32_t vgId = TD_VID(pTq->pVnode); + + SMqDataRsp dataRsp = {0}; + tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); + + // lock + taosWLockLatch(&pTq->lock); + + qSetTaskId(pHandle->execHandle.task, consumerId, pRequest->reqId); + int code = tqScanData(pTq, pHandle, &dataRsp, pOffset); + if(code != 0) { + goto end; + } + + // till now, all data has been transferred to consumer, new data needs to push client once arrived. + if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && + dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { + code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); + taosWUnLockLatch(&pTq->lock); + return code; + } + + + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_RSP); + + // NOTE: this pHandle->consumerId may have been changed already. + + end: + { + char buf[80] = {0}; + tFormatOffset(buf, 80, &dataRsp.rspOffset); + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, rsp offset type:%s, reqId:0x%" PRIx64 " code:%d", + consumerId, pHandle->subKey, vgId, dataRsp.blockNum, buf, pRequest->reqId, code); + taosWUnLockLatch(&pTq->lock); + tDeleteSMqDataRsp(&dataRsp); + } + return code; +} + + +static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg, STqOffsetVal *offset) { + int code = 0; + int32_t vgId = TD_VID(pTq->pVnode); + SWalCkHead* pCkHead = NULL; + SMqMetaRsp metaRsp = {0}; + STaosxRsp taosxRsp = {0}; + tqInitTaosxRsp(&taosxRsp, pRequest); + + if (offset->type != TMQ_OFFSET__LOG) { + if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, offset) < 0) { + return -1; + } + + if (metaRsp.metaRspLen > 0) { + code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp); + tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send meta offset type:%d,uid:%" PRId64 ",ts:%" PRId64, + pRequest->consumerId, pHandle->subKey, vgId, metaRsp.rspOffset.type, metaRsp.rspOffset.uid, metaRsp.rspOffset.ts); + taosMemoryFree(metaRsp.metaRsp); + tDeleteSTaosxRsp(&taosxRsp); + return code; + } + + tqDebug("taosx poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send data blockNum:%d, offset type:%d,uid:%" PRId64 + ",ts:%" PRId64,pRequest->consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type, taosxRsp.rspOffset.uid,taosxRsp.rspOffset.ts); + if (taosxRsp.blockNum > 0) { + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); + tDeleteSTaosxRsp(&taosxRsp); + return code; + }else { + *offset = taosxRsp.rspOffset; + } + } + + + if (offset->type == TMQ_OFFSET__LOG) { + int64_t fetchVer = offset->version + 1; + pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048); + if (pCkHead == NULL) { + tDeleteSTaosxRsp(&taosxRsp); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + walSetReaderCapacity(pHandle->pWalReader, 2048); + int totalRows = 0; + while (1) { + int32_t savedEpoch = atomic_load_32(&pHandle->epoch); + if (savedEpoch > pRequest->epoch) { + tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey:%s vgId:%d offset %" PRId64 + ", found new consumer epoch %d, discard req epoch %d", pRequest->consumerId, pRequest->epoch, pHandle->subKey, vgId, fetchVer, savedEpoch, pRequest->epoch); + break; + } + + if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead, pRequest->reqId) < 0) { + tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); + tDeleteSTaosxRsp(&taosxRsp); + taosMemoryFreeClear(pCkHead); + return code; + } + + SWalCont* pHead = &pCkHead->head; + tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d", pRequest->consumerId, + pRequest->epoch, vgId, fetchVer, pHead->msgType); + + // process meta + if (pHead->msgType != TDMT_VND_SUBMIT) { + if(totalRows > 0) { + tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer - 1); + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); + tDeleteSTaosxRsp(&taosxRsp); + taosMemoryFreeClear(pCkHead); + return code; + } + + tqDebug("fetch meta msg, ver:%" PRId64 ", type:%s", pHead->version, TMSG_INFO(pHead->msgType)); + tqOffsetResetToLog(&metaRsp.rspOffset, fetchVer); + metaRsp.resMsgType = pHead->msgType; + metaRsp.metaRspLen = pHead->bodyLen; + metaRsp.metaRsp = pHead->body; + if (tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp) < 0) { + code = -1; + taosMemoryFreeClear(pCkHead); + tDeleteSTaosxRsp(&taosxRsp); + return code; + } + code = 0; + taosMemoryFreeClear(pCkHead); + tDeleteSTaosxRsp(&taosxRsp); + return code; + } + + // process data + SPackedData submit = { + .msgStr = POINTER_SHIFT(pHead->body, sizeof(SSubmitReq2Msg)), + .msgLen = pHead->bodyLen - sizeof(SSubmitReq2Msg), + .ver = pHead->version, + }; + + if (tqTaosxScanLog(pTq, pHandle, submit, &taosxRsp, &totalRows) < 0) { + tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", pRequest->consumerId, vgId, + pRequest->subKey); + taosMemoryFreeClear(pCkHead); + tDeleteSTaosxRsp(&taosxRsp); + return -1; + } + + if (totalRows >= 4096 || taosxRsp.createTableNum > 0) { + tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); + tDeleteSTaosxRsp(&taosxRsp); + taosMemoryFreeClear(pCkHead); + return code; + } else { + fetchVer++; + } + } + } + + tDeleteSTaosxRsp(&taosxRsp); + taosMemoryFreeClear(pCkHead); + return 0; +} + +int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg) { + int32_t code = -1; + STqOffsetVal offset = {0}; + STqOffsetVal reqOffset = pRequest->reqOffset; + + // 1. reset the offset if needed + if (IS_OFFSET_RESET_TYPE(reqOffset.type)) { + // handle the reset offset cases, according to the consumer's choice. + bool blockReturned = false; + code = extractResetOffsetVal(&offset, pTq, pHandle, pRequest, pMsg, &blockReturned); + if (code != 0) { + return code; + } + + // empty block returned, quit + if (blockReturned) { + return 0; + } + } else { // use the consumer specified offset + // the offset value can not be monotonious increase?? + offset = reqOffset; + } + + // this is a normal subscribe requirement + if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { + return extractDataAndRspForNormalSubscribe(pTq, pHandle, pRequest, pMsg, &offset); + } + + // todo handle the case where re-balance occurs. + // for taosx + return extractDataAndRspForDbStbSubscribe(pTq, pHandle, pRequest, pMsg, &offset); +} + +int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp) { + int32_t len = 0; + int32_t code = 0; + tEncodeSize(tEncodeSMqMetaRsp, pRsp, len, code); + if (code < 0) { + return -1; + } + int32_t tlen = sizeof(SMqRspHead) + len; + void* buf = rpcMallocCont(tlen); + if (buf == NULL) { + return -1; + } + + ((SMqRspHead*)buf)->mqMsgType = TMQ_MSG_TYPE__POLL_META_RSP; + ((SMqRspHead*)buf)->epoch = pReq->epoch; + ((SMqRspHead*)buf)->consumerId = pReq->consumerId; + + void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); + + SEncoder encoder = {0}; + tEncoderInit(&encoder, abuf, len); + tEncodeSMqMetaRsp(&encoder, pRsp); + tEncoderClear(&encoder); + + SRpcMsg resp = { + .info = pMsg->info, + .pCont = buf, + .contLen = tlen, + .code = 0, + }; + tmsgSendRsp(&resp); + + tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) send rsp, res msg type %d, offset type:%d", + TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->resMsgType, pRsp->rspOffset.type); + + return 0; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index a58d050009..89686c3d33 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -2089,7 +2089,7 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, pBlockScanInfo->lastKey = tsLastBlock; return TSDB_CODE_SUCCESS; } else { - int32_t code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema); + code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -2113,7 +2113,7 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, } } } else { // not merge block data - int32_t code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema); + code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -2576,7 +2576,7 @@ int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBloc SRow* pTSRow = NULL; SRowMerger merge = {0}; - int32_t code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema); + code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -3243,8 +3243,8 @@ static int32_t readRowsCountFromFiles(STsdbReader* pReader) { int32_t code = TSDB_CODE_SUCCESS; while (1) { - bool hasNext = false; - int32_t code = filesetIteratorNext(&pReader->status.fileIter, pReader, &hasNext); + bool hasNext = false; + code = filesetIteratorNext(&pReader->status.fileIter, pReader, &hasNext); if (code) { return code; } @@ -3516,8 +3516,8 @@ SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_ int64_t startVer = (pCond->startVersion == -1) ? 0 : pCond->startVersion; int64_t endVer = 0; - if (pCond->endVersion == - -1) { // user not specified end version, set current maximum version of vnode as the endVersion + if (pCond->endVersion == -1) { + // user not specified end version, set current maximum version of vnode as the endVersion endVer = pVnode->state.applied; } else { endVer = (pCond->endVersion > pVnode->state.applied) ? pVnode->state.applied : pCond->endVersion; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 65fac5a475..b62bf27def 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -400,7 +400,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp } break; case TDMT_STREAM_TASK_DEPLOY: { - if (tqProcessTaskDeployReq(pVnode->pTq, version, pReq, len) < 0) { + if (pVnode->restored && tqProcessTaskDeployReq(pVnode->pTq, version, pReq, len) < 0) { goto _err; } } break; @@ -447,13 +447,11 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp walApplyVer(pVnode->pWal, version); - /*vInfo("vgId:%d, push msg begin", pVnode->config.vgId);*/ if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) { /*vInfo("vgId:%d, push msg end", pVnode->config.vgId);*/ vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); return -1; } - /*vInfo("vgId:%d, push msg end", pVnode->config.vgId);*/ // commit if need if (needCommit) { @@ -541,13 +539,10 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { return vnodeGetBatchMeta(pVnode, pMsg); case TDMT_VND_TMQ_CONSUME: return tqProcessPollReq(pVnode->pTq, pMsg); - case TDMT_STREAM_TASK_RUN: return tqProcessTaskRunReq(pVnode->pTq, pMsg); -#if 1 case TDMT_STREAM_TASK_DISPATCH: return tqProcessTaskDispatchReq(pVnode->pTq, pMsg, true); -#endif case TDMT_STREAM_TASK_CHECK: return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_DISPATCH_RSP: diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 2b4eff08d3..d4a394b584 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -553,6 +553,9 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) pVnode->restored = true; vInfo("vgId:%d, sync restore finished", pVnode->config.vgId); + + // start to restore all stream tasks + tqStartStreamTasks(pVnode->pTq); } static void vnodeBecomeFollower(const SSyncFSM *pFsm) { diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 7d90560c33..2cb6626b03 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -127,14 +127,9 @@ enum { }; typedef struct { - // TODO remove prepareStatus - // STqOffsetVal prepareStatus; // for tmq - STqOffsetVal currentOffset; // for tmq - SMqMetaRsp metaRsp; // for tmq fetching meta - // int8_t returned; - int64_t snapshotVer; - // const SSubmitReq* pReq; - + STqOffsetVal currentOffset; // for tmq + SMqMetaRsp metaRsp; // for tmq fetching meta + int64_t snapshotVer; SPackedData submit; SSchemaWrapper* schema; char tbName[TSDB_TABLE_NAME_LEN]; @@ -144,6 +139,8 @@ typedef struct { int64_t fillHistoryVer1; int64_t fillHistoryVer2; SStreamState* pState; + int64_t dataVersion; + int64_t checkPointId; } SStreamTaskInfo; typedef struct { @@ -191,7 +188,6 @@ enum { OP_OPENED = 0x1, OP_RES_TO_RETURN = 0x5, OP_EXEC_DONE = 0x9, - // OP_EXEC_RECV = 0x11, }; typedef struct SOperatorFpSet { @@ -560,6 +556,7 @@ typedef struct SStreamIntervalOperatorInfo { uint64_t numOfDatapack; SArray* pUpdated; SSHashObj* pUpdatedMap; + int64_t dataVersion; } SStreamIntervalOperatorInfo; typedef struct SDataGroupInfo { @@ -609,6 +606,7 @@ typedef struct SStreamSessionAggOperatorInfo { bool ignoreExpiredDataSaved; SArray* pUpdated; SSHashObj* pStUpdated; + int64_t dataVersion; } SStreamSessionAggOperatorInfo; typedef struct SStreamStateAggOperatorInfo { @@ -627,6 +625,7 @@ typedef struct SStreamStateAggOperatorInfo { bool ignoreExpiredDataSaved; SArray* pUpdated; SSHashObj* pSeUpdated; + int64_t dataVersion; } SStreamStateAggOperatorInfo; typedef struct SStreamPartitionOperatorInfo { @@ -827,7 +826,7 @@ void setTaskKilled(SExecTaskInfo* pTaskInfo, int32_t rspCode); void doDestroyTask(SExecTaskInfo* pTaskInfo); void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status); -char* buildTaskId(uint64_t taskId, uint64_t queryId); +void buildTaskId(uint64_t taskId, uint64_t queryId, char* dst); SArray* getTableListInfo(const SExecTaskInfo* pTaskInfo); diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 0229631d40..6e3a7d8725 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -127,7 +127,7 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu pOperator->status = OP_NOT_OPENED; SStreamScanInfo* pInfo = pOperator->info; - qDebug("task stream set total blocks:%d %s", (int32_t)numOfBlocks, id); + qDebug("s-task set source blocks:%d %s", (int32_t)numOfBlocks, id); ASSERT(pInfo->validBlockIndex == 0 && taosArrayGetSize(pInfo->pBlockLists) == 0); if (type == STREAM_INPUT__MERGED_SUBMIT) { @@ -173,9 +173,7 @@ void doSetTaskId(SOperatorInfo* pOperator) { void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId) { SExecTaskInfo* pTaskInfo = tinfo; pTaskInfo->id.queryId = queryId; - - taosMemoryFreeClear(pTaskInfo->id.str); - pTaskInfo->id.str = buildTaskId(taskId, queryId); + buildTaskId(taskId, queryId, pTaskInfo->id.str); // set the idstr for tsdbReader doSetTaskId(pTaskInfo->pRoot); @@ -198,6 +196,13 @@ int32_t qSetStreamOpOpen(qTaskInfo_t tinfo) { return code; } +void qGetCheckpointVersion(qTaskInfo_t tinfo, int64_t* dataVer, int64_t* ckId) { + SExecTaskInfo* pTaskInfo = tinfo; + *dataVer = pTaskInfo->streamInfo.dataVersion; + *ckId = pTaskInfo->streamInfo.checkPointId; +} + + int32_t qSetMultiStreamInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, int32_t type) { if (tinfo == NULL) { return TSDB_CODE_APP_ERROR; @@ -363,27 +368,23 @@ static SArray* filterUnqualifiedTables(const SStreamScanInfo* pScanInfo, const S return qa; } -int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd) { +int32_t qUpdateTableListForStreamScanner(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; + const char* id = GET_TASKID(pTaskInfo); + int32_t code = 0; if (isAdd) { - qDebug("add %d tables id into query list, %s", (int32_t)taosArrayGetSize(tableIdList), pTaskInfo->id.str); + qDebug("add %d tables id into query list, %s", (int32_t)taosArrayGetSize(tableIdList), id); } // traverse to the stream scanner node to add this table id - SOperatorInfo* pInfo = pTaskInfo->pRoot; - while (pInfo->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { - pInfo = pInfo->pDownstream[0]; - } - - int32_t code = 0; + SOperatorInfo* pInfo = extractOperatorInTree(pTaskInfo->pRoot, QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN, id); SStreamScanInfo* pScanInfo = pInfo->info; + if (isAdd) { // add new table id SArray* qa = filterUnqualifiedTables(pScanInfo, tableIdList, GET_TASKID(pTaskInfo)); int32_t numOfQualifiedTables = taosArrayGetSize(qa); - - qDebug(" %d qualified child tables added into stream scanner", numOfQualifiedTables); - + qDebug("%d qualified child tables added into stream scanner, %s", numOfQualifiedTables, id); code = tqReaderAddTbUidList(pScanInfo->tqReader, qa); if (code != TSDB_CODE_SUCCESS) { taosArrayDestroy(qa); @@ -424,19 +425,6 @@ int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bo } } -#if 0 - bool exists = false; - for (int32_t k = 0; k < taosArrayGetSize(pListInfo->pTableList); ++k) { - STableKeyInfo* pKeyInfo = taosArrayGet(pListInfo->pTableList, k); - if (pKeyInfo->uid == keyInfo.uid) { - qWarn("ignore duplicated query table uid:%" PRIu64 " added, %s", pKeyInfo->uid, pTaskInfo->id.str); - exists = true; - } - } - - if (!exists) { -#endif - tableListAddTableInfo(pTableListInfo, keyInfo.uid, keyInfo.groupId); } @@ -447,7 +435,7 @@ int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bo taosArrayDestroy(qa); } else { // remove the table id in current list - qDebug(" %d remove child tables from the stream scanner", (int32_t)taosArrayGetSize(tableIdList)); + qDebug("%d remove child tables from the stream scanner, %s", (int32_t)taosArrayGetSize(tableIdList), id); taosWLockLatch(&pTaskInfo->lock); code = tqReaderRemoveTbUidList(pScanInfo->tqReader, tableIdList); taosWUnLockLatch(&pTaskInfo->lock); @@ -1273,3 +1261,22 @@ void qProcessRspMsg(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { rpcFreeCont(pMsg->pCont); destroySendMsgInfo(pSendInfo); } + +SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo) { + SExecTaskInfo* pTaskInfo = tinfo; + SArray* plist = getTableListInfo(pTaskInfo); + + // only extract table in the first elements + STableListInfo* pTableListInfo = taosArrayGetP(plist, 0); + + SArray* pUidList = taosArrayInit(10, sizeof(uint64_t)); + + int32_t numOfTables = tableListGetSize(pTableListInfo); + for(int32_t i = 0; i < numOfTables; ++i) { + STableKeyInfo* pKeyInfo = tableListGetInfo(pTableListInfo, i); + taosArrayPush(pUidList, &pKeyInfo->uid); + } + + taosArrayDestroy(plist); + return pUidList; +} diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index a0697a7102..7594079cfb 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1151,8 +1151,8 @@ void cleanupExprSupp(SExprSupp* pSupp) { void cleanupBasicInfo(SOptrBasicInfo* pInfo) { pInfo->pRes = blockDataDestroy(pInfo->pRes); } -char* buildTaskId(uint64_t taskId, uint64_t queryId) { - char* p = taosMemoryMalloc(64); +void buildTaskId(uint64_t taskId, uint64_t queryId, char* dst) { + char* p = dst; int32_t offset = 6; memcpy(p, "TID:0x", offset); @@ -1163,7 +1163,6 @@ char* buildTaskId(uint64_t taskId, uint64_t queryId) { offset += tintToHex(queryId, &p[offset]); p[offset] = 0; - return p; } SExecTaskInfo* doCreateExecTaskInfo(uint64_t queryId, uint64_t taskId, int32_t vgId, EOPTR_EXEC_MODEL model, @@ -1185,7 +1184,9 @@ SExecTaskInfo* doCreateExecTaskInfo(uint64_t queryId, uint64_t taskId, int32_t v taosInitRWLatch(&pTaskInfo->lock); pTaskInfo->id.vgId = vgId; pTaskInfo->id.queryId = queryId; - pTaskInfo->id.str = buildTaskId(taskId, queryId); + + pTaskInfo->id.str = taosMemoryMalloc(64); + buildTaskId(taskId, queryId, pTaskInfo->id.str); return pTaskInfo; } @@ -2008,7 +2009,11 @@ void qStreamCloseTsdbReader(void* task) { } static void extractTableList(SArray* pList, const SOperatorInfo* pOperator) { - if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { + if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { + SStreamScanInfo* pScanInfo = pOperator->info; + STableScanInfo* pTableScanInfo = pScanInfo->pTableScanOp->info; + taosArrayPush(pList, &pTableScanInfo->base.pTableListInfo); + } else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { STableScanInfo* pScanInfo = pOperator->info; taosArrayPush(pList, &pScanInfo->base.pTableListInfo); } else { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index cd539cc987..ae396a4c68 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1637,7 +1637,7 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { if (pTaskInfo->streamInfo.submit.msgStr != NULL) { if (pInfo->tqReader->msg2.msgStr == NULL) { SPackedData submit = pTaskInfo->streamInfo.submit; - if (tqReaderSetSubmitReq2(pInfo->tqReader, submit.msgStr, submit.msgLen, submit.ver) < 0) { + if (tqReaderSetSubmitMsg(pInfo->tqReader, submit.msgStr, submit.msgLen, submit.ver) < 0) { qError("submit msg messed up when initing stream submit block %p", submit.msgStr); return NULL; } @@ -1646,7 +1646,7 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { blockDataCleanup(pInfo->pRes); SDataBlockInfo* pBlockInfo = &pInfo->pRes->info; - while (tqNextDataBlock2(pInfo->tqReader)) { + while (tqNextDataBlock(pInfo->tqReader)) { SSDataBlock block = {0}; int32_t code = tqRetrieveDataBlock2(&block, pInfo->tqReader, NULL); @@ -1812,7 +1812,6 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { /*resetTableScanInfo(pTSInfo, pWin);*/ tsdbReaderClose(pTSInfo->base.dataReader); - qDebug("4"); pTSInfo->base.dataReader = NULL; pInfo->pTableScanOp->status = OP_OPENED; @@ -1895,7 +1894,6 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__NONE; STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; tsdbReaderClose(pTSInfo->base.dataReader); - qDebug("5"); pTSInfo->base.dataReader = NULL; @@ -1922,6 +1920,7 @@ FETCH_NEXT_BLOCK: if (pBlock->info.parTbName[0]) { streamStatePutParName(pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, pBlock->info.parTbName); } + // TODO move into scan pBlock->info.calWin.skey = INT64_MIN; pBlock->info.calWin.ekey = INT64_MAX; @@ -2064,7 +2063,7 @@ FETCH_NEXT_BLOCK: int32_t current = pInfo->validBlockIndex++; SPackedData* pSubmit = taosArrayGet(pInfo->pBlockLists, current); - if (tqReaderSetSubmitReq2(pInfo->tqReader, pSubmit->msgStr, pSubmit->msgLen, pSubmit->ver) < 0) { + if (tqReaderSetSubmitMsg(pInfo->tqReader, pSubmit->msgStr, pSubmit->msgLen, pSubmit->ver) < 0) { qError("submit msg messed up when initing stream submit block %p, current %d, total %d", pSubmit, current, totBlockNum); continue; @@ -2073,7 +2072,7 @@ FETCH_NEXT_BLOCK: blockDataCleanup(pInfo->pRes); - while (tqNextDataBlock2(pInfo->tqReader)) { + while (tqNextDataBlock(pInfo->tqReader)) { SSDataBlock block = {0}; int32_t code = tqRetrieveDataBlock2(&block, pInfo->tqReader, NULL); diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index b42b9c467e..007a6f63d1 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -2333,9 +2333,15 @@ static int32_t getNextQualifiedFinalWindow(SInterval* pInterval, STimeWindow* pN return startPos; } +static void setStreamDataVersion(SExecTaskInfo* pTaskInfo, int64_t version, int64_t ckId) { + pTaskInfo->streamInfo.dataVersion = version; + pTaskInfo->streamInfo.checkPointId = ckId; +} + static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBlock, uint64_t groupId, SSHashObj* pUpdatedMap) { SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)pOperatorInfo->info; + pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); SResultRowInfo* pResultRowInfo = &(pInfo->binfo.resultRowInfo); SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; @@ -2432,10 +2438,8 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* p updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &nextWin, true); applyAggFunctionOnPartialTuples(pTaskInfo, pSup->pCtx, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, pSDataBlock->info.rows, numOfOutput); - SWinKey key = { - .ts = nextWin.skey, - .groupId = groupId, - }; + + SWinKey key = { .ts = nextWin.skey, .groupId = groupId }; saveOutputBuf(pInfo->pState, &key, pResult, pInfo->aggSup.resultRowSize); releaseOutputBuf(pInfo->pState, &key, pResult); if (pInfo->delKey.ts > key.ts) { @@ -2503,6 +2507,7 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { clearFunctionContext(&pOperator->exprSupp); // semi interval operator clear disk buffer clearStreamIntervalOperator(pInfo); + setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId); qDebug("===stream===clear semi operator"); } else { deleteIntervalDiscBuf(pInfo->pState, pInfo->pPullDataMap, pInfo->twAggSup.maxTs - pInfo->twAggSup.deleteMark, @@ -2776,6 +2781,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, pInfo->numOfDatapack = 0; pInfo->pUpdated = NULL; pInfo->pUpdatedMap = NULL; + pInfo->dataVersion = 0; pOperator->operatorType = pPhyNode->type; pOperator->blocking = true; @@ -3126,6 +3132,8 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData int32_t rows = pSDataBlock->info.rows; int32_t winRows = 0; + pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); + SColumnInfoData* pStartTsCol = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); TSKEY* startTsCols = (int64_t*)pStartTsCol->pData; SColumnInfoData* pEndTsCol = NULL; @@ -3589,6 +3597,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh pInfo->ignoreExpiredDataSaved = false; pInfo->pUpdated = NULL; pInfo->pStUpdated = NULL; + pInfo->dataVersion = 0; setOperatorInfo(pOperator, "StreamSessionWindowAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION, true, OP_NOT_OPENED, pInfo, pTaskInfo); @@ -3899,6 +3908,9 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl TSKEY* tsCols = NULL; SResultRow* pResult = NULL; int32_t winRows = 0; + + pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); + if (pSDataBlock->pDataBlock != NULL) { SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); tsCols = (int64_t*)pColDataInfo->pData; @@ -4115,6 +4127,7 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys pInfo->ignoreExpiredDataSaved = false; pInfo->pUpdated = NULL; pInfo->pSeUpdated = NULL; + pInfo->dataVersion = 0; setOperatorInfo(pOperator, "StreamStateAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE, true, OP_NOT_OPENED, pInfo, pTaskInfo); @@ -4750,6 +4763,7 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { &pInfo->delKey); setOperatorCompleted(pOperator); streamStateCommit(pTaskInfo->streamInfo.pState); + setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId); return NULL; } @@ -4771,6 +4785,7 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { pInfo->numOfDatapack = 0; break; } + pInfo->numOfDatapack++; printDataBlock(pBlock, "single interval recv"); diff --git a/source/libs/stream/CMakeLists.txt b/source/libs/stream/CMakeLists.txt index ceddf4f215..790547bb61 100644 --- a/source/libs/stream/CMakeLists.txt +++ b/source/libs/stream/CMakeLists.txt @@ -9,7 +9,7 @@ target_include_directories( target_link_libraries( stream PUBLIC tdb - PRIVATE os util transport qcom executor + PRIVATE os util transport qcom executor wal ) if(${BUILD_TEST}) diff --git a/source/libs/stream/inc/streamInc.h b/source/libs/stream/inc/streamInc.h index 66496f11f8..876b80697a 100644 --- a/source/libs/stream/inc/streamInc.h +++ b/source/libs/stream/inc/streamInc.h @@ -44,7 +44,7 @@ int32_t streamDispatchOneCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId, SEpSet* pEpSet); -SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* elem); +SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem); #ifdef __cplusplus } diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 361cd2cacc..7171b52912 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -16,6 +16,8 @@ #include "streamInc.h" #include "ttimer.h" +#define STREAM_TASK_INPUT_QUEUEU_CAPACITY 2000 + int32_t streamInit() { int8_t old; while (1) { @@ -50,7 +52,7 @@ void streamCleanUp() { void streamSchedByTimer(void* param, void* tmrId) { SStreamTask* pTask = (void*)param; - if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) { + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { streamMetaReleaseTask(NULL, pTask); return; } @@ -64,15 +66,16 @@ void streamSchedByTimer(void* param, void* tmrId) { taosFreeQitem(trigger); return; } - trigger->pBlock->info.type = STREAM_GET_ALL; + trigger->pBlock->info.type = STREAM_GET_ALL; atomic_store_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE); - if (tAppendDataForStream(pTask, (SStreamQueueItem*)trigger) < 0) { + if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)trigger) < 0) { taosFreeQitem(trigger); taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->timer); return; } + streamSchedExec(pTask); } @@ -91,31 +94,33 @@ int32_t streamSetupTrigger(SStreamTask* pTask) { int32_t streamSchedExec(SStreamTask* pTask) { int8_t schedStatus = - atomic_val_compare_exchange_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE, TASK_SCHED_STATUS__WAITING); + atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE, TASK_SCHED_STATUS__WAITING); if (schedStatus == TASK_SCHED_STATUS__INACTIVE) { SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); if (pRunReq == NULL) { - atomic_store_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE); + terrno = TSDB_CODE_OUT_OF_MEMORY; + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); return -1; } pRunReq->head.vgId = pTask->nodeId; - pRunReq->streamId = pTask->streamId; - pRunReq->taskId = pTask->taskId; + pRunReq->streamId = pTask->id.streamId; + pRunReq->taskId = pTask->id.taskId; SRpcMsg msg = { .msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq) }; tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &msg); + qDebug("trigger to run s-task:%s", pTask->id.idStr); } return 0; } -int32_t streamTaskEnqueue(SStreamTask* pTask, const SStreamDispatchReq* pReq, SRpcMsg* pRsp) { +int32_t streamTaskEnqueueBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq, SRpcMsg* pRsp) { SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); int8_t status; - // enqueue + // enqueue data block if (pData != NULL) { pData->type = STREAM_INPUT__DATA_BLOCK; pData->srcVgId = pReq->dataSrcVgId; @@ -123,10 +128,10 @@ int32_t streamTaskEnqueue(SStreamTask* pTask, const SStreamDispatchReq* pReq, SR /*pData->blocks = pReq->data;*/ /*pBlock->sourceVer = pReq->sourceVer;*/ streamDispatchReqToData(pReq, pData); - if (tAppendDataForStream(pTask, (SStreamQueueItem*)pData) == 0) { + if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pData) == 0) { status = TASK_INPUT_STATUS__NORMAL; - } else { - status = TASK_INPUT_STATUS__FAILED; + } else { // input queue is full, upstream is blocked now + status = TASK_INPUT_STATUS__BLOCKED; } } else { streamTaskInputFail(pTask); @@ -142,10 +147,12 @@ int32_t streamTaskEnqueue(SStreamTask* pTask, const SStreamDispatchReq* pReq, SR pCont->upstreamNodeId = htonl(pReq->upstreamNodeId); pCont->upstreamTaskId = htonl(pReq->upstreamTaskId); pCont->downstreamNodeId = htonl(pTask->nodeId); - pCont->downstreamTaskId = htonl(pTask->taskId); + pCont->downstreamTaskId = htonl(pTask->id.taskId); pRsp->pCont = buf; + pRsp->contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp); tmsgSendRsp(pRsp); + return status == TASK_INPUT_STATUS__NORMAL ? 0 : -1; } @@ -155,7 +162,7 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, // enqueue if (pData != NULL) { - qDebug("task %d(child %d) recv retrieve req from task %d, reqId %" PRId64, pTask->taskId, pTask->selfChildId, + qDebug("task %d(child %d) recv retrieve req from task %d, reqId %" PRId64, pTask->id.taskId, pTask->selfChildId, pReq->srcTaskId, pReq->reqId); pData->type = STREAM_INPUT__DATA_RETRIEVE; @@ -164,7 +171,7 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, /*pData->blocks = pReq->data;*/ /*pBlock->sourceVer = pReq->sourceVer;*/ streamRetrieveReqToData(pReq, pData); - if (tAppendDataForStream(pTask, (SStreamQueueItem*)pData) == 0) { + if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pData) == 0) { status = TASK_INPUT_STATUS__NORMAL; } else { status = TASK_INPUT_STATUS__FAILED; @@ -205,10 +212,10 @@ int32_t streamTaskOutput(SStreamTask* pTask, SStreamDataBlock* pBlock) { } int32_t streamProcessDispatchReq(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) { - qDebug("task %d receive dispatch req from node %d task %d", pTask->taskId, pReq->upstreamNodeId, + qDebug("vgId:%d s-task:%s receive dispatch req from taskId:%d", pReq->upstreamNodeId, pTask->id.idStr, pReq->upstreamTaskId); - streamTaskEnqueue(pTask, pReq, pRsp); + streamTaskEnqueueBlocks(pTask, pReq, pRsp); tDeleteStreamDispatchReq(pReq); if (exec) { @@ -228,13 +235,14 @@ int32_t streamProcessDispatchReq(SStreamTask* pTask, SStreamDispatchReq* pReq, S int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code) { ASSERT(pRsp->inputStatus == TASK_OUTPUT_STATUS__NORMAL || pRsp->inputStatus == TASK_OUTPUT_STATUS__BLOCKED); - - qDebug("task %d receive dispatch rsp, code: %x", pTask->taskId, code); + qDebug("s-task:%s receive dispatch rsp, code: %x", pTask->id.idStr, code); if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { int32_t leftRsp = atomic_sub_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); - qDebug("task %d is shuffle, left waiting rsp %d", pTask->taskId, leftRsp); - if (leftRsp > 0) return 0; + qDebug("task %d is shuffle, left waiting rsp %d", pTask->id.taskId, leftRsp); + if (leftRsp > 0) { + return 0; + } } int8_t old = atomic_exchange_8(&pTask->outputStatus, pRsp->inputStatus); @@ -261,7 +269,7 @@ int32_t streamProcessRunReq(SStreamTask* pTask) { } int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pRsp) { - qDebug("task %d receive retrieve req from node %d task %d", pTask->taskId, pReq->srcNodeId, pReq->srcTaskId); + qDebug("task %d receive retrieve req from node %d task %d", pTask->id.taskId, pReq->srcNodeId, pReq->srcTaskId); streamTaskEnqueueRetrieve(pTask, pReq, pRsp); @@ -275,26 +283,43 @@ int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, S return 0; } -int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem) { +bool tInputQueueIsFull(const SStreamTask* pTask) { + return taosQueueItemSize((pTask->inputQueue->queue)) >= STREAM_TASK_INPUT_QUEUEU_CAPACITY; +} + +int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { int8_t type = pItem->type; if (type == STREAM_INPUT__DATA_SUBMIT) { SStreamDataSubmit2* pSubmitBlock = streamSubmitBlockClone((SStreamDataSubmit2*)pItem); if (pSubmitBlock == NULL) { - qDebug("task %d %p submit enqueue failed since out of memory", pTask->taskId, pTask); + qDebug("task %d %p submit enqueue failed since out of memory", pTask->id.taskId, pTask); terrno = TSDB_CODE_OUT_OF_MEMORY; atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__FAILED); return -1; } int32_t total = taosQueueItemSize(pTask->inputQueue->queue) + 1; - qDebug("stream task:%d %p submit enqueue %p %p %p msgLen:%d ver:%" PRId64 ", total in queue:%d", pTask->taskId, - pTask, pItem, pSubmitBlock, pSubmitBlock->submit.msgStr, pSubmitBlock->submit.msgLen, + qDebug("s-task:%s submit enqueue %p %p msgLen:%d ver:%" PRId64 ", total in queue:%d", pTask->id.idStr, + pItem, pSubmitBlock->submit.msgStr, pSubmitBlock->submit.msgLen, pSubmitBlock->submit.ver, total); + if ((pTask->taskLevel == TASK_LEVEL__SOURCE) && total > STREAM_TASK_INPUT_QUEUEU_CAPACITY) { + qError("s-task:%s input queue is full, capacity:%d, abort", pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY); + streamDataSubmitDestroy(pSubmitBlock); + return -1; + } + taosWriteQitem(pTask->inputQueue->queue, pSubmitBlock); } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type == STREAM_INPUT__REF_DATA_BLOCK) { + int32_t total = taosQueueItemSize(pTask->inputQueue->queue) + 1; + if ((pTask->taskLevel == TASK_LEVEL__SOURCE) && total > STREAM_TASK_INPUT_QUEUEU_CAPACITY) { + qError("s-task:%s input queue is full, capacity:%d, abort", pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY); + return -1; + } + + qDebug("s-task:%s data block enqueue, total in queue:%d", pTask->id.idStr, total); taosWriteQitem(pTask->inputQueue->queue, pItem); } else if (type == STREAM_INPUT__CHECKPOINT) { taosWriteQitem(pTask->inputQueue->queue, pItem); @@ -307,7 +332,6 @@ int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem) { } #if 0 - // TODO: back pressure atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__NORMAL); #endif diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index 3fba1cb556..ae616260f3 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -67,9 +67,8 @@ int32_t streamRetrieveReqToData(const SStreamRetrieveReq* pReq, SStreamDataBlock return 0; } -SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit) { +SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit, int32_t type) { SStreamDataSubmit2* pDataSubmit = (SStreamDataSubmit2*)taosAllocateQitem(sizeof(SStreamDataSubmit2), DEF_QITEM, 0); - if (pDataSubmit == NULL) { return NULL; } @@ -82,7 +81,7 @@ SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit) { pDataSubmit->submit = submit; *pDataSubmit->dataRef = 1; // initialize the reference count to be 1 - pDataSubmit->type = STREAM_INPUT__DATA_SUBMIT; + pDataSubmit->type = type; return pDataSubmit; } @@ -139,28 +138,27 @@ SStreamDataSubmit2* streamSubmitBlockClone(SStreamDataSubmit2* pSubmit) { return pSubmitClone; } -SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* elem) { - ASSERT(elem); - if (dst->type == STREAM_INPUT__DATA_BLOCK && elem->type == STREAM_INPUT__DATA_BLOCK) { +SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem) { + if (dst->type == STREAM_INPUT__DATA_BLOCK && pElem->type == STREAM_INPUT__DATA_BLOCK) { SStreamDataBlock* pBlock = (SStreamDataBlock*)dst; - SStreamDataBlock* pBlockSrc = (SStreamDataBlock*)elem; + SStreamDataBlock* pBlockSrc = (SStreamDataBlock*)pElem; taosArrayAddAll(pBlock->blocks, pBlockSrc->blocks); taosArrayDestroy(pBlockSrc->blocks); - taosFreeQitem(elem); + taosFreeQitem(pElem); return dst; - } else if (dst->type == STREAM_INPUT__MERGED_SUBMIT && elem->type == STREAM_INPUT__DATA_SUBMIT) { + } else if (dst->type == STREAM_INPUT__MERGED_SUBMIT && pElem->type == STREAM_INPUT__DATA_SUBMIT) { SStreamMergedSubmit2* pMerged = (SStreamMergedSubmit2*)dst; - SStreamDataSubmit2* pBlockSrc = (SStreamDataSubmit2*)elem; + SStreamDataSubmit2* pBlockSrc = (SStreamDataSubmit2*)pElem; streamMergeSubmit(pMerged, pBlockSrc); - taosFreeQitem(elem); + taosFreeQitem(pElem); return dst; - } else if (dst->type == STREAM_INPUT__DATA_SUBMIT && elem->type == STREAM_INPUT__DATA_SUBMIT) { + } else if (dst->type == STREAM_INPUT__DATA_SUBMIT && pElem->type == STREAM_INPUT__DATA_SUBMIT) { SStreamMergedSubmit2* pMerged = streamMergedSubmitNew(); ASSERT(pMerged); streamMergeSubmit(pMerged, (SStreamDataSubmit2*)dst); - streamMergeSubmit(pMerged, (SStreamDataSubmit2*)elem); + streamMergeSubmit(pMerged, (SStreamDataSubmit2*)pElem); taosFreeQitem(dst); - taosFreeQitem(elem); + taosFreeQitem(pElem); return (SStreamQueueItem*)pMerged; } else { return NULL; diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 7e7c23f98a..a9f6d29bf5 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -121,9 +121,9 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols); SStreamRetrieveReq req = { - .streamId = pTask->streamId, + .streamId = pTask->id.streamId, .srcNodeId = pTask->nodeId, - .srcTaskId = pTask->taskId, + .srcTaskId = pTask->id.taskId, .pRetrieve = pRetrieve, .retrieveLen = dataStrLen, }; @@ -168,7 +168,7 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) } buf = NULL; - qDebug("task %d(child %d) send retrieve req to task %d at node %d, reqId %" PRId64, pTask->taskId, + qDebug("s-task:%s (child %d) send retrieve req to task %d at node %d, reqId %" PRId64, pTask->id.idStr, pTask->selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req.reqId); } code = 0; @@ -238,7 +238,8 @@ int32_t streamDispatchOneCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* msg.pCont = buf; msg.msgType = TDMT_STREAM_TASK_CHECK; - qDebug("dispatch from task %d to task %d node %d: check msg", pTask->taskId, pReq->downstreamTaskId, nodeId); + qDebug("dispatch from s-task:%s to downstream s-task:%"PRIx64":%d node %d: check msg", pTask->id.idStr, + pReq->streamId, pReq->downstreamTaskId, nodeId); tmsgSendReq(pEpSet, &msg); @@ -282,7 +283,7 @@ int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecov tmsgSendReq(pEpSet, &msg); - qDebug("dispatch from task %d to task %d node %d: recover finish msg", pTask->taskId, pReq->taskId, vgId); + qDebug("dispatch from task %d to task %d node %d: recover finish msg", pTask->id.taskId, pReq->taskId, vgId); return 0; FAIL: @@ -319,8 +320,7 @@ int32_t streamDispatchOneDataReq(SStreamTask* pTask, const SStreamDispatchReq* p msg.pCont = buf; msg.msgType = pTask->dispatchMsgType; - qDebug("dispatch from task %d to task %d node %d: data msg", pTask->taskId, pReq->taskId, vgId); - + qDebug("dispatch from s-task:%s to taskId:%d vgId:%d data msg", pTask->id.idStr, pReq->taskId, vgId); tmsgSendReq(pEpSet, &msg); code = 0; @@ -382,9 +382,9 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { SStreamDispatchReq req = { - .streamId = pTask->streamId, + .streamId = pTask->id.streamId, .dataSrcVgId = pData->srcVgId, - .upstreamTaskId = pTask->taskId, + .upstreamTaskId = pTask->id.taskId, .upstreamChildId = pTask->selfChildId, .upstreamNodeId = pTask->nodeId, .blockNum = blockNum, @@ -402,14 +402,15 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat goto FAIL_FIXED_DISPATCH; } } + int32_t vgId = pTask->fixedEpDispatcher.nodeId; SEpSet* pEpSet = &pTask->fixedEpDispatcher.epSet; int32_t downstreamTaskId = pTask->fixedEpDispatcher.taskId; req.taskId = downstreamTaskId; - qDebug("dispatch from task %d (child id %d) to down stream task %d in vnode %d", pTask->taskId, pTask->selfChildId, - downstreamTaskId, vgId); + qDebug("s-task:%s (child taskId:%d) dispatch blocks:%d to down stream s-task:%d in vgId:%d", pTask->id.idStr, + pTask->selfChildId, blockNum, downstreamTaskId, vgId); if (streamDispatchOneDataReq(pTask, &req, vgId, pEpSet) < 0) { goto FAIL_FIXED_DISPATCH; @@ -432,9 +433,9 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat } for (int32_t i = 0; i < vgSz; i++) { - pReqs[i].streamId = pTask->streamId; + pReqs[i].streamId = pTask->id.streamId; pReqs[i].dataSrcVgId = pData->srcVgId; - pReqs[i].upstreamTaskId = pTask->taskId; + pReqs[i].upstreamTaskId = pTask->id.taskId; pReqs[i].upstreamChildId = pTask->selfChildId; pReqs[i].upstreamNodeId = pTask->nodeId; pReqs[i].blockNum = 0; @@ -494,6 +495,8 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat int32_t streamDispatch(SStreamTask* pTask) { ASSERT(pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH); + qDebug("s-task:%s try to dispatch intermediate result block to downstream, numofBlocks in outputQ:%d", pTask->id.idStr, + taosQueueItemSize(pTask->outputQueue->queue)); int8_t old = atomic_val_compare_exchange_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT); @@ -503,13 +506,12 @@ int32_t streamDispatch(SStreamTask* pTask) { SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputQueue); if (pBlock == NULL) { - qDebug("stream stop dispatching since no output: task %d", pTask->taskId); + qDebug("s-task:%s stream stop dispatching since no output in output queue", pTask->id.idStr); atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); return 0; } - ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK); - qDebug("stream dispatching: task %d", pTask->taskId); + ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK); int32_t code = 0; if (streamDispatchAllBlocks(pTask, pBlock) < 0) { @@ -518,6 +520,7 @@ int32_t streamDispatch(SStreamTask* pTask) { atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); goto FREE; } + FREE: taosArrayDestroyEx(pBlock->blocks, (FDelete)blockDataFreeRes); taosFreeQitem(pBlock); diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 6ef327049c..3d896c08ac 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -18,70 +18,82 @@ #define STREAM_EXEC_MAX_BATCH_NUM 100 static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* pRes) { - int32_t code; - void* exec = pTask->exec.executor; - while(pTask->taskLevel == TASK_LEVEL__SOURCE && atomic_load_8(&pTask->taskStatus) != TASK_STATUS__NORMAL) { - qError("stream task wait for the end of fill history"); - taosMsleep(2); - continue; + int32_t code = TSDB_CODE_SUCCESS; + void* pExecutor = pTask->exec.pExecutor; + + while (pTask->taskLevel == TASK_LEVEL__SOURCE) { + int8_t status = atomic_load_8(&pTask->status.taskStatus); + if (status != TASK_STATUS__NORMAL && status != TASK_STATUS__RESTORE) { + qError("stream task wait for the end of fill history, s-task:%s, status:%d", pTask->id.idStr, + atomic_load_8(&pTask->status.taskStatus)); + taosMsleep(2); + } else { + break; + } } // set input const SStreamQueueItem* pItem = (const SStreamQueueItem*)data; if (pItem->type == STREAM_INPUT__GET_RES) { const SStreamTrigger* pTrigger = (const SStreamTrigger*)data; - qSetMultiStreamInput(exec, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK); + qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK); } else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { ASSERT(pTask->taskLevel == TASK_LEVEL__SOURCE); const SStreamDataSubmit2* pSubmit = (const SStreamDataSubmit2*)data; - qDebug("stream task:%d %p set submit input %p %p %d %" PRId64, pTask->taskId, pTask, pSubmit, pSubmit->submit.msgStr, + qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT); + qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, pTask->id.idStr, pSubmit, pSubmit->submit.msgStr, pSubmit->submit.msgLen, pSubmit->submit.ver); - qSetMultiStreamInput(exec, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT); } else if (pItem->type == STREAM_INPUT__DATA_BLOCK || pItem->type == STREAM_INPUT__DATA_RETRIEVE) { const SStreamDataBlock* pBlock = (const SStreamDataBlock*)data; - SArray* blocks = pBlock->blocks; - qDebug("task %d %p set ssdata input", pTask->taskId, pTask); - qSetMultiStreamInput(exec, blocks->pData, blocks->size, STREAM_INPUT__DATA_BLOCK); + + SArray* pBlockList = pBlock->blocks; + int32_t numOfBlocks = taosArrayGetSize(pBlockList); + qDebug("s-task:%s set sdata blocks as input num:%d, ver:%"PRId64, pTask->id.idStr, numOfBlocks, pBlock->sourceVer); + qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__DATA_BLOCK); } else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) { const SStreamMergedSubmit2* pMerged = (const SStreamMergedSubmit2*)data; - SArray* blocks = pMerged->submits; - qDebug("task %d %p set submit input (merged), batch num: %d", pTask->taskId, pTask, (int32_t)blocks->size); - qSetMultiStreamInput(exec, blocks->pData, blocks->size, STREAM_INPUT__MERGED_SUBMIT); + + SArray* pBlockList = pMerged->submits; + int32_t numOfBlocks = taosArrayGetSize(pBlockList); + qDebug("st-task:%s %p set submit input (merged), batch num:%d", pTask->id.idStr, pTask, numOfBlocks); + qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__MERGED_SUBMIT); } else if (pItem->type == STREAM_INPUT__REF_DATA_BLOCK) { const SStreamRefDataBlock* pRefBlock = (const SStreamRefDataBlock*)data; - qSetMultiStreamInput(exec, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK); + qSetMultiStreamInput(pExecutor, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK); } else { ASSERT(0); } - // exec + // pExecutor while (1) { - if (pTask->taskStatus == TASK_STATUS__DROPPING) { + if (pTask->status.taskStatus == TASK_STATUS__DROPPING) { return 0; } SSDataBlock* output = NULL; uint64_t ts = 0; - if ((code = qExecTask(exec, &output, &ts)) < 0) { + if ((code = qExecTask(pExecutor, &output, &ts)) < 0) { if (code == TSDB_CODE_QRY_IN_EXEC) { - resetTaskInfo(exec); + resetTaskInfo(pExecutor); } - /*ASSERT(false);*/ - qError("unexpected stream execution, stream %" PRId64 " task: %d, since %s", pTask->streamId, pTask->taskId, - terrstr()); + + qError("unexpected stream execution, s-task:%s since %s", pTask->id.idStr, terrstr()); continue; } + if (output == NULL) { if (pItem->type == STREAM_INPUT__DATA_RETRIEVE) { - SSDataBlock block = {0}; + SSDataBlock block = {0}; + const SStreamDataBlock* pRetrieveBlock = (const SStreamDataBlock*)data; ASSERT(taosArrayGetSize(pRetrieveBlock->blocks) == 1); + assignOneDataBlock(&block, taosArrayGet(pRetrieveBlock->blocks, 0)); block.info.type = STREAM_PULL_OVER; block.info.childId = pTask->selfChildId; taosArrayPush(pRes, &block); - qDebug("task %d(child %d) processed retrieve, reqId %" PRId64, pTask->taskId, pTask->selfChildId, + qDebug("task %d(child %d) processed retrieve, reqId %" PRId64, pTask->id.taskId, pTask->selfChildId, pRetrieveBlock->reqId); } break; @@ -94,20 +106,21 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* continue; } - qDebug("task %d(child %d) executed and get block", pTask->taskId, pTask->selfChildId); + qDebug("task %d(child %d) executed and get block", pTask->id.taskId, pTask->selfChildId); SSDataBlock block = {0}; assignOneDataBlock(&block, output); block.info.childId = pTask->selfChildId; taosArrayPush(pRes, &block); } + return 0; } int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { ASSERT(pTask->taskLevel == TASK_LEVEL__SOURCE); - void* exec = pTask->exec.executor; + void* exec = pTask->exec.pExecutor; qSetStreamOpOpen(exec); bool finished = false; @@ -121,7 +134,7 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { int32_t batchCnt = 0; while (1) { - if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) { + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { taosArrayDestroy(pRes); return 0; } @@ -147,17 +160,17 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { batchCnt++; - qDebug("task %d scan exec block num %d, block limit %d", pTask->taskId, batchCnt, batchSz); + qDebug("task %d scan exec block num %d, block limit %d", pTask->id.taskId, batchCnt, batchSz); if (batchCnt >= batchSz) break; } if (taosArrayGetSize(pRes) == 0) { if (finished) { taosArrayDestroy(pRes); - qDebug("task %d finish recover exec task ", pTask->taskId); + qDebug("task %d finish recover exec task ", pTask->id.taskId); break; } else { - qDebug("task %d continue recover exec task ", pTask->taskId); + qDebug("task %d continue recover exec task ", pTask->id.taskId); continue; } } @@ -173,7 +186,7 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { streamTaskOutput(pTask, qRes); if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { - qDebug("task %d scan exec dispatch block num %d", pTask->taskId, batchCnt); + qDebug("task %d scan exec dispatch block num %d", pTask->id.taskId, batchCnt); streamDispatch(pTask); } if (finished) break; @@ -186,7 +199,7 @@ int32_t streamBatchExec(SStreamTask* pTask, int32_t batchLimit) { // fetch all queue item, merge according to batchLimit int32_t numOfItems = taosReadAllQitems(pTask->inputQueue1, pTask->inputQall); if (numOfItems == 0) { - qDebug("task: %d, stream task exec over, queue empty", pTask->taskId); + qDebug("task: %d, stream task exec over, queue empty", pTask->id.taskId); return 0; } SStreamQueueItem* pMerged = NULL; @@ -221,106 +234,141 @@ int32_t streamBatchExec(SStreamTask* pTask, int32_t batchLimit) { int32_t streamExecForAll(SStreamTask* pTask) { while (1) { - int32_t batchCnt = 1; - void* input = NULL; + int32_t batchSize = 1; + void* pInput = NULL; + + // merge multiple input data if possible in the input queue. while (1) { SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); if (qItem == NULL) { - qDebug("stream task exec over, queue empty, task: %d", pTask->taskId); +// qDebug("s-task:%s extract data from input queue, queue is empty, abort", pTask->id.idStr); break; } - if (input == NULL) { - input = qItem; + + if (pInput == NULL) { + pInput = qItem; streamQueueProcessSuccess(pTask->inputQueue); if (pTask->taskLevel == TASK_LEVEL__SINK) { break; } } else { - void* newRet; - if ((newRet = streamMergeQueueItem(input, qItem)) == NULL) { + void* newRet = NULL; + if ((newRet = streamMergeQueueItem(pInput, qItem)) == NULL) { streamQueueProcessFail(pTask->inputQueue); break; } else { - batchCnt++; - input = newRet; + batchSize++; + pInput = newRet; streamQueueProcessSuccess(pTask->inputQueue); - if (batchCnt > STREAM_EXEC_MAX_BATCH_NUM) { + if (batchSize > STREAM_EXEC_MAX_BATCH_NUM) { break; } } } } - if (pTask->taskStatus == TASK_STATUS__DROPPING) { - if (input) streamFreeQitem(input); + if (pTask->status.taskStatus == TASK_STATUS__DROPPING) { + if (pInput) { + streamFreeQitem(pInput); + } return 0; } - if (input == NULL) { + if (pInput == NULL) { break; } if (pTask->taskLevel == TASK_LEVEL__SINK) { - ASSERT(((SStreamQueueItem*)input)->type == STREAM_INPUT__DATA_BLOCK); - streamTaskOutput(pTask, input); + ASSERT(((SStreamQueueItem*)pInput)->type == STREAM_INPUT__DATA_BLOCK); + qDebug("s-task:%s sink node start to sink result. numOfBlocks:%d", pTask->id.idStr, batchSize); + streamTaskOutput(pTask, pInput); continue; } SArray* pRes = taosArrayInit(0, sizeof(SSDataBlock)); + qDebug("s-task:%s exec begin, numOfBlocks:%d", pTask->id.idStr, batchSize); - qDebug("stream task:%d exec begin, msg batch: %d", pTask->taskId, batchCnt); - streamTaskExecImpl(pTask, input, pRes); + streamTaskExecImpl(pTask, pInput, pRes); - qDebug("stream task:%d exec end", pTask->taskId); + int64_t ckId = 0; + int64_t dataVer = 0; + qGetCheckpointVersion(pTask->exec.pExecutor, &dataVer, &ckId); + if (dataVer > pTask->chkInfo.version) { // save it since the checkpoint is updated + qDebug("s-task:%s exec end, start to update check point, ver from %" PRId64 " to %" PRId64 + ", checkPoint id:%" PRId64 " -> %" PRId64, + pTask->id.idStr, pTask->chkInfo.version, dataVer, pTask->chkInfo.id, ckId); + + pTask->chkInfo = (SCheckpointInfo) {.version = dataVer, .id = ckId}; + + taosWLockLatch(&pTask->pMeta->lock); + streamMetaSaveTask(pTask->pMeta, pTask); + if (streamMetaCommit(pTask->pMeta) < 0) { + taosWUnLockLatch(&pTask->pMeta->lock); + qError("s-task:%s failed to commit stream meta, since %s", pTask->id.idStr, terrstr()); + return -1; + } else { + taosWUnLockLatch(&pTask->pMeta->lock); + qDebug("s-task:%s update checkpoint ver succeed", pTask->id.idStr); + } + } else { + qDebug("s-task:%s exec end", pTask->id.idStr); + } if (taosArrayGetSize(pRes) != 0) { SStreamDataBlock* qRes = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); if (qRes == NULL) { taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - streamFreeQitem(input); + streamFreeQitem(pInput); return -1; } + qRes->type = STREAM_INPUT__DATA_BLOCK; qRes->blocks = pRes; - if (((SStreamQueueItem*)input)->type == STREAM_INPUT__DATA_SUBMIT) { - SStreamDataSubmit2* pSubmit = (SStreamDataSubmit2*)input; + if (((SStreamQueueItem*)pInput)->type == STREAM_INPUT__DATA_SUBMIT) { + SStreamDataSubmit2* pSubmit = (SStreamDataSubmit2*)pInput; qRes->childId = pTask->selfChildId; qRes->sourceVer = pSubmit->ver; - } else if (((SStreamQueueItem*)input)->type == STREAM_INPUT__MERGED_SUBMIT) { - SStreamMergedSubmit2* pMerged = (SStreamMergedSubmit2*)input; + } else if (((SStreamQueueItem*)pInput)->type == STREAM_INPUT__MERGED_SUBMIT) { + SStreamMergedSubmit2* pMerged = (SStreamMergedSubmit2*)pInput; qRes->childId = pTask->selfChildId; qRes->sourceVer = pMerged->ver; } if (streamTaskOutput(pTask, qRes) < 0) { taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - streamFreeQitem(input); + streamFreeQitem(pInput); taosFreeQitem(qRes); return -1; } } else { taosArrayDestroy(pRes); } - streamFreeQitem(input); + streamFreeQitem(pInput); } return 0; } int32_t streamTryExec(SStreamTask* pTask) { + // this function may be executed by multi-threads, so status check is required. int8_t schedStatus = - atomic_val_compare_exchange_8(&pTask->schedStatus, TASK_SCHED_STATUS__WAITING, TASK_SCHED_STATUS__ACTIVE); + atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__WAITING, TASK_SCHED_STATUS__ACTIVE); + if (schedStatus == TASK_SCHED_STATUS__WAITING) { int32_t code = streamExecForAll(pTask); if (code < 0) { - atomic_store_8(&pTask->schedStatus, TASK_SCHED_STATUS__FAILED); + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__FAILED); return -1; } - atomic_store_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE); + + // todo the task should be commit here + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + qDebug("s-task:%s exec completed", pTask->id.idStr); if (!taosQueueEmpty(pTask->inputQueue->queue)) { streamSchedExec(pTask); } } + return 0; } diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 03391c0ba2..51cc315780 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -24,6 +24,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } + int32_t len = strlen(path) + 20; char* streamPath = taosMemoryCalloc(1, len); sprintf(streamPath, "%s/%s", path, "stream"); @@ -50,7 +51,8 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF goto _err; } - pMeta->pTasks = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); + _hash_fn_t fp = taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT); + pMeta->pTasks = taosHashInit(64, fp, true, HASH_ENTRY_LOCK); if (pMeta->pTasks == NULL) { goto _err; } @@ -59,9 +61,10 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF goto _err; } + pMeta->vgId = vgId; pMeta->ahandle = ahandle; pMeta->expandFunc = expandFunc; - + taosInitRWLatch(&pMeta->lock); return pMeta; _err: @@ -81,19 +84,28 @@ void streamMetaClose(SStreamMeta* pMeta) { tdbClose(pMeta->db); void* pIter = NULL; +// while(pMeta->walScan) { +// qDebug("wait stream daemon quit"); +// taosMsleep(100); +// } + while (1) { pIter = taosHashIterate(pMeta->pTasks, pIter); - if (pIter == NULL) break; + if (pIter == NULL) { + break; + } + SStreamTask* pTask = *(SStreamTask**)pIter; if (pTask->timer) { taosTmrStop(pTask->timer); pTask->timer = NULL; } - tFreeSStreamTask(pTask); + + tFreeStreamTask(pTask); /*streamMetaReleaseTask(pMeta, pTask);*/ } + taosHashCleanup(pMeta->pTasks); - taosHashCleanup(pMeta->pRecoverStatus); taosMemoryFree(pMeta->path); taosMemoryFree(pMeta); } @@ -106,7 +118,7 @@ int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t ver, char* msg, } SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msg, msgLen); - if (tDecodeSStreamTask(&decoder, pTask) < 0) { + if (tDecodeStreamTask(&decoder, pTask) < 0) { tDecoderClear(&decoder); goto FAIL; } @@ -117,12 +129,12 @@ int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t ver, char* msg, goto FAIL; } - if (taosHashPut(pMeta->pTasks, &pTask->taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { + if (taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { goto FAIL; } - if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), msg, msgLen, pMeta->txn) < 0) { - taosHashRemove(pMeta->pTasks, &pTask->taskId, sizeof(int32_t)); + if (tdbTbUpsert(pMeta->pTaskDb, &pTask->id.taskId, sizeof(int32_t), msg, msgLen, pMeta->txn) < 0) { + taosHashRemove(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t)); ASSERT(0); goto FAIL; } @@ -130,7 +142,7 @@ int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t ver, char* msg, return 0; FAIL: - if (pTask) tFreeSStreamTask(pTask); + if (pTask) tFreeStreamTask(pTask); return -1; } #endif @@ -139,7 +151,7 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { void* buf = NULL; int32_t len; int32_t code; - tEncodeSize(tEncodeSStreamTask, pTask, len, code); + tEncodeSize(tEncodeStreamTask, pTask, len, code); if (code < 0) { return -1; } @@ -150,10 +162,10 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, len); - tEncodeSStreamTask(&encoder, pTask); + tEncodeStreamTask(&encoder, pTask); tEncoderClear(&encoder); - if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), buf, len, pMeta->txn) < 0) { + if (tdbTbUpsert(pMeta->pTaskDb, &pTask->id.taskId, sizeof(int32_t), buf, len, pMeta->txn) < 0) { return -1; } @@ -161,8 +173,8 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { return 0; } -#if 1 -int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) { +// add to the ready tasks hash map, not the restored tasks hash map +int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) { if (pMeta->expandFunc(pMeta->ahandle, pTask, ver) < 0) { return -1; } @@ -171,39 +183,24 @@ int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) { return -1; } - taosHashPut(pMeta->pTasks, &pTask->taskId, sizeof(int32_t), &pTask, sizeof(void*)); - + taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, POINTER_BYTES); return 0; } -#endif -#if 0 -SStreamTask* streamMetaGetTask(SStreamMeta* pMeta, int32_t taskId) { - SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); - if (ppTask) { - ASSERT((*ppTask)->taskId == taskId); - return *ppTask; - } else { - return NULL; - } +int32_t streamMetaGetNumOfTasks(const SStreamMeta* pMeta) { + return (int32_t) taosHashGetSize(pMeta->pTasks); } -#endif SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId) { taosRLockLatch(&pMeta->lock); SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); - if (ppTask) { - SStreamTask* pTask = *ppTask; - if (atomic_load_8(&pTask->taskStatus) != TASK_STATUS__DROPPING) { - atomic_add_fetch_32(&pTask->refCnt, 1); - taosRUnLockLatch(&pMeta->lock); - return pTask; - } else { - taosRUnLockLatch(&pMeta->lock); - return NULL; - } + if (ppTask != NULL && (atomic_load_8(&((*ppTask)->status.taskStatus)) != TASK_STATUS__DROPPING)) { + atomic_add_fetch_32(&(*ppTask)->refCnt, 1); + taosRUnLockLatch(&pMeta->lock); + return *ppTask; } + taosRUnLockLatch(&pMeta->lock); return NULL; } @@ -212,8 +209,8 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { int32_t left = atomic_sub_fetch_32(&pTask->refCnt, 1); ASSERT(left >= 0); if (left == 0) { - ASSERT(atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING); - tFreeSStreamTask(pTask); + ASSERT(atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING); + tFreeStreamTask(pTask); } } @@ -227,7 +224,7 @@ void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { * taosTmrStop(pTask->timer);*/ /*pTask->timer = NULL;*/ /*}*/ - atomic_store_8(&pTask->taskStatus, TASK_STATUS__DROPPING); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING); taosWLockLatch(&pMeta->lock); streamMetaReleaseTask(pMeta, pTask); @@ -245,9 +242,12 @@ int32_t streamMetaBegin(SStreamMeta* pMeta) { int32_t streamMetaCommit(SStreamMeta* pMeta) { if (tdbCommit(pMeta->db, pMeta->txn) < 0) { + ASSERT(0); return -1; } + if (tdbPostCommit(pMeta->db, pMeta->txn) < 0) { + ASSERT(0); return -1; } @@ -293,25 +293,27 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { return -1; } tDecoderInit(&decoder, (uint8_t*)pVal, vLen); - tDecodeSStreamTask(&decoder, pTask); + tDecodeStreamTask(&decoder, pTask); tDecoderClear(&decoder); - if (pMeta->expandFunc(pMeta->ahandle, pTask, -1) < 0) { + // todo set correct initial version. + if (pMeta->expandFunc(pMeta->ahandle, pTask, 0) < 0) { tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur); return -1; } - if (taosHashPut(pMeta->pTasks, &pTask->taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { + if (taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur); return -1; } - /*pTask->taskStatus = TASK_STATUS__NORMAL;*/ + + /*pTask->status.taskStatus = TASK_STATUS__NORMAL;*/ if (pTask->fillHistory) { - pTask->taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; + pTask->status.taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; streamTaskCheckDownstream(pTask, ver); } } diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 87058bf490..03afc0692d 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -16,9 +16,10 @@ #include "streamInc.h" int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) { - qDebug("task %d at node %d launch recover", pTask->taskId, pTask->nodeId); + qDebug("s-task:%s at node %d launch recover", pTask->id.idStr, pTask->nodeId); + if (pTask->taskLevel == TASK_LEVEL__SOURCE) { - atomic_store_8(&pTask->taskStatus, TASK_STATUS__RECOVER_PREPARE); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__RECOVER_PREPARE); streamSetParamForRecover(pTask); streamSourceRecoverPrepareStep1(pTask, version); @@ -33,34 +34,31 @@ int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) { memcpy(serializedReq, &req, len); - SRpcMsg rpcMsg = { - .contLen = len, - .pCont = serializedReq, - .msgType = TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE, - }; - + SRpcMsg rpcMsg = { .contLen = len, .pCont = serializedReq, .msgType = TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE }; if (tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &rpcMsg) < 0) { /*ASSERT(0);*/ } } else if (pTask->taskLevel == TASK_LEVEL__AGG) { - atomic_store_8(&pTask->taskStatus, TASK_STATUS__NORMAL); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); streamSetParamForRecover(pTask); streamAggRecoverPrepare(pTask); } else if (pTask->taskLevel == TASK_LEVEL__SINK) { - atomic_store_8(&pTask->taskStatus, TASK_STATUS__NORMAL); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); } + return 0; } // checkstatus int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { SStreamTaskCheckReq req = { - .streamId = pTask->streamId, - .upstreamTaskId = pTask->taskId, + .streamId = pTask->id.streamId, + .upstreamTaskId = pTask->id.taskId, .upstreamNodeId = pTask->nodeId, .childId = pTask->selfChildId, }; + // serialize if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { req.reqId = tGenIdPI64(); @@ -68,7 +66,7 @@ int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; pTask->checkReqId = req.reqId; - qDebug("task %d at node %d check downstream task %d at node %d", pTask->taskId, pTask->nodeId, req.downstreamTaskId, + qDebug("task %d at node %d check downstream task %d at node %d", pTask->id.taskId, pTask->nodeId, req.downstreamTaskId, req.downstreamNodeId); streamDispatchOneCheckReq(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { @@ -83,12 +81,12 @@ int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { taosArrayPush(pTask->checkReqIds, &req.reqId); req.downstreamNodeId = pVgInfo->vgId; req.downstreamTaskId = pVgInfo->taskId; - qDebug("task %d at node %d check downstream task %d at node %d (shuffle)", pTask->taskId, pTask->nodeId, + qDebug("task %d at node %d check downstream task %d at node %d (shuffle)", pTask->id.taskId, pTask->nodeId, req.downstreamTaskId, req.downstreamNodeId); streamDispatchOneCheckReq(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } } else { - qDebug("task %d at node %d direct launch recover since no downstream", pTask->taskId, pTask->nodeId); + qDebug("task %d at node %d direct launch recover since no downstream", pTask->id.taskId, pTask->nodeId); streamTaskLaunchRecover(pTask, version); } return 0; @@ -104,7 +102,7 @@ int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp .downstreamNodeId = pRsp->downstreamNodeId, .childId = pRsp->childId, }; - qDebug("task %d at node %d check downstream task %d at node %d (recheck)", pTask->taskId, pTask->nodeId, + qDebug("task %d at node %d check downstream task %d at node %d (recheck)", pTask->id.taskId, pTask->nodeId, req.downstreamTaskId, req.downstreamNodeId); if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { streamDispatchOneCheckReq(pTask, &req, pRsp->downstreamNodeId, &pTask->fixedEpDispatcher.epSet); @@ -122,12 +120,13 @@ int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp } int32_t streamProcessTaskCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* pReq) { - return atomic_load_8(&pTask->taskStatus) == TASK_STATUS__NORMAL; + return atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL; } int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp, int64_t version) { qDebug("task %d at node %d recv check rsp from task %d at node %d: status %d", pRsp->upstreamTaskId, pRsp->upstreamNodeId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->status); + if (pRsp->status == 1) { if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { bool found = false; @@ -138,7 +137,11 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* break; } } - if (!found) return -1; + + if (!found) { + return -1; + } + int32_t left = atomic_sub_fetch_32(&pTask->recoverTryingDownstream, 1); ASSERT(left >= 0); if (left == 0) { @@ -147,7 +150,10 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* streamTaskLaunchRecover(pTask, version); } } else if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { - if (pRsp->reqId != pTask->checkReqId) return -1; + if (pRsp->reqId != pTask->checkReqId) { + return -1; + } + streamTaskLaunchRecover(pTask, version); } else { ASSERT(0); @@ -160,28 +166,29 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* // common int32_t streamSetParamForRecover(SStreamTask* pTask) { - void* exec = pTask->exec.executor; + void* exec = pTask->exec.pExecutor; return qStreamSetParamForRecover(exec); } int32_t streamRestoreParam(SStreamTask* pTask) { - void* exec = pTask->exec.executor; + void* exec = pTask->exec.pExecutor; return qStreamRestoreParam(exec); } + int32_t streamSetStatusNormal(SStreamTask* pTask) { - atomic_store_8(&pTask->taskStatus, TASK_STATUS__NORMAL); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); return 0; } // source int32_t streamSourceRecoverPrepareStep1(SStreamTask* pTask, int64_t ver) { - void* exec = pTask->exec.executor; + void* exec = pTask->exec.pExecutor; return qStreamSourceRecoverStep1(exec, ver); } int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamRecoverStep1Req* pReq) { pReq->msgHead.vgId = pTask->nodeId; - pReq->streamId = pTask->streamId; - pReq->taskId = pTask->taskId; + pReq->streamId = pTask->id.streamId; + pReq->taskId = pTask->id.taskId; return 0; } @@ -192,13 +199,13 @@ int32_t streamSourceRecoverScanStep1(SStreamTask* pTask) { int32_t streamBuildSourceRecover2Req(SStreamTask* pTask, SStreamRecoverStep2Req* pReq) { pReq->msgHead.vgId = pTask->nodeId; - pReq->streamId = pTask->streamId; - pReq->taskId = pTask->taskId; + pReq->streamId = pTask->id.streamId; + pReq->taskId = pTask->id.taskId; return 0; } int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver) { - void* exec = pTask->exec.executor; + void* exec = pTask->exec.pExecutor; if (qStreamSourceRecoverStep2(exec, ver) < 0) { } return streamScanExec(pTask, 100); @@ -206,7 +213,7 @@ int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver) { int32_t streamDispatchRecoverFinishReq(SStreamTask* pTask) { SStreamRecoverFinishReq req = { - .streamId = pTask->streamId, + .streamId = pTask->id.streamId, .childId = pTask->selfChildId, }; // serialize @@ -227,13 +234,13 @@ int32_t streamDispatchRecoverFinishReq(SStreamTask* pTask) { // agg int32_t streamAggRecoverPrepare(SStreamTask* pTask) { - void* exec = pTask->exec.executor; pTask->recoverWaitingUpstream = taosArrayGetSize(pTask->childEpInfo); + qDebug("s-task:%s wait for %d upstreams", pTask->id.idStr, pTask->recoverWaitingUpstream); return 0; } int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask) { - void* exec = pTask->exec.executor; + void* exec = pTask->exec.pExecutor; if (qStreamRestoreParam(exec) < 0) { return -1; } @@ -247,6 +254,7 @@ int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask) { int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t childId) { if (pTask->taskLevel == TASK_LEVEL__AGG) { int32_t left = atomic_sub_fetch_32(&pTask->recoverWaitingUpstream, 1); + qDebug("s-task:%s remain unfinished child tasks:%d", pTask->id.idStr, left); ASSERT(left >= 0); if (left == 0) { streamAggChildrenRecoverFinish(pTask); diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 411726075e..7bea989e3a 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -121,7 +121,7 @@ SStreamState* streamStateOpen(char* path, SStreamTask* pTask, bool specPath, int char statePath[1024]; if (!specPath) { - sprintf(statePath, "%s/%d", path, pTask->taskId); + sprintf(statePath, "%s/%d", path, pTask->id.taskId); } else { memset(statePath, 0, 1024); tstrncpy(statePath, path, 1024); @@ -193,6 +193,7 @@ SStreamState* streamStateOpen(char* path, SStreamTask* pTask, bool specPath, int } pState->pTdbState->pOwner = pTask; + pState->checkPointId = 0; return pState; @@ -243,6 +244,7 @@ int32_t streamStateCommit(SStreamState* pState) { TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { return -1; } + pState->checkPointId++; return 0; } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index e9aba0bc39..67c60008fd 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -15,15 +15,22 @@ #include "executor.h" #include "tstream.h" +#include "wal.h" -SStreamTask* tNewSStreamTask(int64_t streamId) { +SStreamTask* tNewStreamTask(int64_t streamId) { SStreamTask* pTask = (SStreamTask*)taosMemoryCalloc(1, sizeof(SStreamTask)); if (pTask == NULL) { return NULL; } - pTask->taskId = tGenIdPI32(); - pTask->streamId = streamId; - pTask->schedStatus = TASK_SCHED_STATUS__INACTIVE; + + pTask->id.taskId = tGenIdPI32(); + pTask->id.streamId = streamId; + + char buf[128] = {0}; + sprintf(buf, "0x%"PRIx64"-%d", pTask->id.streamId, pTask->id.taskId); + + pTask->id.idStr = taosStrdup(buf); + pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; @@ -48,24 +55,24 @@ int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo) { return 0; } -int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { +int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tStartEncode(pEncoder) < 0) return -1; - if (tEncodeI64(pEncoder, pTask->streamId) < 0) return -1; - if (tEncodeI32(pEncoder, pTask->taskId) < 0) return -1; + if (tEncodeI64(pEncoder, pTask->id.streamId) < 0) return -1; + if (tEncodeI32(pEncoder, pTask->id.taskId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->totalLevel) < 0) return -1; if (tEncodeI8(pEncoder, pTask->taskLevel) < 0) return -1; if (tEncodeI8(pEncoder, pTask->outputType) < 0) return -1; if (tEncodeI16(pEncoder, pTask->dispatchMsgType) < 0) return -1; - if (tEncodeI8(pEncoder, pTask->taskStatus) < 0) return -1; - if (tEncodeI8(pEncoder, pTask->schedStatus) < 0) return -1; + if (tEncodeI8(pEncoder, pTask->status.taskStatus) < 0) return -1; + if (tEncodeI8(pEncoder, pTask->status.schedStatus) < 0) return -1; if (tEncodeI32(pEncoder, pTask->selfChildId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->nodeId) < 0) return -1; if (tEncodeSEpSet(pEncoder, &pTask->epSet) < 0) return -1; - if (tEncodeI64(pEncoder, pTask->recoverSnapVer) < 0) return -1; - if (tEncodeI64(pEncoder, pTask->startVer) < 0) return -1; + if (tEncodeI64(pEncoder, pTask->chkInfo.id) < 0) return -1; + if (tEncodeI64(pEncoder, pTask->chkInfo.version) < 0) return -1; if (tEncodeI8(pEncoder, pTask->fillHistory) < 0) return -1; int32_t epSz = taosArrayGetSize(pTask->childEpInfo); @@ -101,24 +108,24 @@ int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { return pEncoder->pos; } -int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { +int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tStartDecode(pDecoder) < 0) return -1; - if (tDecodeI64(pDecoder, &pTask->streamId) < 0) return -1; - if (tDecodeI32(pDecoder, &pTask->taskId) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->id.streamId) < 0) return -1; + if (tDecodeI32(pDecoder, &pTask->id.taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->totalLevel) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->taskLevel) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->outputType) < 0) return -1; if (tDecodeI16(pDecoder, &pTask->dispatchMsgType) < 0) return -1; - if (tDecodeI8(pDecoder, &pTask->taskStatus) < 0) return -1; - if (tDecodeI8(pDecoder, &pTask->schedStatus) < 0) return -1; + if (tDecodeI8(pDecoder, &pTask->status.taskStatus) < 0) return -1; + if (tDecodeI8(pDecoder, &pTask->status.schedStatus) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->selfChildId) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->nodeId) < 0) return -1; if (tDecodeSEpSet(pDecoder, &pTask->epSet) < 0) return -1; - if (tDecodeI64(pDecoder, &pTask->recoverSnapVer) < 0) return -1; - if (tDecodeI64(pDecoder, &pTask->startVer) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->chkInfo.id) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->chkInfo.version) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->fillHistory) < 0) return -1; int32_t epSz; @@ -162,24 +169,47 @@ int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { return 0; } -void tFreeSStreamTask(SStreamTask* pTask) { - qDebug("free stream task %d", pTask->taskId); - if (pTask->inputQueue) streamQueueClose(pTask->inputQueue); - if (pTask->outputQueue) streamQueueClose(pTask->outputQueue); - if (pTask->exec.qmsg) taosMemoryFree(pTask->exec.qmsg); - if (pTask->exec.executor) qDestroyTask(pTask->exec.executor); +void tFreeStreamTask(SStreamTask* pTask) { + qDebug("free s-task:%s", pTask->id.idStr); + + if (pTask->inputQueue) { + streamQueueClose(pTask->inputQueue); + } + if (pTask->outputQueue) { + streamQueueClose(pTask->outputQueue); + } + if (pTask->exec.qmsg) { + taosMemoryFree(pTask->exec.qmsg); + } + + if (pTask->exec.pExecutor) { + qDestroyTask(pTask->exec.pExecutor); + pTask->exec.pExecutor = NULL; + } + + if (pTask->exec.pWalReader != NULL) { + walCloseReader(pTask->exec.pWalReader); + } + taosArrayDestroyP(pTask->childEpInfo, taosMemoryFree); if (pTask->outputType == TASK_OUTPUT__TABLE) { tDeleteSSchemaWrapper(pTask->tbSink.pSchemaWrapper); taosMemoryFree(pTask->tbSink.pTSchema); } + if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { taosArrayDestroy(pTask->shuffleDispatcher.dbInfo.pVgroupInfos); taosArrayDestroy(pTask->checkReqIds); pTask->checkReqIds = NULL; } - if (pTask->pState) streamStateClose(pTask->pState); + if (pTask->pState) { + streamStateClose(pTask->pState); + } + + if (pTask->id.idStr != NULL) { + taosMemoryFree((void*)pTask->id.idStr); + } taosMemoryFree(pTask); } diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index db4e3a4759..dc3ff3e6de 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -100,6 +100,8 @@ int32_t walNextValidMsg(SWalReader *pReader) { return -1; } +int64_t walReaderGetCurrentVer(const SWalReader *pReader) { return pReader->curVersion; } + static int64_t walReadSeekFilePos(SWalReader *pReader, int64_t fileFirstVer, int64_t ver) { int64_t ret = 0; diff --git a/source/util/src/tworker.c b/source/util/src/tworker.c index 631bcb443e..a49ff0cd5b 100644 --- a/source/util/src/tworker.c +++ b/source/util/src/tworker.c @@ -218,7 +218,7 @@ STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem int32_t queueNum = taosGetQueueNumber(pool->qset); int32_t curWorkerNum = taosArrayGetSize(pool->workers); int32_t dstWorkerNum = ceil(queueNum * pool->ratio); - if (dstWorkerNum < 1) dstWorkerNum = 1; + if (dstWorkerNum < 2) dstWorkerNum = 2; // spawn a thread to process queue while (curWorkerNum < dstWorkerNum) { @@ -248,7 +248,8 @@ STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem } taosThreadAttrDestroy(&thAttr); - uInfo("worker:%s:%d is launched, total:%d", pool->name, worker->id, (int32_t)taosArrayGetSize(pool->workers)); + int32_t numOfThreads = taosArrayGetSize(pool->workers); + uInfo("worker:%s:%d is launched, total:%d, expect:%d", pool->name, worker->id, numOfThreads, dstWorkerNum); curWorkerNum++; } diff --git a/tests/script/tsim/stream/basic1.sim b/tests/script/tsim/stream/basic1.sim index e69875d69f..15ca6bf7c9 100644 --- a/tests/script/tsim/stream/basic1.sim +++ b/tests/script/tsim/stream/basic1.sim @@ -37,7 +37,7 @@ if $loop_count == 20 then endi if $rows != 4 then - print =====rows=$rows + print =====rows=$rows, expect 4 goto loop0 endi @@ -53,7 +53,7 @@ if $data02 != 2 then endi if $data03 != 5 then - print =====data03=$data03 + print =====data03=$data03, expect:5 goto loop0 endi