diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index c407f1ddcb..82e57daf56 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -821,13 +821,18 @@ int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) { } // do recovery step 1 - streamSourceRecoverScanStep1(pTask); + tqDebug("s-task:%s start recover step 1 scan", pTask->id.idStr); + int64_t st = taosGetTimestampMs(); + streamSourceRecoverScanStep1(pTask); if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; } + double el = (taosGetTimestampMs() - st) / 1000.0; + tqDebug("s-task:%s recover step 1 ended, elapsed time:%.2fs", pTask->id.idStr, el); + // build msg to launch next step SStreamRecoverStep2Req req; code = streamBuildSourceRecover2Req(pTask, &req); @@ -853,20 +858,17 @@ int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) { memcpy(serializedReq, &req, len); // dispatch msg + tqDebug("s-task:%s start recover block stage", pTask->id.idStr); + SRpcMsg rpcMsg = { - .code = 0, - .contLen = len, - .msgType = TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE, - .pCont = serializedReq, - }; - + .code = 0, .contLen = len, .msgType = TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE, .pCont = serializedReq}; tmsgPutToQueue(&pTq->pVnode->msgCb, WRITE_QUEUE, &rpcMsg); - return 0; } int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { - int32_t code; + int32_t code = 0; + SStreamRecoverStep2Req* pReq = (SStreamRecoverStep2Req*)msg; SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId); if (pTask == NULL) { diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c index 403a08a767..b5fae3184a 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -107,7 +107,7 @@ int32_t createStreamRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { if (streamTaskShouldStop(&pTask->status) || status == TASK_STATUS__RECOVER_PREPARE || status == TASK_STATUS__WAIT_DOWNSTREAM || status == TASK_STATUS__PAUSE) { - tqDebug("s-task:%s skip push data, not ready for processing, status %d", pTask->id.idStr, status); + tqDebug("s-task:%s not ready for new submit block from wal, status:%d", pTask->id.idStr, status); streamMetaReleaseTask(pStreamMeta, pTask); continue; } diff --git a/source/dnode/vnode/src/tq/tqScan.c b/source/dnode/vnode/src/tq/tqScan.c index 0f00a5acb8..8e243a8bd1 100644 --- a/source/dnode/vnode/src/tq/tqScan.c +++ b/source/dnode/vnode/src/tq/tqScan.c @@ -130,6 +130,7 @@ int32_t tqScanTaosx(STQ* pTq, const STqHandle* pHandle, STaosxRsp* pRsp, SMqMeta tqError("vgId:%d, task exec error since %s", pTq->pVnode->config.vgId, terrstr()); return -1; } + tqDebug("tmqsnap task execute end, get %p", pDataBlock); if (pDataBlock != NULL && pDataBlock->info.rows > 0) { diff --git a/source/libs/executor/inc/executil.h b/source/libs/executor/inc/executil.h index 3925de2764..879395cd57 100644 --- a/source/libs/executor/inc/executil.h +++ b/source/libs/executor/inc/executil.h @@ -39,8 +39,6 @@ #define GET_RES_WINDOW_KEY_LEN(_l) ((_l) + sizeof(uint64_t)) -#define GET_TASKID(_t) (((SExecTaskInfo*)(_t))->id.str) - typedef struct SGroupResInfo { int32_t index; SArray* pRows; // SArray diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index d22a7460bb..a4f1e2ef94 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -12,14 +12,75 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ - -#ifndef _TD_EXECUTOR_INT_H -#define _TD_EXECUTOR_INT_H +#ifndef TDENGINE_EXECUTORINT_H +#define TDENGINE_EXECUTORINT_H #ifdef __cplusplus extern "C" { #endif +#include "os.h" +#include "tcommon.h" +#include "tlosertree.h" +#include "tsort.h" +#include "ttszip.h" +#include "tvariant.h" + +#include "dataSinkMgt.h" +#include "executil.h" +#include "executor.h" +#include "planner.h" +#include "scalar.h" +#include "taosdef.h" +#include "tarray.h" +#include "tfill.h" +#include "thash.h" +#include "tlockfree.h" +#include "tmsg.h" +#include "tpagedbuf.h" +#include "tstream.h" +#include "tstreamUpdate.h" + +#include "vnode.h" + +typedef int32_t (*__block_search_fn_t)(char* data, int32_t num, int64_t key, int32_t order); + +#define IS_VALID_SESSION_WIN(winInfo) ((winInfo).sessionWin.win.skey > 0) +#define SET_SESSION_WIN_INVALID(winInfo) ((winInfo).sessionWin.win.skey = INT64_MIN) +#define IS_INVALID_SESSION_WIN_KEY(winKey) ((winKey).win.skey <= 0) +#define SET_SESSION_WIN_KEY_INVALID(pWinKey) ((pWinKey)->win.skey = INT64_MIN) + +/** + * If the number of generated results is greater than this value, + * query query will be halt and return results to client immediate. + */ +typedef struct SResultInfo { // TODO refactor + int64_t totalRows; // total generated result size in rows + int64_t totalBytes; // total results in bytes. + int32_t capacity; // capacity of current result output buffer + int32_t threshold; // result size threshold in rows. +} SResultInfo; + +typedef struct STableQueryInfo { + TSKEY lastKey; // last check ts, todo remove it later + SResultRowPosition pos; // current active time window +} STableQueryInfo; + +typedef struct SLimit { + int64_t limit; + int64_t offset; +} SLimit; + +typedef struct STableScanAnalyzeInfo SFileBlockLoadRecorder; + +enum { + STREAM_RECOVER_STEP__NONE = 0, + STREAM_RECOVER_STEP__PREPARE1, + STREAM_RECOVER_STEP__PREPARE2, + STREAM_RECOVER_STEP__SCAN1, + STREAM_RECOVER_STEP__SCAN2, +}; + extern int32_t exchangeObjRefPool; typedef struct { @@ -29,9 +90,584 @@ typedef struct { int32_t bytes; } SGroupKeys, SStateKeys; +typedef struct { + char* tablename; + char* dbname; + int32_t tversion; + SSchemaWrapper* sw; + SSchemaWrapper* qsw; +} SSchemaInfo; + +typedef struct SExchangeOpStopInfo { + int32_t operatorType; + int64_t refId; +} SExchangeOpStopInfo; + +typedef struct SExprSupp { + SExprInfo* pExprInfo; + int32_t numOfExprs; // the number of scalar expression in group operator + SqlFunctionCtx* pCtx; + int32_t* rowEntryInfoOffset; // offset value for each row result cell info + SFilterInfo* pFilterInfo; +} SExprSupp; + +typedef enum { + EX_SOURCE_DATA_NOT_READY = 0x1, + EX_SOURCE_DATA_READY = 0x2, + EX_SOURCE_DATA_EXHAUSTED = 0x3, +} EX_SOURCE_STATUS; + +#define COL_MATCH_FROM_COL_ID 0x1 +#define COL_MATCH_FROM_SLOT_ID 0x2 + +typedef struct SLoadRemoteDataInfo { + uint64_t totalSize; // total load bytes from remote + uint64_t totalRows; // total number of rows + uint64_t totalElapsed; // total elapsed time +} SLoadRemoteDataInfo; + +typedef struct SLimitInfo { + SLimit limit; + SLimit slimit; + uint64_t currentGroupId; + int64_t remainGroupOffset; + int64_t numOfOutputGroups; + int64_t remainOffset; + int64_t numOfOutputRows; +} SLimitInfo; + +typedef struct SExchangeInfo { + SArray* pSources; + SArray* pSourceDataInfo; + tsem_t ready; + void* pTransporter; + + // SArray, result block list, used to keep the multi-block that + // passed by downstream operator + SArray* pResultBlockList; + SArray* pRecycledBlocks; // build a pool for small data block to avoid to repeatly create and then destroy. + SSDataBlock* pDummyBlock; // dummy block, not keep data + bool seqLoadData; // sequential load data or not, false by default + int32_t current; + SLoadRemoteDataInfo loadInfo; + uint64_t self; + SLimitInfo limitInfo; + int64_t openedTs; // start exec time stamp, todo: move to SLoadRemoteDataInfo +} SExchangeInfo; + +typedef struct SScanInfo { + int32_t numOfAsc; + int32_t numOfDesc; +} SScanInfo; + +typedef struct SSampleExecInfo { + double sampleRatio; // data block sample ratio, 1 by default + uint32_t seed; // random seed value +} SSampleExecInfo; + +enum { + TABLE_SCAN__TABLE_ORDER = 1, + TABLE_SCAN__BLOCK_ORDER = 2, +}; + +typedef struct SAggSupporter { + SSHashObj* pResultRowHashTable; // quick locate the window object for each result + char* keyBuf; // window key buffer + SDiskbasedBuf* pResultBuf; // query result buffer based on blocked-wised disk file + int32_t resultRowSize; // the result buffer size for each result row, with the meta data size for each row + int32_t currentPageId; // current write page id +} SAggSupporter; + +typedef struct { + // if the upstream is an interval operator, the interval info is also kept here to get the time window to check if + // current data block needs to be loaded. + SInterval interval; + SAggSupporter* pAggSup; + SExprSupp* pExprSup; // expr supporter of aggregate operator +} SAggOptrPushDownInfo; + +typedef struct STableMetaCacheInfo { + SLRUCache* pTableMetaEntryCache; // 100 by default + uint64_t metaFetch; + uint64_t cacheHit; +} STableMetaCacheInfo; + +typedef struct STableScanBase { + STsdbReader* dataReader; + SFileBlockLoadRecorder readRecorder; + SQueryTableDataCond cond; + SAggOptrPushDownInfo pdInfo; + SColMatchInfo matchInfo; + SReadHandle readHandle; + SExprSupp pseudoSup; + STableMetaCacheInfo metaCache; + int32_t scanFlag; // table scan flag to denote if it is a repeat/reverse/main scan + int32_t dataBlockLoadFlag; + SLimitInfo limitInfo; + // there are more than one table list exists in one task, if only one vnode exists. + STableListInfo* pTableListInfo; +} STableScanBase; + +typedef struct STableScanInfo { + STableScanBase base; + SScanInfo scanInfo; + int32_t scanTimes; + SSDataBlock* pResBlock; + SSampleExecInfo sample; // sample execution info + int32_t currentGroupId; + int32_t currentTable; + int8_t scanMode; + int8_t assignBlockUid; + bool hasGroupByTag; + bool countOnly; +} STableScanInfo; + +typedef struct STableMergeScanInfo { + int32_t tableStartIndex; + int32_t tableEndIndex; + bool hasGroupId; + uint64_t groupId; + SArray* queryConds; // array of queryTableDataCond + STableScanBase base; + int32_t bufPageSize; + uint32_t sortBufSize; // max buffer size for in-memory sort + SArray* pSortInfo; + SSortHandle* pSortHandle; + SSDataBlock* pSortInputBlock; + int64_t startTs; // sort start time + SArray* sortSourceParams; + SLimitInfo limitInfo; + int64_t numOfRows; + SScanInfo scanInfo; + int32_t scanTimes; + SSDataBlock* pResBlock; + SSampleExecInfo sample; // sample execution info + SSortExecInfo sortExecInfo; +} STableMergeScanInfo; + +typedef struct STagScanInfo { + SColumnInfo* pCols; + SSDataBlock* pRes; + SColMatchInfo matchInfo; + int32_t curPos; + SReadHandle readHandle; + STableListInfo* pTableListInfo; +} STagScanInfo; + +typedef enum EStreamScanMode { + STREAM_SCAN_FROM_READERHANDLE = 1, + STREAM_SCAN_FROM_RES, + STREAM_SCAN_FROM_UPDATERES, + STREAM_SCAN_FROM_DELETE_DATA, + STREAM_SCAN_FROM_DATAREADER_RETRIEVE, + STREAM_SCAN_FROM_DATAREADER_RANGE, +} EStreamScanMode; + +enum { + PROJECT_RETRIEVE_CONTINUE = 0x1, + PROJECT_RETRIEVE_DONE = 0x2, +}; + +typedef struct SStreamAggSupporter { + int32_t resultRowSize; // the result buffer size for each result row, with the meta data size for each row + SSDataBlock* pScanBlock; + SStreamState* pState; + int64_t gap; // stream session window gap + SqlFunctionCtx* pDummyCtx; // for combine + SSHashObj* pResultRows; + int32_t stateKeySize; + int16_t stateKeyType; + SDiskbasedBuf* pResultBuf; +} SStreamAggSupporter; + +typedef struct SWindowSupporter { + SStreamAggSupporter* pStreamAggSup; + int64_t gap; + uint16_t parentType; + SAggSupporter* pIntervalAggSup; +} SWindowSupporter; + +typedef struct SPartitionBySupporter { + SArray* pGroupCols; // group by columns, SArray + SArray* pGroupColVals; // current group column values, SArray + char* keyBuf; // group by keys for hash + bool needCalc; // partition by column +} SPartitionBySupporter; + +typedef struct SPartitionDataInfo { + uint64_t groupId; + char* tbname; + SArray* tags; + SArray* rowIds; +} SPartitionDataInfo; + +typedef struct STimeWindowAggSupp { + int8_t calTrigger; + int8_t calTriggerSaved; + int64_t deleteMark; + int64_t deleteMarkSaved; + int64_t waterMark; + TSKEY maxTs; + TSKEY minTs; + SColumnInfoData timeWindowData; // query time window info for scalar function execution. +} STimeWindowAggSupp; + +typedef struct SStreamScanInfo { + SExprInfo* pPseudoExpr; + int32_t numOfPseudoExpr; + SExprSupp tbnameCalSup; + SExprSupp tagCalSup; + int32_t primaryTsIndex; // primary time stamp slot id + SReadHandle readHandle; + SInterval interval; // if the upstream is an interval operator, the interval info is also kept here. + SColMatchInfo matchInfo; + + SArray* pBlockLists; // multiple SSDatablock. + SSDataBlock* pRes; // result SSDataBlock + SSDataBlock* pUpdateRes; // update SSDataBlock + int32_t updateResIndex; + int32_t blockType; // current block type + int32_t validBlockIndex; // Is current data has returned? + uint64_t numOfExec; // execution times + STqReader* tqReader; + + uint64_t groupId; + SUpdateInfo* pUpdateInfo; + + EStreamScanMode scanMode; + struct SOperatorInfo* pStreamScanOp; + struct SOperatorInfo* pTableScanOp; + SArray* childIds; + SWindowSupporter windowSup; + SPartitionBySupporter partitionSup; + SExprSupp* pPartScalarSup; + bool assignBlockUid; // assign block uid to groupId, temporarily used for generating rollup SMA. + int32_t scanWinIndex; // for state operator + int32_t pullDataResIndex; + SSDataBlock* pPullDataRes; // pull data SSDataBlock + SSDataBlock* pDeleteDataRes; // delete data SSDataBlock + int32_t deleteDataIndex; + STimeWindow updateWin; + STimeWindowAggSupp twAggSup; + SSDataBlock* pUpdateDataRes; + // status for tmq + SNodeList* pGroupTags; + SNode* pTagCond; + SNode* pTagIndexCond; + + // recover + int32_t blockRecoverContiCnt; + int32_t blockRecoverTotCnt; + SSDataBlock* pRecoverRes; + + SSDataBlock* pCreateTbRes; + int8_t igCheckUpdate; + int8_t igExpired; +} SStreamScanInfo; + +typedef struct { + SVnode* vnode; + SSDataBlock pRes; // result SSDataBlock + STsdbReader* dataReader; + SSnapContext* sContext; + STableListInfo* pTableListInfo; +} SStreamRawScanInfo; + +typedef struct STableCountScanSupp { + int16_t dbNameSlotId; + int16_t stbNameSlotId; + int16_t tbCountSlotId; + bool groupByDbName; + bool groupByStbName; + char dbNameFilter[TSDB_DB_NAME_LEN]; + char stbNameFilter[TSDB_TABLE_NAME_LEN]; +} STableCountScanSupp; + +typedef struct SOptrBasicInfo { + SResultRowInfo resultRowInfo; + SSDataBlock* pRes; + bool mergeResultBlock; +} SOptrBasicInfo; + +typedef struct SIntervalAggOperatorInfo { + SOptrBasicInfo binfo; // basic info + SAggSupporter aggSup; // aggregate supporter + SExprSupp scalarSupp; // supporter for perform scalar function + SGroupResInfo groupResInfo; // multiple results build supporter + SInterval interval; // interval info + int32_t primaryTsIndex; // primary time stamp slot id from result of downstream operator. + STimeWindow win; // query time range + bool timeWindowInterpo; // interpolation needed or not + SArray* pInterpCols; // interpolation columns + int32_t resultTsOrder; // result timestamp order + int32_t inputOrder; // input data ts order + EOPTR_EXEC_MODEL execModel; // operator execution model [batch model|stream model] + STimeWindowAggSupp twAggSup; + SArray* pPrevValues; // SArray used to keep the previous not null value for interpolation. +} SIntervalAggOperatorInfo; + +typedef struct SMergeAlignedIntervalAggOperatorInfo { + SIntervalAggOperatorInfo* intervalAggOperatorInfo; + + uint64_t groupId; // current groupId + int64_t curTs; // current ts + SSDataBlock* prefetchedBlock; + SResultRow* pResultRow; +} SMergeAlignedIntervalAggOperatorInfo; + +typedef struct SStreamIntervalOperatorInfo { + SOptrBasicInfo binfo; // basic info + SAggSupporter aggSup; // aggregate supporter + SExprSupp scalarSupp; // supporter for perform scalar function + SGroupResInfo groupResInfo; // multiple results build supporter + SInterval interval; // interval info + int32_t primaryTsIndex; // primary time stamp slot id from result of downstream operator. + STimeWindowAggSupp twAggSup; + bool invertible; + bool ignoreExpiredData; + bool ignoreExpiredDataSaved; + SArray* pDelWins; // SWinRes + int32_t delIndex; + SSDataBlock* pDelRes; + SPhysiNode* pPhyNode; // create new child + SHashObj* pPullDataMap; + SArray* pPullWins; // SPullWindowInfo + int32_t pullIndex; + SSDataBlock* pPullDataRes; + bool isFinal; + SArray* pChildren; + SStreamState* pState; + SWinKey delKey; + uint64_t numOfDatapack; + SArray* pUpdated; + SSHashObj* pUpdatedMap; + int64_t dataVersion; +} SStreamIntervalOperatorInfo; + +typedef struct SDataGroupInfo { + uint64_t groupId; + int64_t numOfRows; + SArray* pPageList; +} SDataGroupInfo; + +typedef struct SWindowRowsSup { + STimeWindow win; + TSKEY prevTs; + int32_t startRowIndex; + int32_t numOfRows; + uint64_t groupId; +} SWindowRowsSup; + +typedef struct SResultWindowInfo { + void* pOutputBuf; + SSessionKey sessionWin; + bool isOutput; +} SResultWindowInfo; + +typedef struct SStreamSessionAggOperatorInfo { + SOptrBasicInfo binfo; + SStreamAggSupporter streamAggSup; + SExprSupp scalarSupp; // supporter for perform scalar function + SGroupResInfo groupResInfo; + int32_t primaryTsIndex; // primary timestamp slot id + int32_t endTsIndex; // window end timestamp slot id + int32_t order; // current SSDataBlock scan order + STimeWindowAggSupp twAggSup; + SSDataBlock* pWinBlock; // window result + SSDataBlock* pDelRes; // delete result + SSDataBlock* pUpdateRes; // update window + bool returnUpdate; + SSHashObj* pStDeleted; + void* pDelIterator; + SArray* pChildren; // cache for children's result; final stream operator + SPhysiNode* pPhyNode; // create new child + bool isFinal; + bool ignoreExpiredData; + bool ignoreExpiredDataSaved; + SArray* pUpdated; + SSHashObj* pStUpdated; + int64_t dataVersion; +} SStreamSessionAggOperatorInfo; + +typedef struct SStreamStateAggOperatorInfo { + SOptrBasicInfo binfo; + SStreamAggSupporter streamAggSup; + SExprSupp scalarSupp; // supporter for perform scalar function + SGroupResInfo groupResInfo; + int32_t primaryTsIndex; // primary timestamp slot id + STimeWindowAggSupp twAggSup; + SColumn stateCol; + SSDataBlock* pDelRes; + SSHashObj* pSeDeleted; + void* pDelIterator; + SArray* pChildren; // cache for children's result; + bool ignoreExpiredData; + bool ignoreExpiredDataSaved; + SArray* pUpdated; + SSHashObj* pSeUpdated; + int64_t dataVersion; +} SStreamStateAggOperatorInfo; + +typedef struct SStreamPartitionOperatorInfo { + SOptrBasicInfo binfo; + SPartitionBySupporter partitionSup; + SExprSupp scalarSup; + SExprSupp tbnameCalSup; + SExprSupp tagCalSup; + SHashObj* pPartitions; + void* parIte; + void* pTbNameIte; + SSDataBlock* pInputDataBlock; + int32_t tsColIndex; + SSDataBlock* pDelRes; + SSDataBlock* pCreateTbRes; +} SStreamPartitionOperatorInfo; + +typedef struct SStreamFillSupporter { + int32_t type; // fill type + SInterval interval; + SResultRowData prev; + SResultRowData cur; + SResultRowData next; + SResultRowData nextNext; + SFillColInfo* pAllColInfo; // fill exprs and not fill exprs + SExprSupp notFillExprSup; + int32_t numOfAllCols; // number of all exprs, including the tags columns + int32_t numOfFillCols; + int32_t numOfNotFillCols; + int32_t rowSize; + SSHashObj* pResMap; + bool hasDelete; +} SStreamFillSupporter; + +typedef struct SStreamFillOperatorInfo { + SStreamFillSupporter* pFillSup; + SSDataBlock* pRes; + SSDataBlock* pSrcBlock; + int32_t srcRowIndex; + SSDataBlock* pSrcDelBlock; + int32_t srcDelRowIndex; + SSDataBlock* pDelRes; + SColMatchInfo matchInfo; + int32_t primaryTsCol; + int32_t primarySrcSlotId; + SStreamFillInfo* pFillInfo; +} SStreamFillOperatorInfo; + +#define OPTR_IS_OPENED(_optr) (((_optr)->status & OP_OPENED) == OP_OPENED) +#define OPTR_SET_OPENED(_optr) ((_optr)->status |= OP_OPENED) + +SSchemaWrapper* extractQueriedColumnSchema(SScanPhysiNode* pScanNode); +int32_t initQueriedTableSchemaInfo(SReadHandle* pHandle, SScanPhysiNode* pScanNode, const char* dbName, SExecTaskInfo* pTaskInfo); +void cleanupQueriedTableScanInfo(SSchemaInfo* pSchemaInfo); + +void initBasicInfo(SOptrBasicInfo* pInfo, SSDataBlock* pBlock); +void cleanupBasicInfo(SOptrBasicInfo* pInfo); + +int32_t initExprSupp(SExprSupp* pSup, SExprInfo* pExprInfo, int32_t numOfExpr); +void cleanupExprSupp(SExprSupp* pSup); + +void destroyExprInfo(SExprInfo* pExpr, int32_t numOfExprs); + +int32_t initAggSup(SExprSupp* pSup, SAggSupporter* pAggSup, SExprInfo* pExprInfo, int32_t numOfCols, size_t keyBufSize, + const char* pkey, void* pState); +void cleanupAggSup(SAggSupporter* pAggSup); + +void initResultSizeInfo(SResultInfo* pResultInfo, int32_t numOfRows); + +void doBuildStreamResBlock(struct SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo, + SDiskbasedBuf* pBuf); +void doBuildResultDatablock(struct SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo, + SDiskbasedBuf* pBuf); + +bool hasLimitOffsetInfo(SLimitInfo* pLimitInfo); +bool hasSlimitOffsetInfo(SLimitInfo* pLimitInfo); +void initLimitInfo(const SNode* pLimit, const SNode* pSLimit, SLimitInfo* pLimitInfo); +void resetLimitInfoForNextGroup(SLimitInfo* pLimitInfo); +bool applyLimitOffset(SLimitInfo* pLimitInfo, SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo); + +void applyAggFunctionOnPartialTuples(SExecTaskInfo* taskInfo, SqlFunctionCtx* pCtx, SColumnInfoData* pTimeWindowData, + int32_t offset, int32_t forwardStep, int32_t numOfTotal, int32_t numOfOutput); + +int32_t extractDataBlockFromFetchRsp(SSDataBlock* pRes, char* pData, SArray* pColList, char** pNextStart); +void updateLoadRemoteInfo(SLoadRemoteDataInfo* pInfo, int64_t numOfRows, int32_t dataLen, int64_t startTs, + struct SOperatorInfo* pOperator); + +STimeWindow getFirstQualifiedTimeWindow(int64_t ts, STimeWindow* pWindow, SInterval* pInterval, int32_t order); +int32_t getBufferPgSize(int32_t rowSize, uint32_t* defaultPgsz, uint32_t* defaultBufsz); + +extern void doDestroyExchangeOperatorInfo(void* param); + +void doFilter(SSDataBlock* pBlock, SFilterInfo* pFilterInfo, SColMatchInfo* pColMatchInfo); +int32_t addTagPseudoColumnData(SReadHandle* pHandle, const SExprInfo* pExpr, int32_t numOfExpr, SSDataBlock* pBlock, + int32_t rows, const char* idStr, STableMetaCacheInfo* pCache); + +void appendOneRowToDataBlock(SSDataBlock* pBlock, STupleHandle* pTupleHandle); +void setTbNameColData(const SSDataBlock* pBlock, SColumnInfoData* pColInfoData, int32_t functionId, const char* name); + +void setResultRowInitCtx(SResultRow* pResult, SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowEntryInfoOffset); +void clearResultRowInitFlag(SqlFunctionCtx* pCtx, int32_t numOfOutput); + +SResultRow* doSetResultOutBufByKey(SDiskbasedBuf* pResultBuf, SResultRowInfo* pResultRowInfo, char* pData, + int16_t bytes, bool masterscan, uint64_t groupId, SExecTaskInfo* pTaskInfo, + bool isIntervalQuery, SAggSupporter* pSup, bool keepGroup); + +int32_t projectApplyFunctions(SExprInfo* pExpr, SSDataBlock* pResult, SSDataBlock* pSrcBlock, SqlFunctionCtx* pCtx, + int32_t numOfOutput, SArray* pPseudoList); + +void setInputDataBlock(SExprSupp* pExprSupp, SSDataBlock* pBlock, int32_t order, int32_t scanFlag, bool createDummyCol); + +int32_t checkForQueryBuf(size_t numOfTables); + +int32_t createDataSinkParam(SDataSinkNode* pNode, void** pParam, SExecTaskInfo* pTask, SReadHandle* readHandle); + +STimeWindow getActiveTimeWindow(SDiskbasedBuf* pBuf, SResultRowInfo* pResultRowInfo, int64_t ts, SInterval* pInterval, + int32_t order); +int32_t getNumOfRowsInTimeWindow(SDataBlockInfo* pDataBlockInfo, TSKEY* pPrimaryColumn, int32_t startPos, TSKEY ekey, + __block_search_fn_t searchFn, STableQueryInfo* item, int32_t order); +int32_t binarySearchForKey(char* pValue, int num, TSKEY key, int order); +SResultRow* getNewResultRow(SDiskbasedBuf* pResultBuf, int32_t* currentPageId, int32_t interBufSize); +void getCurSessionWindow(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId, SSessionKey* pKey); +bool isInTimeWindow(STimeWindow* pWin, TSKEY ts, int64_t gap); +bool functionNeedToExecute(SqlFunctionCtx* pCtx); +bool isOverdue(TSKEY ts, STimeWindowAggSupp* pSup); +bool isCloseWindow(STimeWindow* pWin, STimeWindowAggSupp* pSup); +bool isDeletedStreamWindow(STimeWindow* pWin, uint64_t groupId, SStreamState* pState, STimeWindowAggSupp* pTwSup); +void appendOneRowToStreamSpecialBlock(SSDataBlock* pBlock, TSKEY* pStartTs, TSKEY* pEndTs, uint64_t* pUid, + uint64_t* pGp, void* pTbName); +uint64_t calGroupIdByData(SPartitionBySupporter* pParSup, SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t rowId); + +int32_t finalizeResultRows(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPosition, SExprSupp* pSup, + SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo); + +bool groupbyTbname(SNodeList* pGroupList); +int32_t buildDataBlockFromGroupRes(struct SOperatorInfo* pOperator, SStreamState* pState, SSDataBlock* pBlock, SExprSupp* pSup, + SGroupResInfo* pGroupResInfo); +int32_t saveSessionDiscBuf(SStreamState* pState, SSessionKey* key, void* buf, int32_t size); +int32_t buildSessionResultDataBlock(struct SOperatorInfo* pOperator, SStreamState* pState, SSDataBlock* pBlock, + SExprSupp* pSup, SGroupResInfo* pGroupResInfo); +int32_t setOutputBuf(SStreamState* pState, STimeWindow* win, SResultRow** pResult, int64_t tableGroupId, + SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowEntryInfoOffset, SAggSupporter* pAggSup); +int32_t releaseOutputBuf(SStreamState* pState, SWinKey* pKey, SResultRow* pResult); +int32_t saveOutputBuf(SStreamState* pState, SWinKey* pKey, SResultRow* pResult, int32_t resSize); +void getNextIntervalWindow(SInterval* pInterval, STimeWindow* tw, int32_t order); +int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int32_t pos, int32_t order, + int64_t* pData); +void appendCreateTableRow(SStreamState* pState, SExprSupp* pTableSup, SExprSupp* pTagSup, uint64_t groupId, + SSDataBlock* pSrcBlock, int32_t rowId, SSDataBlock* pDestBlock); + +SSDataBlock* buildCreateTableBlock(SExprSupp* tbName, SExprSupp* tag); +SExprInfo* createExpr(SNodeList* pNodeList, int32_t* numOfExprs); + +void copyResultrowToDataBlock(SExprInfo* pExprInfo, int32_t numOfExprs, SResultRow* pRow, SqlFunctionCtx* pCtx, + SSDataBlock* pBlock, const int32_t* rowEntryOffset, SExecTaskInfo* pTaskInfo); +void doUpdateNumOfRows(SqlFunctionCtx* pCtx, SResultRow* pRow, int32_t numOfExprs, const int32_t* rowEntryOffset); +void doClearBufferedBlocks(SStreamScanInfo* pInfo); + uint64_t calcGroupId(char* pData, int32_t len); + #ifdef __cplusplus } #endif -#endif /*_TD_EXECUTOR_INT_H*/ \ No newline at end of file +#endif // TDENGINE_EXECUTORINT_H diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index ae1e7743a5..e69de29bb2 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -1,882 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ -#ifndef TDENGINE_EXECUTORIMPL_H -#define TDENGINE_EXECUTORIMPL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "os.h" -#include "tcommon.h" -#include "tlosertree.h" -#include "tsort.h" -#include "ttszip.h" -#include "tvariant.h" - -#include "dataSinkMgt.h" -#include "executil.h" -#include "executor.h" -#include "planner.h" -#include "scalar.h" -#include "taosdef.h" -#include "tarray.h" -#include "tfill.h" -#include "thash.h" -#include "tlockfree.h" -#include "tmsg.h" -#include "tpagedbuf.h" -#include "tstream.h" -#include "tstreamUpdate.h" - -#include "executorInt.h" -#include "vnode.h" - -typedef int32_t (*__block_search_fn_t)(char* data, int32_t num, int64_t key, int32_t order); - -#define IS_VALID_SESSION_WIN(winInfo) ((winInfo).sessionWin.win.skey > 0) -#define SET_SESSION_WIN_INVALID(winInfo) ((winInfo).sessionWin.win.skey = INT64_MIN) -#define IS_INVALID_SESSION_WIN_KEY(winKey) ((winKey).win.skey <= 0) -#define SET_SESSION_WIN_KEY_INVALID(pWinKey) ((pWinKey)->win.skey = INT64_MIN) - -enum { - // when this task starts to execute, this status will set - TASK_NOT_COMPLETED = 0x1u, - - /* Task is over - * 1. this status is used in one row result query process, e.g., count/sum/first/last/ avg...etc. - * 2. when all data within queried time window, it is also denoted as query_completed - */ - TASK_COMPLETED = 0x2u, -}; - -/** - * If the number of generated results is greater than this value, - * query query will be halt and return results to client immediate. - */ -typedef struct SResultInfo { // TODO refactor - int64_t totalRows; // total generated result size in rows - int64_t totalBytes; // total results in bytes. - int32_t capacity; // capacity of current result output buffer - int32_t threshold; // result size threshold in rows. -} SResultInfo; - -typedef struct STableQueryInfo { - TSKEY lastKey; // last check ts, todo remove it later - SResultRowPosition pos; // current active time window -} STableQueryInfo; - -typedef struct SLimit { - int64_t limit; - int64_t offset; -} SLimit; - -typedef struct STableScanAnalyzeInfo SFileBlockLoadRecorder; - -typedef struct STaskCostInfo { - int64_t created; - int64_t start; - uint64_t elapsedTime; - double extractListTime; - double groupIdMapTime; - SFileBlockLoadRecorder* pRecoder; -} STaskCostInfo; - -typedef struct SOperatorCostInfo { - double openCost; - double totalCost; -} SOperatorCostInfo; - -struct SOperatorInfo; - -typedef int32_t (*__optr_encode_fn_t)(struct SOperatorInfo* pOperator, char** result, int32_t* length); -typedef int32_t (*__optr_decode_fn_t)(struct SOperatorInfo* pOperator, char* result); - -typedef int32_t (*__optr_open_fn_t)(struct SOperatorInfo* pOptr); -typedef SSDataBlock* (*__optr_fn_t)(struct SOperatorInfo* pOptr); -typedef void (*__optr_close_fn_t)(void* param); -typedef int32_t (*__optr_explain_fn_t)(struct SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len); -typedef int32_t (*__optr_reqBuf_fn_t)(struct SOperatorInfo* pOptr); - -typedef struct STaskIdInfo { - uint64_t queryId; // this is also a request id - uint64_t subplanId; - uint64_t templateId; - char* str; - int32_t vgId; -} STaskIdInfo; - -enum { - STREAM_RECOVER_STEP__NONE = 0, - STREAM_RECOVER_STEP__PREPARE1, - STREAM_RECOVER_STEP__PREPARE2, - STREAM_RECOVER_STEP__SCAN1, - STREAM_RECOVER_STEP__SCAN2, -}; - -typedef struct { - STqOffsetVal currentOffset; // for tmq - SMqMetaRsp metaRsp; // for tmq fetching meta - int64_t snapshotVer; - SPackedData submit; - SSchemaWrapper* schema; - char tbName[TSDB_TABLE_NAME_LEN]; - int8_t recoverStep; - int8_t recoverScanFinished; - SQueryTableDataCond tableCond; - int64_t fillHistoryVer1; - int64_t fillHistoryVer2; - int64_t dataVersion; - SStreamState* pState; - int64_t checkPointId; -} SStreamTaskInfo; - -typedef struct { - char* tablename; - char* dbname; - int32_t tversion; - SSchemaWrapper* sw; - SSchemaWrapper* qsw; -} SSchemaInfo; - -typedef struct SExchangeOpStopInfo { - int32_t operatorType; - int64_t refId; -} SExchangeOpStopInfo; - -typedef struct STaskStopInfo { - SRWLatch lock; - SArray* pStopInfo; -} STaskStopInfo; - -struct SExecTaskInfo { - STaskIdInfo id; - uint32_t status; - STimeWindow window; - STaskCostInfo cost; - int64_t owner; // if it is in execution - int32_t code; - int32_t qbufQuota; // total available buffer (in KB) during execution query - int64_t version; // used for stream to record wal version, why not move to sschemainfo - SStreamTaskInfo streamInfo; - SSchemaInfo schemaInfo; - const char* sql; // query sql string - jmp_buf env; // jump to this position when error happens. - EOPTR_EXEC_MODEL execModel; // operator execution model [batch model|stream model] - SSubplan* pSubplan; - struct SOperatorInfo* pRoot; - SLocalFetch localFetch; - SArray* pResultBlockList; // result block list - STaskStopInfo stopInfo; - SRWLatch lock; // secure the access of STableListInfo -}; - -enum { - OP_NOT_OPENED = 0x0, - OP_OPENED = 0x1, - OP_RES_TO_RETURN = 0x5, - OP_EXEC_DONE = 0x9, -}; - -typedef struct SOperatorFpSet { - __optr_open_fn_t _openFn; // DO NOT invoke this function directly - __optr_fn_t getNextFn; - __optr_fn_t cleanupFn; // call this function to release the allocated resources ASAP - __optr_close_fn_t closeFn; - __optr_reqBuf_fn_t reqBufFn; // total used buffer for blocking operator - __optr_encode_fn_t encodeResultRow; - __optr_decode_fn_t decodeResultRow; - __optr_explain_fn_t getExplainFn; -} SOperatorFpSet; - -typedef struct SExprSupp { - SExprInfo* pExprInfo; - int32_t numOfExprs; // the number of scalar expression in group operator - SqlFunctionCtx* pCtx; - int32_t* rowEntryInfoOffset; // offset value for each row result cell info - SFilterInfo* pFilterInfo; -} SExprSupp; - -typedef struct SOperatorInfo { - uint16_t operatorType; - int16_t resultDataBlockId; - bool blocking; // block operator or not - uint8_t status; // denote if current operator is completed - char* name; // name, for debug purpose - void* info; // extension attribution - SExprSupp exprSupp; - SExecTaskInfo* pTaskInfo; - SOperatorCostInfo cost; - SResultInfo resultInfo; - struct SOperatorInfo** pDownstream; // downstram pointer list - int32_t numOfDownstream; // number of downstream. The value is always ONE expect for join operator - SOperatorFpSet fpSet; -} SOperatorInfo; - -typedef enum { - EX_SOURCE_DATA_NOT_READY = 0x1, - EX_SOURCE_DATA_READY = 0x2, - EX_SOURCE_DATA_EXHAUSTED = 0x3, -} EX_SOURCE_STATUS; - -#define COL_MATCH_FROM_COL_ID 0x1 -#define COL_MATCH_FROM_SLOT_ID 0x2 - -typedef struct SLoadRemoteDataInfo { - uint64_t totalSize; // total load bytes from remote - uint64_t totalRows; // total number of rows - uint64_t totalElapsed; // total elapsed time -} SLoadRemoteDataInfo; - -typedef struct SLimitInfo { - SLimit limit; - SLimit slimit; - uint64_t currentGroupId; - int64_t remainGroupOffset; - int64_t numOfOutputGroups; - int64_t remainOffset; - int64_t numOfOutputRows; -} SLimitInfo; - -typedef struct SExchangeInfo { - SArray* pSources; - SArray* pSourceDataInfo; - tsem_t ready; - void* pTransporter; - - // SArray, result block list, used to keep the multi-block that - // passed by downstream operator - SArray* pResultBlockList; - SArray* pRecycledBlocks; // build a pool for small data block to avoid to repeatly create and then destroy. - SSDataBlock* pDummyBlock; // dummy block, not keep data - bool seqLoadData; // sequential load data or not, false by default - int32_t current; - SLoadRemoteDataInfo loadInfo; - uint64_t self; - SLimitInfo limitInfo; - int64_t openedTs; // start exec time stamp, todo: move to SLoadRemoteDataInfo -} SExchangeInfo; - -typedef struct SScanInfo { - int32_t numOfAsc; - int32_t numOfDesc; -} SScanInfo; - -typedef struct SSampleExecInfo { - double sampleRatio; // data block sample ratio, 1 by default - uint32_t seed; // random seed value -} SSampleExecInfo; - -enum { - TABLE_SCAN__TABLE_ORDER = 1, - TABLE_SCAN__BLOCK_ORDER = 2, -}; - -typedef struct SAggSupporter { - SSHashObj* pResultRowHashTable; // quick locate the window object for each result - char* keyBuf; // window key buffer - SDiskbasedBuf* pResultBuf; // query result buffer based on blocked-wised disk file - int32_t resultRowSize; // the result buffer size for each result row, with the meta data size for each row - int32_t currentPageId; // current write page id -} SAggSupporter; - -typedef struct { - // if the upstream is an interval operator, the interval info is also kept here to get the time window to check if - // current data block needs to be loaded. - SInterval interval; - SAggSupporter* pAggSup; - SExprSupp* pExprSup; // expr supporter of aggregate operator -} SAggOptrPushDownInfo; - -typedef struct STableMetaCacheInfo { - SLRUCache* pTableMetaEntryCache; // 100 by default - uint64_t metaFetch; - uint64_t cacheHit; -} STableMetaCacheInfo; - -typedef struct STableScanBase { - STsdbReader* dataReader; - SFileBlockLoadRecorder readRecorder; - SQueryTableDataCond cond; - SAggOptrPushDownInfo pdInfo; - SColMatchInfo matchInfo; - SReadHandle readHandle; - SExprSupp pseudoSup; - STableMetaCacheInfo metaCache; - int32_t scanFlag; // table scan flag to denote if it is a repeat/reverse/main scan - int32_t dataBlockLoadFlag; - SLimitInfo limitInfo; - // there are more than one table list exists in one task, if only one vnode exists. - STableListInfo* pTableListInfo; -} STableScanBase; - -typedef struct STableScanInfo { - STableScanBase base; - SScanInfo scanInfo; - int32_t scanTimes; - SSDataBlock* pResBlock; - SSampleExecInfo sample; // sample execution info - int32_t currentGroupId; - int32_t currentTable; - int8_t scanMode; - int8_t assignBlockUid; - bool hasGroupByTag; - bool countOnly; -} STableScanInfo; - -typedef struct STableMergeScanInfo { - int32_t tableStartIndex; - int32_t tableEndIndex; - bool hasGroupId; - uint64_t groupId; - SArray* queryConds; // array of queryTableDataCond - STableScanBase base; - int32_t bufPageSize; - uint32_t sortBufSize; // max buffer size for in-memory sort - SArray* pSortInfo; - SSortHandle* pSortHandle; - SSDataBlock* pSortInputBlock; - int64_t startTs; // sort start time - SArray* sortSourceParams; - SLimitInfo limitInfo; - int64_t numOfRows; - SScanInfo scanInfo; - int32_t scanTimes; - SSDataBlock* pResBlock; - SSampleExecInfo sample; // sample execution info - SSortExecInfo sortExecInfo; -} STableMergeScanInfo; - -typedef struct STagScanInfo { - SColumnInfo* pCols; - SSDataBlock* pRes; - SColMatchInfo matchInfo; - int32_t curPos; - SReadHandle readHandle; - STableListInfo* pTableListInfo; -} STagScanInfo; - -typedef enum EStreamScanMode { - STREAM_SCAN_FROM_READERHANDLE = 1, - STREAM_SCAN_FROM_RES, - STREAM_SCAN_FROM_UPDATERES, - STREAM_SCAN_FROM_DELETE_DATA, - STREAM_SCAN_FROM_DATAREADER_RETRIEVE, - STREAM_SCAN_FROM_DATAREADER_RANGE, -} EStreamScanMode; - -enum { - PROJECT_RETRIEVE_CONTINUE = 0x1, - PROJECT_RETRIEVE_DONE = 0x2, -}; - -typedef struct SStreamAggSupporter { - int32_t resultRowSize; // the result buffer size for each result row, with the meta data size for each row - SSDataBlock* pScanBlock; - SStreamState* pState; - int64_t gap; // stream session window gap - SqlFunctionCtx* pDummyCtx; // for combine - SSHashObj* pResultRows; - int32_t stateKeySize; - int16_t stateKeyType; - SDiskbasedBuf* pResultBuf; -} SStreamAggSupporter; - -typedef struct SWindowSupporter { - SStreamAggSupporter* pStreamAggSup; - int64_t gap; - uint16_t parentType; - SAggSupporter* pIntervalAggSup; -} SWindowSupporter; - -typedef struct SPartitionBySupporter { - SArray* pGroupCols; // group by columns, SArray - SArray* pGroupColVals; // current group column values, SArray - char* keyBuf; // group by keys for hash - bool needCalc; // partition by column -} SPartitionBySupporter; - -typedef struct SPartitionDataInfo { - uint64_t groupId; - char* tbname; - SArray* tags; - SArray* rowIds; -} SPartitionDataInfo; - -typedef struct STimeWindowAggSupp { - int8_t calTrigger; - int8_t calTriggerSaved; - int64_t deleteMark; - int64_t deleteMarkSaved; - int64_t waterMark; - TSKEY maxTs; - TSKEY minTs; - TSKEY checkPointTs; - TSKEY checkPointInterval; - SColumnInfoData timeWindowData; // query time window info for scalar function execution. -} STimeWindowAggSupp; - -typedef struct SStreamScanInfo { - SExprInfo* pPseudoExpr; - int32_t numOfPseudoExpr; - SExprSupp tbnameCalSup; - SExprSupp tagCalSup; - int32_t primaryTsIndex; // primary time stamp slot id - SReadHandle readHandle; - SInterval interval; // if the upstream is an interval operator, the interval info is also kept here. - SColMatchInfo matchInfo; - - SArray* pBlockLists; // multiple SSDatablock. - SSDataBlock* pRes; // result SSDataBlock - SSDataBlock* pUpdateRes; // update SSDataBlock - int32_t updateResIndex; - int32_t blockType; // current block type - int32_t validBlockIndex; // Is current data has returned? - uint64_t numOfExec; // execution times - STqReader* tqReader; - - uint64_t groupId; - SUpdateInfo* pUpdateInfo; - - EStreamScanMode scanMode; - SOperatorInfo* pStreamScanOp; - SOperatorInfo* pTableScanOp; - SArray* childIds; - SWindowSupporter windowSup; - SPartitionBySupporter partitionSup; - SExprSupp* pPartScalarSup; - bool assignBlockUid; // assign block uid to groupId, temporarily used for generating rollup SMA. - int32_t scanWinIndex; // for state operator - int32_t pullDataResIndex; - SSDataBlock* pPullDataRes; // pull data SSDataBlock - SSDataBlock* pDeleteDataRes; // delete data SSDataBlock - int32_t deleteDataIndex; - STimeWindow updateWin; - STimeWindowAggSupp twAggSup; - SSDataBlock* pUpdateDataRes; - // status for tmq - SNodeList* pGroupTags; - SNode* pTagCond; - SNode* pTagIndexCond; - - // recover - int32_t blockRecoverContiCnt; - int32_t blockRecoverTotCnt; - SSDataBlock* pRecoverRes; - - SSDataBlock* pCreateTbRes; - int8_t igCheckUpdate; - int8_t igExpired; - SStreamState* pState; -} SStreamScanInfo; - -typedef struct { - SVnode* vnode; - SSDataBlock pRes; // result SSDataBlock - STsdbReader* dataReader; - SSnapContext* sContext; - STableListInfo* pTableListInfo; -} SStreamRawScanInfo; - -typedef struct STableCountScanSupp { - int16_t dbNameSlotId; - int16_t stbNameSlotId; - int16_t tbCountSlotId; - bool groupByDbName; - bool groupByStbName; - char dbNameFilter[TSDB_DB_NAME_LEN]; - char stbNameFilter[TSDB_TABLE_NAME_LEN]; -} STableCountScanSupp; - -typedef struct SOptrBasicInfo { - SResultRowInfo resultRowInfo; - SSDataBlock* pRes; - bool mergeResultBlock; -} SOptrBasicInfo; - -typedef struct SIntervalAggOperatorInfo { - SOptrBasicInfo binfo; // basic info - SAggSupporter aggSup; // aggregate supporter - SExprSupp scalarSupp; // supporter for perform scalar function - SGroupResInfo groupResInfo; // multiple results build supporter - SInterval interval; // interval info - int32_t primaryTsIndex; // primary time stamp slot id from result of downstream operator. - STimeWindow win; // query time range - bool timeWindowInterpo; // interpolation needed or not - SArray* pInterpCols; // interpolation columns - int32_t resultTsOrder; // result timestamp order - int32_t inputOrder; // input data ts order - EOPTR_EXEC_MODEL execModel; // operator execution model [batch model|stream model] - STimeWindowAggSupp twAggSup; - SArray* pPrevValues; // SArray used to keep the previous not null value for interpolation. -} SIntervalAggOperatorInfo; - -typedef struct SMergeAlignedIntervalAggOperatorInfo { - SIntervalAggOperatorInfo* intervalAggOperatorInfo; - - uint64_t groupId; // current groupId - int64_t curTs; // current ts - SSDataBlock* prefetchedBlock; - SResultRow* pResultRow; -} SMergeAlignedIntervalAggOperatorInfo; - -typedef struct SStreamIntervalOperatorInfo { - SOptrBasicInfo binfo; // basic info - SAggSupporter aggSup; // aggregate supporter - SExprSupp scalarSupp; // supporter for perform scalar function - SGroupResInfo groupResInfo; // multiple results build supporter - SInterval interval; // interval info - int32_t primaryTsIndex; // primary time stamp slot id from result of downstream operator. - STimeWindowAggSupp twAggSup; - bool invertible; - bool ignoreExpiredData; - bool ignoreExpiredDataSaved; - SArray* pDelWins; // SWinRes - int32_t delIndex; - SSDataBlock* pDelRes; - SPhysiNode* pPhyNode; // create new child - SHashObj* pPullDataMap; - SArray* pPullWins; // SPullWindowInfo - int32_t pullIndex; - SSDataBlock* pPullDataRes; - bool isFinal; - SArray* pChildren; - int32_t numOfChild; - SStreamState* pState; - SWinKey delKey; - uint64_t numOfDatapack; - SArray* pUpdated; - SSHashObj* pUpdatedMap; - int64_t dataVersion; -} SStreamIntervalOperatorInfo; - -typedef struct SDataGroupInfo { - uint64_t groupId; - int64_t numOfRows; - SArray* pPageList; -} SDataGroupInfo; - -typedef struct SWindowRowsSup { - STimeWindow win; - TSKEY prevTs; - int32_t startRowIndex; - int32_t numOfRows; - uint64_t groupId; -} SWindowRowsSup; - -typedef struct SResultWindowInfo { - void* pOutputBuf; - SSessionKey sessionWin; - bool isOutput; -} SResultWindowInfo; - -typedef struct SStateWindowInfo { - SResultWindowInfo winInfo; - SStateKeys* pStateKey; -} SStateWindowInfo; - -typedef struct SStreamSessionAggOperatorInfo { - SOptrBasicInfo binfo; - SStreamAggSupporter streamAggSup; - SExprSupp scalarSupp; // supporter for perform scalar function - SGroupResInfo groupResInfo; - int32_t primaryTsIndex; // primary timestamp slot id - int32_t endTsIndex; // window end timestamp slot id - int32_t order; // current SSDataBlock scan order - STimeWindowAggSupp twAggSup; - SSDataBlock* pWinBlock; // window result - SSDataBlock* pDelRes; // delete result - SSDataBlock* pUpdateRes; // update window - bool returnUpdate; - SSHashObj* pStDeleted; - void* pDelIterator; - SArray* pChildren; // cache for children's result; final stream operator - SPhysiNode* pPhyNode; // create new child - bool isFinal; - bool ignoreExpiredData; - bool ignoreExpiredDataSaved; - SArray* pUpdated; - SSHashObj* pStUpdated; - int64_t dataVersion; -} SStreamSessionAggOperatorInfo; - -typedef struct SStreamStateAggOperatorInfo { - SOptrBasicInfo binfo; - SStreamAggSupporter streamAggSup; - SExprSupp scalarSupp; // supporter for perform scalar function - SGroupResInfo groupResInfo; - int32_t primaryTsIndex; // primary timestamp slot id - STimeWindowAggSupp twAggSup; - SColumn stateCol; - SSDataBlock* pDelRes; - SSHashObj* pSeDeleted; - void* pDelIterator; - SArray* pChildren; // cache for children's result; - bool ignoreExpiredData; - bool ignoreExpiredDataSaved; - SArray* pUpdated; - SSHashObj* pSeUpdated; - int64_t dataVersion; -} SStreamStateAggOperatorInfo; - -typedef struct SStreamPartitionOperatorInfo { - SOptrBasicInfo binfo; - SPartitionBySupporter partitionSup; - SExprSupp scalarSup; - SExprSupp tbnameCalSup; - SExprSupp tagCalSup; - SHashObj* pPartitions; - void* parIte; - void* pTbNameIte; - SSDataBlock* pInputDataBlock; - int32_t tsColIndex; - SSDataBlock* pDelRes; - SSDataBlock* pCreateTbRes; -} SStreamPartitionOperatorInfo; - -typedef struct SStreamFillSupporter { - int32_t type; // fill type - SInterval interval; - SResultRowData prev; - SResultRowData cur; - SResultRowData next; - SResultRowData nextNext; - SFillColInfo* pAllColInfo; // fill exprs and not fill exprs - SExprSupp notFillExprSup; - int32_t numOfAllCols; // number of all exprs, including the tags columns - int32_t numOfFillCols; - int32_t numOfNotFillCols; - int32_t rowSize; - SSHashObj* pResMap; - bool hasDelete; -} SStreamFillSupporter; - -typedef struct SStreamFillOperatorInfo { - SStreamFillSupporter* pFillSup; - SSDataBlock* pRes; - SSDataBlock* pSrcBlock; - int32_t srcRowIndex; - SSDataBlock* pSrcDelBlock; - int32_t srcDelRowIndex; - SSDataBlock* pDelRes; - SColMatchInfo matchInfo; - int32_t primaryTsCol; - int32_t primarySrcSlotId; - SStreamFillInfo* pFillInfo; -} SStreamFillOperatorInfo; - -#define OPTR_IS_OPENED(_optr) (((_optr)->status & OP_OPENED) == OP_OPENED) -#define OPTR_SET_OPENED(_optr) ((_optr)->status |= OP_OPENED) - -SExecTaskInfo* doCreateExecTaskInfo(uint64_t queryId, uint64_t taskId, int32_t vgId, EOPTR_EXEC_MODEL model, - char* dbFName); - -SOperatorFpSet createOperatorFpSet(__optr_open_fn_t openFn, __optr_fn_t nextFn, __optr_fn_t cleanup, - __optr_close_fn_t closeFn, __optr_reqBuf_fn_t reqBufFn, __optr_explain_fn_t explain); -int32_t optrDummyOpenFn(SOperatorInfo* pOperator); -int32_t appendDownstream(SOperatorInfo* p, SOperatorInfo** pDownstream, int32_t num); -void setOperatorCompleted(SOperatorInfo* pOperator); -void setOperatorInfo(SOperatorInfo* pOperator, const char* name, int32_t type, bool blocking, int32_t status, - void* pInfo, SExecTaskInfo* pTaskInfo); -void destroyOperatorInfo(SOperatorInfo* pOperator); -int32_t optrDefaultBufFn(SOperatorInfo* pOperator); - -void initBasicInfo(SOptrBasicInfo* pInfo, SSDataBlock* pBlock); -void cleanupBasicInfo(SOptrBasicInfo* pInfo); - -int32_t initExprSupp(SExprSupp* pSup, SExprInfo* pExprInfo, int32_t numOfExpr); -void cleanupExprSupp(SExprSupp* pSup); - -void destroyExprInfo(SExprInfo* pExpr, int32_t numOfExprs); - -int32_t initAggSup(SExprSupp* pSup, SAggSupporter* pAggSup, SExprInfo* pExprInfo, int32_t numOfCols, size_t keyBufSize, - const char* pkey, void* pState); -void cleanupAggSup(SAggSupporter* pAggSup); - -void initResultSizeInfo(SResultInfo* pResultInfo, int32_t numOfRows); -void doBuildResultDatablock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo, - SDiskbasedBuf* pBuf); - -bool hasLimitOffsetInfo(SLimitInfo* pLimitInfo); -bool hasSlimitOffsetInfo(SLimitInfo* pLimitInfo); -void initLimitInfo(const SNode* pLimit, const SNode* pSLimit, SLimitInfo* pLimitInfo); -void resetLimitInfoForNextGroup(SLimitInfo* pLimitInfo); -bool applyLimitOffset(SLimitInfo* pLimitInfo, SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo); - -void applyAggFunctionOnPartialTuples(SExecTaskInfo* taskInfo, SqlFunctionCtx* pCtx, SColumnInfoData* pTimeWindowData, - int32_t offset, int32_t forwardStep, int32_t numOfTotal, int32_t numOfOutput); - -int32_t extractDataBlockFromFetchRsp(SSDataBlock* pRes, char* pData, SArray* pColList, char** pNextStart); -void updateLoadRemoteInfo(SLoadRemoteDataInfo* pInfo, int64_t numOfRows, int32_t dataLen, int64_t startTs, - SOperatorInfo* pOperator); - -STimeWindow getFirstQualifiedTimeWindow(int64_t ts, STimeWindow* pWindow, SInterval* pInterval, int32_t order); - -SOperatorInfo* extractOperatorInTree(SOperatorInfo* pOperator, int32_t type, const char* id); -int32_t getTableScanInfo(SOperatorInfo* pOperator, int32_t* order, int32_t* scanFlag, bool inheritUsOrder); -int32_t getBufferPgSize(int32_t rowSize, uint32_t* defaultPgsz, uint32_t* defaultBufsz); - -extern void doDestroyExchangeOperatorInfo(void* param); - -void doFilter(SSDataBlock* pBlock, SFilterInfo* pFilterInfo, SColMatchInfo* pColMatchInfo); -int32_t addTagPseudoColumnData(SReadHandle* pHandle, const SExprInfo* pExpr, int32_t numOfExpr, SSDataBlock* pBlock, - int32_t rows, const char* idStr, STableMetaCacheInfo* pCache); - -void appendOneRowToDataBlock(SSDataBlock* pBlock, STupleHandle* pTupleHandle); -void setTbNameColData(const SSDataBlock* pBlock, SColumnInfoData* pColInfoData, int32_t functionId, const char* name); - -void setResultRowInitCtx(SResultRow* pResult, SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowEntryInfoOffset); -void clearResultRowInitFlag(SqlFunctionCtx* pCtx, int32_t numOfOutput); - -SResultRow* doSetResultOutBufByKey(SDiskbasedBuf* pResultBuf, SResultRowInfo* pResultRowInfo, char* pData, - int16_t bytes, bool masterscan, uint64_t groupId, SExecTaskInfo* pTaskInfo, - bool isIntervalQuery, SAggSupporter* pSup, bool keepGroup); -// operator creater functions -// clang-format off -SOperatorInfo* createExchangeOperatorInfo(void* pTransporter, SExchangePhysiNode* pExNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode, SReadHandle* pHandle, STableListInfo* pTableList, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanNode, SReadHandle* readHandle, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createTagScanOperatorInfo(SReadHandle* pReadHandle, STagScanPhysiNode* pPhyNode, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createSysTableScanOperatorInfo(void* readHandle, SSystemTableScanPhysiNode* pScanPhyNode, const char* pUser, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createTableCountScanOperatorInfo(SReadHandle* handle, STableCountScanPhysiNode* pNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SAggPhysiNode* pNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createIndefinitOutputOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SProjectPhysiNode* pProjPhyNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSortPhysiNode* pSortNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** dowStreams, size_t numStreams, SMergePhysiNode* pMergePhysiNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pTableScanNode, SReadHandle* readHandle, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SIntervalPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SMergeIntervalPhysiNode* pIntervalPhyNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createMergeAlignedIntervalOperatorInfo(SOperatorInfo* downstream, SMergeAlignedIntervalPhysiNode* pNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild); - -SOperatorInfo* createSessionAggOperatorInfo(SOperatorInfo* downstream, SSessionWinodwPhysiNode* pSessionNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createGroupOperatorInfo(SOperatorInfo* downstream, SAggPhysiNode* pAggNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createDataBlockInfoScanOperator(SReadHandle* readHandle, SBlockDistScanPhysiNode* pBlockScanNode, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhysiNode* pTableScanNode, SNode* pTagCond, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createRawScanOperatorInfo(SReadHandle* pHandle, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createFillOperatorInfo(SOperatorInfo* downstream, SFillPhysiNode* pPhyFillNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createStatewindowOperatorInfo(SOperatorInfo* downstream, SStateWinodwPhysiNode* pStateNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createPartitionOperatorInfo(SOperatorInfo* downstream, SPartitionPhysiNode* pPartNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createStreamPartitionOperatorInfo(SOperatorInfo* downstream, SStreamPartitionPhysiNode* pPartNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createTimeSliceOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream, SSortMergeJoinPhysiNode* pJoinNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild); - -SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createStreamFillOperatorInfo(SOperatorInfo* downstream, SStreamFillPhysiNode* pPhyFillNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createGroupSortOperatorInfo(SOperatorInfo* downstream, SGroupSortPhysiNode* pSortPhyNode, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createEventwindowOperatorInfo(SOperatorInfo* downstream, SPhysiNode* physiNode, SExecTaskInfo* pTaskInfo); -// clang-format on - -int32_t projectApplyFunctions(SExprInfo* pExpr, SSDataBlock* pResult, SSDataBlock* pSrcBlock, SqlFunctionCtx* pCtx, - int32_t numOfOutput, SArray* pPseudoList); - -void setInputDataBlock(SExprSupp* pExprSupp, SSDataBlock* pBlock, int32_t order, int32_t scanFlag, bool createDummyCol); - -int32_t checkForQueryBuf(size_t numOfTables); - -bool isTaskKilled(SExecTaskInfo* pTaskInfo); -void setTaskKilled(SExecTaskInfo* pTaskInfo, int32_t rspCode); -void doDestroyTask(SExecTaskInfo* pTaskInfo); -void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status); - -void buildTaskId(uint64_t taskId, uint64_t queryId, char* dst); - -SArray* getTableListInfo(const SExecTaskInfo* pTaskInfo); - -int32_t createExecTaskInfo(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SReadHandle* pHandle, uint64_t taskId, - int32_t vgId, char* sql, EOPTR_EXEC_MODEL model); -int32_t createDataSinkParam(SDataSinkNode* pNode, void** pParam, SExecTaskInfo* pTask, SReadHandle* readHandle); -int32_t getOperatorExplainExecInfo(SOperatorInfo* operatorInfo, SArray* pExecInfoList); - -STimeWindow getActiveTimeWindow(SDiskbasedBuf* pBuf, SResultRowInfo* pResultRowInfo, int64_t ts, SInterval* pInterval, - int32_t order); -int32_t getNumOfRowsInTimeWindow(SDataBlockInfo* pDataBlockInfo, TSKEY* pPrimaryColumn, int32_t startPos, TSKEY ekey, - __block_search_fn_t searchFn, STableQueryInfo* item, int32_t order); -int32_t binarySearchForKey(char* pValue, int num, TSKEY key, int order); -SResultRow* getNewResultRow(SDiskbasedBuf* pResultBuf, int32_t* currentPageId, int32_t interBufSize); -void getCurSessionWindow(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId, SSessionKey* pKey); -bool isInTimeWindow(STimeWindow* pWin, TSKEY ts, int64_t gap); -bool functionNeedToExecute(SqlFunctionCtx* pCtx); -bool isOverdue(TSKEY ts, STimeWindowAggSupp* pSup); -bool isCloseWindow(STimeWindow* pWin, STimeWindowAggSupp* pSup); -bool isDeletedStreamWindow(STimeWindow* pWin, uint64_t groupId, SStreamState* pState, STimeWindowAggSupp* pTwSup); -void appendOneRowToStreamSpecialBlock(SSDataBlock* pBlock, TSKEY* pStartTs, TSKEY* pEndTs, uint64_t* pUid, - uint64_t* pGp, void* pTbName); -uint64_t calGroupIdByData(SPartitionBySupporter* pParSup, SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t rowId); - -int32_t finalizeResultRows(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPosition, SExprSupp* pSup, - SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo); - -bool groupbyTbname(SNodeList* pGroupList); -int32_t saveSessionDiscBuf(SStreamState* pState, SSessionKey* key, void* buf, int32_t size); -int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, SStreamState* pState, SSDataBlock* pBlock, - SExprSupp* pSup, SGroupResInfo* pGroupResInfo); -int32_t releaseOutputBuf(SStreamState* pState, SWinKey* pKey, SResultRow* pResult); -int32_t saveOutputBuf(SStreamState* pState, SWinKey* pKey, SResultRow* pResult, int32_t resSize); -void getNextIntervalWindow(SInterval* pInterval, STimeWindow* tw, int32_t order); -int32_t qAppendTaskStopInfo(SExecTaskInfo* pTaskInfo, SExchangeOpStopInfo* pInfo); -int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int32_t pos, int32_t order, - int64_t* pData); -void appendCreateTableRow(SStreamState* pState, SExprSupp* pTableSup, SExprSupp* pTagSup, uint64_t groupId, - SSDataBlock* pSrcBlock, int32_t rowId, SSDataBlock* pDestBlock); - -SSDataBlock* buildCreateTableBlock(SExprSupp* tbName, SExprSupp* tag); -SExprInfo* createExpr(SNodeList* pNodeList, int32_t* numOfExprs); - -void copyResultrowToDataBlock(SExprInfo* pExprInfo, int32_t numOfExprs, SResultRow* pRow, SqlFunctionCtx* pCtx, - SSDataBlock* pBlock, const int32_t* rowEntryOffset, SExecTaskInfo* pTaskInfo); -void doUpdateNumOfRows(SqlFunctionCtx* pCtx, SResultRow* pRow, int32_t numOfExprs, const int32_t* rowEntryOffset); -void doClearBufferedBlocks(SStreamScanInfo* pInfo); - -#ifdef __cplusplus -} -#endif - -#endif // TDENGINE_EXECUTORIMPL_H diff --git a/source/libs/executor/inc/operator.h b/source/libs/executor/inc/operator.h new file mode 100644 index 0000000000..632b817a07 --- /dev/null +++ b/source/libs/executor/inc/operator.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TDENGINE_OPERATOR_H +#define TDENGINE_OPERATOR_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct SOperatorCostInfo { + double openCost; + double totalCost; +} SOperatorCostInfo; + +struct SOperatorInfo; + +typedef int32_t (*__optr_encode_fn_t)(struct SOperatorInfo* pOperator, char** result, int32_t* length); +typedef int32_t (*__optr_decode_fn_t)(struct SOperatorInfo* pOperator, char* result); + +typedef int32_t (*__optr_open_fn_t)(struct SOperatorInfo* pOptr); +typedef SSDataBlock* (*__optr_fn_t)(struct SOperatorInfo* pOptr); +typedef void (*__optr_close_fn_t)(void* param); +typedef int32_t (*__optr_explain_fn_t)(struct SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len); +typedef int32_t (*__optr_reqBuf_fn_t)(struct SOperatorInfo* pOptr); + +typedef struct SOperatorFpSet { + __optr_open_fn_t _openFn; // DO NOT invoke this function directly + __optr_fn_t getNextFn; + __optr_fn_t cleanupFn; // call this function to release the allocated resources ASAP + __optr_close_fn_t closeFn; + __optr_reqBuf_fn_t reqBufFn; // total used buffer for blocking operator + __optr_encode_fn_t encodeResultRow; + __optr_decode_fn_t decodeResultRow; + __optr_explain_fn_t getExplainFn; +} SOperatorFpSet; + +enum { + OP_NOT_OPENED = 0x0, + OP_OPENED = 0x1, + OP_RES_TO_RETURN = 0x5, + OP_EXEC_DONE = 0x9, +}; + +typedef struct SOperatorInfo { + uint16_t operatorType; + int16_t resultDataBlockId; + bool blocking; // block operator or not + uint8_t status; // denote if current operator is completed + char* name; // name, for debug purpose + void* info; // extension attribution + SExprSupp exprSupp; + SExecTaskInfo* pTaskInfo; + SOperatorCostInfo cost; + SResultInfo resultInfo; + struct SOperatorInfo** pDownstream; // downstram pointer list + int32_t numOfDownstream; // number of downstream. The value is always ONE expect for join operator + SOperatorFpSet fpSet; +} SOperatorInfo; + +// operator creater functions +// clang-format off +SOperatorInfo* createExchangeOperatorInfo(void* pTransporter, SExchangePhysiNode* pExNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode, SReadHandle* pHandle, STableListInfo* pTableList, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanNode, SReadHandle* readHandle, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createTagScanOperatorInfo(SReadHandle* pReadHandle, STagScanPhysiNode* pPhyNode, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createSysTableScanOperatorInfo(void* readHandle, SSystemTableScanPhysiNode* pScanPhyNode, const char* pUser, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createTableCountScanOperatorInfo(SReadHandle* handle, STableCountScanPhysiNode* pNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SAggPhysiNode* pNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createIndefinitOutputOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SProjectPhysiNode* pProjPhyNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSortPhysiNode* pSortNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** dowStreams, size_t numStreams, SMergePhysiNode* pMergePhysiNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pTableScanNode, SReadHandle* readHandle, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SIntervalPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SMergeIntervalPhysiNode* pIntervalPhyNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createMergeAlignedIntervalOperatorInfo(SOperatorInfo* downstream, SMergeAlignedIntervalPhysiNode* pNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild); + +SOperatorInfo* createSessionAggOperatorInfo(SOperatorInfo* downstream, SSessionWinodwPhysiNode* pSessionNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createGroupOperatorInfo(SOperatorInfo* downstream, SAggPhysiNode* pAggNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createDataBlockInfoScanOperator(SReadHandle* readHandle, SBlockDistScanPhysiNode* pBlockScanNode, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhysiNode* pTableScanNode, SNode* pTagCond, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createRawScanOperatorInfo(SReadHandle* pHandle, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createFillOperatorInfo(SOperatorInfo* downstream, SFillPhysiNode* pPhyFillNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createStatewindowOperatorInfo(SOperatorInfo* downstream, SStateWinodwPhysiNode* pStateNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createPartitionOperatorInfo(SOperatorInfo* downstream, SPartitionPhysiNode* pPartNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createStreamPartitionOperatorInfo(SOperatorInfo* downstream, SStreamPartitionPhysiNode* pPartNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createTimeSliceOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream, SSortMergeJoinPhysiNode* pJoinNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild); + +SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createStreamFillOperatorInfo(SOperatorInfo* downstream, SStreamFillPhysiNode* pPhyFillNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createGroupSortOperatorInfo(SOperatorInfo* downstream, SGroupSortPhysiNode* pSortPhyNode, SExecTaskInfo* pTaskInfo); + +SOperatorInfo* createEventwindowOperatorInfo(SOperatorInfo* downstream, SPhysiNode* physiNode, SExecTaskInfo* pTaskInfo); +// clang-format on + +SOperatorFpSet createOperatorFpSet(__optr_open_fn_t openFn, __optr_fn_t nextFn, __optr_fn_t cleanup, + __optr_close_fn_t closeFn, __optr_reqBuf_fn_t reqBufFn, __optr_explain_fn_t explain); +int32_t optrDummyOpenFn(SOperatorInfo* pOperator); +int32_t appendDownstream(SOperatorInfo* p, SOperatorInfo** pDownstream, int32_t num); +void setOperatorCompleted(SOperatorInfo* pOperator); +void setOperatorInfo(SOperatorInfo* pOperator, const char* name, int32_t type, bool blocking, int32_t status, + void* pInfo, SExecTaskInfo* pTaskInfo); +int32_t optrDefaultBufFn(SOperatorInfo* pOperator); + +SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle, SNode* pTagCond, + SNode* pTagIndexCond, const char* pUser, const char* dbname); +void destroyOperator(SOperatorInfo* pOperator); + +SOperatorInfo* extractOperatorInTree(SOperatorInfo* pOperator, int32_t type, const char* id); +int32_t getTableScanInfo(SOperatorInfo* pOperator, int32_t* order, int32_t* scanFlag, bool inheritUsOrder); +int32_t stopTableScanOperator(SOperatorInfo* pOperator, const char* pIdStr); +int32_t getOperatorExplainExecInfo(struct SOperatorInfo* operatorInfo, SArray* pExecInfoList); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_OPERATOR_H \ No newline at end of file diff --git a/source/libs/executor/inc/querytask.h b/source/libs/executor/inc/querytask.h new file mode 100644 index 0000000000..8852265da0 --- /dev/null +++ b/source/libs/executor/inc/querytask.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TDENGINE_QUERYTASK_H +#define TDENGINE_QUERYTASK_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define GET_TASKID(_t) (((SExecTaskInfo*)(_t))->id.str) + +enum { + // when this task starts to execute, this status will set + TASK_NOT_COMPLETED = 0x1u, + + /* Task is over + * 1. this status is used in one row result query process, e.g., count/sum/first/last/ avg...etc. + * 2. when all data within queried time window, it is also denoted as query_completed + */ + TASK_COMPLETED = 0x2u, +}; + +typedef struct STaskIdInfo { + uint64_t queryId; // this is also a request id + uint64_t subplanId; + uint64_t templateId; + char* str; + int32_t vgId; +} STaskIdInfo; + +typedef struct STaskCostInfo { + int64_t created; + int64_t start; + uint64_t elapsedTime; + double extractListTime; + double groupIdMapTime; + SFileBlockLoadRecorder* pRecoder; +} STaskCostInfo; + +typedef struct STaskStopInfo { + SRWLatch lock; + SArray* pStopInfo; +} STaskStopInfo; + +typedef struct { + STqOffsetVal currentOffset; // for tmq + SMqMetaRsp metaRsp; // for tmq fetching meta + int64_t snapshotVer; + SPackedData submit; // todo remove it + SSchemaWrapper* schema; + char tbName[TSDB_TABLE_NAME_LEN]; // this is the current scan table: todo refactor + int8_t recoverStep; + int8_t recoverScanFinished; + SQueryTableDataCond tableCond; + int64_t fillHistoryVer1; + int64_t fillHistoryVer2; + SStreamState* pState; + int64_t dataVersion; + int64_t checkPointId; +} SStreamTaskInfo; + +struct SExecTaskInfo { + STaskIdInfo id; + uint32_t status; + STimeWindow window; + STaskCostInfo cost; + int64_t owner; // if it is in execution + int32_t code; + int32_t qbufQuota; // total available buffer (in KB) during execution query + int64_t version; // used for stream to record wal version, why not move to sschemainfo + SStreamTaskInfo streamInfo; + SSchemaInfo schemaInfo; + const char* sql; // query sql string + jmp_buf env; // jump to this position when error happens. + EOPTR_EXEC_MODEL execModel; // operator execution model [batch model|stream model] + SSubplan* pSubplan; + struct SOperatorInfo* pRoot; + SLocalFetch localFetch; + SArray* pResultBlockList; // result block list + STaskStopInfo stopInfo; + SRWLatch lock; // secure the access of STableListInfo +}; + +void buildTaskId(uint64_t taskId, uint64_t queryId, char* dst); +SExecTaskInfo* doCreateTask(uint64_t queryId, uint64_t taskId, int32_t vgId, EOPTR_EXEC_MODEL model); +void doDestroyTask(SExecTaskInfo* pTaskInfo); +bool isTaskKilled(SExecTaskInfo* pTaskInfo); +void setTaskKilled(SExecTaskInfo* pTaskInfo, int32_t rspCode); +void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status); +int32_t createExecTaskInfo(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SReadHandle* pHandle, uint64_t taskId, + int32_t vgId, char* sql, EOPTR_EXEC_MODEL model); +int32_t qAppendTaskStopInfo(SExecTaskInfo* pTaskInfo, SExchangeOpStopInfo* pInfo); +SArray* getTableListInfo(const SExecTaskInfo* pTaskInfo); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_QUERYTASK_H diff --git a/source/libs/executor/src/aggregateoperator.c b/source/libs/executor/src/aggregateoperator.c index ec8060348d..9b463a3dee 100644 --- a/source/libs/executor/src/aggregateoperator.c +++ b/source/libs/executor/src/aggregateoperator.c @@ -15,24 +15,21 @@ #include "filter.h" #include "function.h" -#include "functionMgt.h" #include "os.h" #include "querynodes.h" #include "tfill.h" #include "tname.h" +#include "executorInt.h" +#include "index.h" +#include "operator.h" +#include "query.h" +#include "querytask.h" +#include "tcompare.h" #include "tdatablock.h" #include "tglobal.h" -#include "tmsg.h" -#include "ttime.h" - -#include "executorimpl.h" -#include "index.h" -#include "query.h" -#include "tcompare.h" #include "thash.h" #include "ttypes.h" -#include "vnode.h" typedef struct { bool hasAgg; diff --git a/source/libs/executor/src/cachescanoperator.c b/source/libs/executor/src/cachescanoperator.c index 61f024ebb7..00eb042478 100644 --- a/source/libs/executor/src/cachescanoperator.c +++ b/source/libs/executor/src/cachescanoperator.c @@ -20,7 +20,9 @@ #include "tdatablock.h" #include "tmsg.h" -#include "executorimpl.h" +#include "executorInt.h" +#include "operator.h" +#include "querytask.h" #include "tcompare.h" #include "thash.h" #include "ttypes.h" diff --git a/source/libs/executor/src/dataDeleter.c b/source/libs/executor/src/dataDeleter.c index 96d061fc04..11074b0e94 100644 --- a/source/libs/executor/src/dataDeleter.c +++ b/source/libs/executor/src/dataDeleter.c @@ -15,7 +15,7 @@ #include "dataSinkInt.h" #include "dataSinkMgt.h" -#include "executorimpl.h" +#include "executorInt.h" #include "planner.h" #include "tcompression.h" #include "tdatablock.h" diff --git a/source/libs/executor/src/dataDispatcher.c b/source/libs/executor/src/dataDispatcher.c index f672088ba6..f7167343d0 100644 --- a/source/libs/executor/src/dataDispatcher.c +++ b/source/libs/executor/src/dataDispatcher.c @@ -15,7 +15,7 @@ #include "dataSinkInt.h" #include "dataSinkMgt.h" -#include "executorimpl.h" +#include "executorInt.h" #include "planner.h" #include "tcompression.h" #include "tdatablock.h" diff --git a/source/libs/executor/src/dataInserter.c b/source/libs/executor/src/dataInserter.c index 22f388d406..33eccf4759 100644 --- a/source/libs/executor/src/dataInserter.c +++ b/source/libs/executor/src/dataInserter.c @@ -15,7 +15,7 @@ #include "dataSinkInt.h" #include "dataSinkMgt.h" -#include "executorimpl.h" +#include "executorInt.h" #include "planner.h" #include "tcompression.h" #include "tdatablock.h" diff --git a/source/libs/executor/src/eventwindowoperator.c b/source/libs/executor/src/eventwindowoperator.c index 559fce5224..956d5b714d 100644 --- a/source/libs/executor/src/eventwindowoperator.c +++ b/source/libs/executor/src/eventwindowoperator.c @@ -13,10 +13,12 @@ * along with this program. If not, see . */ -#include "executorimpl.h" +#include "executorInt.h" #include "filter.h" #include "function.h" #include "functionMgt.h" +#include "operator.h" +#include "querytask.h" #include "tcommon.h" #include "tcompare.h" #include "tdatablock.h" diff --git a/source/libs/executor/src/exchangeoperator.c b/source/libs/executor/src/exchangeoperator.c index c855a104b2..94041140d4 100644 --- a/source/libs/executor/src/exchangeoperator.c +++ b/source/libs/executor/src/exchangeoperator.c @@ -13,17 +13,19 @@ * along with this program. If not, see . */ +#include "executorInt.h" #include "filter.h" #include "function.h" +#include "index.h" +#include "operator.h" #include "os.h" +#include "query.h" +#include "querytask.h" +#include "tdatablock.h" +#include "thash.h" +#include "tmsg.h" #include "tname.h" #include "tref.h" -#include "tdatablock.h" -#include "tmsg.h" -#include "executorimpl.h" -#include "index.h" -#include "query.h" -#include "thash.h" typedef struct SFetchRspHandleWrapper { uint32_t exchangeId; diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index d79ed0176d..c51dc39b5b 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -24,7 +24,8 @@ #include "ttime.h" #include "executil.h" -#include "executorimpl.h" +#include "executorInt.h" +#include "querytask.h" #include "tcompression.h" typedef struct STableListIdInfo { diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 546cd18cda..7a5715e5ed 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -14,9 +14,10 @@ */ #include "executor.h" -#include -#include "executorimpl.h" +#include "executorInt.h" +#include "operator.h" #include "planner.h" +#include "querytask.h" #include "tdatablock.h" #include "tref.h" #include "tudf.h" @@ -249,7 +250,7 @@ int32_t qSetSMAInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, qTaskInfo_t qCreateQueueExecTaskInfo(void* msg, SReadHandle* pReaderHandle, int32_t vgId, int32_t* numOfCols, uint64_t id) { if (msg == NULL) { // create raw scan - SExecTaskInfo* pTaskInfo = doCreateExecTaskInfo(0, id, vgId, OPTR_EXEC_MODEL_QUEUE, ""); + SExecTaskInfo* pTaskInfo = doCreateTask(0, id, vgId, OPTR_EXEC_MODEL_QUEUE); if (NULL == pTaskInfo) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; @@ -717,8 +718,6 @@ void qRemoveTaskStopInfo(SExecTaskInfo* pTaskInfo, SExchangeOpStopInfo* pInfo) { taosArrayRemove(pTaskInfo->stopInfo.pStopInfo, idx); } taosWUnLockLatch(&pTaskInfo->stopInfo.lock); - - return; } void qStopTaskOperators(SExecTaskInfo* pTaskInfo) { @@ -1303,3 +1302,25 @@ SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo) { taosArrayDestroy(plist); return pUidList; } + +static void extractTableList(SArray* pList, const SOperatorInfo* pOperator) { + if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { + SStreamScanInfo* pScanInfo = pOperator->info; + STableScanInfo* pTableScanInfo = pScanInfo->pTableScanOp->info; + taosArrayPush(pList, &pTableScanInfo->base.pTableListInfo); + } else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { + STableScanInfo* pScanInfo = pOperator->info; + taosArrayPush(pList, &pScanInfo->base.pTableListInfo); + } else { + if (pOperator->pDownstream != NULL && pOperator->pDownstream[0] != NULL) { + extractTableList(pList, pOperator->pDownstream[0]); + } + } +} + +SArray* getTableListInfo(const SExecTaskInfo* pTaskInfo) { + SArray* pArray = taosArrayInit(0, POINTER_BYTES); + SOperatorInfo* pOperator = pTaskInfo->pRoot; + extractTableList(pArray, pOperator); + return pArray; +} \ No newline at end of file diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorInt.c similarity index 58% rename from source/libs/executor/src/executorimpl.c rename to source/libs/executor/src/executorInt.c index 7a01267014..d1bde3dba4 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorInt.c @@ -22,13 +22,14 @@ #include "tname.h" #include "tdatablock.h" -#include "tglobal.h" #include "tmsg.h" #include "ttime.h" -#include "executorimpl.h" +#include "executorInt.h" #include "index.h" +#include "operator.h" #include "query.h" +#include "querytask.h" #include "tcompare.h" #include "thash.h" #include "ttypes.h" @@ -71,12 +72,8 @@ static UNUSED_FUNC void* u_realloc(void* p, size_t __size) { #define realloc u_realloc #endif -#define CLEAR_QUERY_STATUS(q, st) ((q)->status &= (~(st))) - static void setBlockSMAInfo(SqlFunctionCtx* pCtx, SExprInfo* pExpr, SSDataBlock* pBlock); -static void releaseQueryBuf(size_t numOfTables); - static void initCtxOutputBuffer(SqlFunctionCtx* pCtx, int32_t size); static void doApplyScalarCalculation(SOperatorInfo* pOperator, SSDataBlock* pBlock, int32_t order, int32_t scanFlag); @@ -86,44 +83,6 @@ static int32_t doSetInputDataBlock(SExprSupp* pExprSup, SSDataBlock* pBlock, int bool createDummyCol); static int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprSupp* pSup, SDiskbasedBuf* pBuf, SGroupResInfo* pGroupResInfo); -static SSchemaWrapper* extractQueriedColumnSchema(SScanPhysiNode* pScanNode); - -void setOperatorCompleted(SOperatorInfo* pOperator) { - pOperator->status = OP_EXEC_DONE; - pOperator->cost.totalCost = (taosGetTimestampUs() - pOperator->pTaskInfo->cost.start) / 1000.0; - setTaskStatus(pOperator->pTaskInfo, TASK_COMPLETED); -} - -void setOperatorInfo(SOperatorInfo* pOperator, const char* name, int32_t type, bool blocking, int32_t status, - void* pInfo, SExecTaskInfo* pTaskInfo) { - pOperator->name = (char*)name; - pOperator->operatorType = type; - pOperator->blocking = blocking; - pOperator->status = status; - pOperator->info = pInfo; - pOperator->pTaskInfo = pTaskInfo; -} - -int32_t optrDummyOpenFn(SOperatorInfo* pOperator) { - OPTR_SET_OPENED(pOperator); - pOperator->cost.openCost = 0; - return TSDB_CODE_SUCCESS; -} - -SOperatorFpSet createOperatorFpSet(__optr_open_fn_t openFn, __optr_fn_t nextFn, __optr_fn_t cleanup, - __optr_close_fn_t closeFn, __optr_reqBuf_fn_t reqBufFn, - __optr_explain_fn_t explain) { - SOperatorFpSet fpSet = { - ._openFn = openFn, - .getNextFn = nextFn, - .cleanupFn = cleanup, - .closeFn = closeFn, - .reqBufFn = reqBufFn, - .getExplainFn = explain, - }; - - return fpSet; -} SResultRow* getNewResultRow(SDiskbasedBuf* pResultBuf, int32_t* currentPageId, int32_t interBufSize) { SFilePage* pData = NULL; @@ -482,10 +441,6 @@ void setBlockSMAInfo(SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, SSDataBlock* pB } } -bool isTaskKilled(SExecTaskInfo* pTaskInfo) { return (0 != pTaskInfo->code); } - -void setTaskKilled(SExecTaskInfo* pTaskInfo, int32_t rspCode) { pTaskInfo->code = rspCode; } - ///////////////////////////////////////////////////////////////////////////////////////////// STimeWindow getAlignQueryTimeWindow(SInterval* pInterval, int32_t precision, int64_t key) { STimeWindow win = {0}; @@ -503,16 +458,6 @@ STimeWindow getAlignQueryTimeWindow(SInterval* pInterval, int32_t precision, int return win; } -void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status) { - if (status == TASK_NOT_COMPLETED) { - pTaskInfo->status = status; - } else { - // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first - CLEAR_QUERY_STATUS(pTaskInfo, TASK_NOT_COMPLETED); - pTaskInfo->status |= status; - } -} - void setResultRowInitCtx(SResultRow* pResult, SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowEntryInfoOffset) { bool init = false; for (int32_t i = 0; i < numOfOutput; ++i) { @@ -923,72 +868,6 @@ void doBuildResultDatablock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SG } } -int32_t appendDownstream(SOperatorInfo* p, SOperatorInfo** pDownstream, int32_t num) { - p->pDownstream = taosMemoryCalloc(1, num * POINTER_BYTES); - if (p->pDownstream == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - memcpy(p->pDownstream, pDownstream, num * POINTER_BYTES); - p->numOfDownstream = num; - return TSDB_CODE_SUCCESS; -} - -int32_t getTableScanInfo(SOperatorInfo* pOperator, int32_t* order, int32_t* scanFlag, bool inheritUsOrder) { - // todo add more information about exchange operation - int32_t type = pOperator->operatorType; - if (type == QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN || type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN || - type == QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN || type == QUERY_NODE_PHYSICAL_PLAN_BLOCK_DIST_SCAN || - type == QUERY_NODE_PHYSICAL_PLAN_LAST_ROW_SCAN || type == QUERY_NODE_PHYSICAL_PLAN_TABLE_COUNT_SCAN) { - *order = TSDB_ORDER_ASC; - *scanFlag = MAIN_SCAN; - return TSDB_CODE_SUCCESS; - } else if (type == QUERY_NODE_PHYSICAL_PLAN_EXCHANGE) { - if (!inheritUsOrder) { - *order = TSDB_ORDER_ASC; - } - *scanFlag = MAIN_SCAN; - return TSDB_CODE_SUCCESS; - } else if (type == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { - STableScanInfo* pTableScanInfo = pOperator->info; - *order = pTableScanInfo->base.cond.order; - *scanFlag = pTableScanInfo->base.scanFlag; - return TSDB_CODE_SUCCESS; - } else if (type == QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN) { - STableMergeScanInfo* pTableScanInfo = pOperator->info; - *order = pTableScanInfo->base.cond.order; - *scanFlag = pTableScanInfo->base.scanFlag; - return TSDB_CODE_SUCCESS; - } else { - if (pOperator->pDownstream == NULL || pOperator->pDownstream[0] == NULL) { - return TSDB_CODE_INVALID_PARA; - } else { - return getTableScanInfo(pOperator->pDownstream[0], order, scanFlag, inheritUsOrder); - } - } -} - -// QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN -SOperatorInfo* extractOperatorInTree(SOperatorInfo* pOperator, int32_t type, const char* id) { - if (pOperator == NULL) { - qError("invalid operator, failed to find tableScanOperator %s", id); - terrno = TSDB_CODE_PAR_INTERNAL_ERROR; - return NULL; - } - - if (pOperator->operatorType == type) { - return pOperator; - } else { - if (pOperator->pDownstream == NULL || pOperator->pDownstream[0] == NULL) { - qError("invalid operator, failed to find tableScanOperator %s", id); - terrno = TSDB_CODE_PAR_INTERNAL_ERROR; - return NULL; - } - - return extractOperatorInTree(pOperator->pDownstream[0], type, id); - } -} - void destroyExprInfo(SExprInfo* pExpr, int32_t numOfExprs) { for (int32_t i = 0; i < numOfExprs; ++i) { SExprInfo* pExprInfo = &pExpr[i]; @@ -1005,37 +884,6 @@ void destroyExprInfo(SExprInfo* pExpr, int32_t numOfExprs) { } } -void destroyOperatorInfo(SOperatorInfo* pOperator) { - if (pOperator == NULL) { - return; - } - - if (pOperator->fpSet.closeFn != NULL) { - pOperator->fpSet.closeFn(pOperator->info); - } - - if (pOperator->pDownstream != NULL) { - for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) { - destroyOperatorInfo(pOperator->pDownstream[i]); - } - - taosMemoryFreeClear(pOperator->pDownstream); - pOperator->numOfDownstream = 0; - } - - cleanupExprSupp(&pOperator->exprSupp); - taosMemoryFreeClear(pOperator); -} - -// each operator should be set their own function to return total cost buffer -int32_t optrDefaultBufFn(SOperatorInfo* pOperator) { - if (pOperator->blocking) { - return -1; - } else { - return 0; - } -} - int32_t getBufferPgSize(int32_t rowSize, uint32_t* defaultPgsz, uint32_t* defaultBufsz) { *defaultPgsz = 4096; while (*defaultPgsz < rowSize * 4) { @@ -1125,136 +973,6 @@ void cleanupExprSupp(SExprSupp* pSupp) { void cleanupBasicInfo(SOptrBasicInfo* pInfo) { pInfo->pRes = blockDataDestroy(pInfo->pRes); } -void buildTaskId(uint64_t taskId, uint64_t queryId, char* dst) { - char* p = dst; - - int32_t offset = 6; - memcpy(p, "TID:0x", offset); - offset += tintToHex(taskId, &p[offset]); - - memcpy(&p[offset], " QID:0x", 7); - offset += 7; - offset += tintToHex(queryId, &p[offset]); - - p[offset] = 0; -} - -SExecTaskInfo* doCreateExecTaskInfo(uint64_t queryId, uint64_t taskId, int32_t vgId, EOPTR_EXEC_MODEL model, - char* dbFName) { - SExecTaskInfo* pTaskInfo = taosMemoryCalloc(1, sizeof(SExecTaskInfo)); - if (pTaskInfo == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - - setTaskStatus(pTaskInfo, TASK_NOT_COMPLETED); - pTaskInfo->cost.created = taosGetTimestampUs(); - - pTaskInfo->schemaInfo.dbname = taosStrdup(dbFName); - pTaskInfo->execModel = model; - pTaskInfo->stopInfo.pStopInfo = taosArrayInit(4, sizeof(SExchangeOpStopInfo)); - pTaskInfo->pResultBlockList = taosArrayInit(128, POINTER_BYTES); - - taosInitRWLatch(&pTaskInfo->lock); - pTaskInfo->id.vgId = vgId; - pTaskInfo->id.queryId = queryId; - - pTaskInfo->id.str = taosMemoryMalloc(64); - buildTaskId(taskId, queryId, pTaskInfo->id.str); - return pTaskInfo; -} - -int32_t extractTableSchemaInfo(SReadHandle* pHandle, SScanPhysiNode* pScanNode, SExecTaskInfo* pTaskInfo) { - SMetaReader mr = {0}; - if (pHandle == NULL) { - terrno = TSDB_CODE_INVALID_PARA; - return terrno; - } - - metaReaderInit(&mr, pHandle->meta, 0); - int32_t code = metaGetTableEntryByUidCache(&mr, pScanNode->uid); - if (code != TSDB_CODE_SUCCESS) { - qError("failed to get the table meta, uid:0x%" PRIx64 ", suid:0x%" PRIx64 ", %s", pScanNode->uid, pScanNode->suid, - GET_TASKID(pTaskInfo)); - - metaReaderClear(&mr); - return terrno; - } - - SSchemaInfo* pSchemaInfo = &pTaskInfo->schemaInfo; - pSchemaInfo->tablename = taosStrdup(mr.me.name); - - if (mr.me.type == TSDB_SUPER_TABLE) { - pSchemaInfo->sw = tCloneSSchemaWrapper(&mr.me.stbEntry.schemaRow); - pSchemaInfo->tversion = mr.me.stbEntry.schemaTag.version; - } else if (mr.me.type == TSDB_CHILD_TABLE) { - tDecoderClear(&mr.coder); - - tb_uid_t suid = mr.me.ctbEntry.suid; - code = metaGetTableEntryByUidCache(&mr, suid); - if (code != TSDB_CODE_SUCCESS) { - metaReaderClear(&mr); - return terrno; - } - - pSchemaInfo->sw = tCloneSSchemaWrapper(&mr.me.stbEntry.schemaRow); - pSchemaInfo->tversion = mr.me.stbEntry.schemaTag.version; - } else { - pSchemaInfo->sw = tCloneSSchemaWrapper(&mr.me.ntbEntry.schemaRow); - } - - metaReaderClear(&mr); - - pSchemaInfo->qsw = extractQueriedColumnSchema(pScanNode); - return TSDB_CODE_SUCCESS; -} - -SSchemaWrapper* extractQueriedColumnSchema(SScanPhysiNode* pScanNode) { - int32_t numOfCols = LIST_LENGTH(pScanNode->pScanCols); - int32_t numOfTags = LIST_LENGTH(pScanNode->pScanPseudoCols); - - SSchemaWrapper* pqSw = taosMemoryCalloc(1, sizeof(SSchemaWrapper)); - pqSw->pSchema = taosMemoryCalloc(numOfCols + numOfTags, sizeof(SSchema)); - - for (int32_t i = 0; i < numOfCols; ++i) { - STargetNode* pNode = (STargetNode*)nodesListGetNode(pScanNode->pScanCols, i); - SColumnNode* pColNode = (SColumnNode*)pNode->pExpr; - - SSchema* pSchema = &pqSw->pSchema[pqSw->nCols++]; - pSchema->colId = pColNode->colId; - pSchema->type = pColNode->node.resType.type; - pSchema->bytes = pColNode->node.resType.bytes; - tstrncpy(pSchema->name, pColNode->colName, tListLen(pSchema->name)); - } - - // this the tags and pseudo function columns, we only keep the tag columns - for (int32_t i = 0; i < numOfTags; ++i) { - STargetNode* pNode = (STargetNode*)nodesListGetNode(pScanNode->pScanPseudoCols, i); - - int32_t type = nodeType(pNode->pExpr); - if (type == QUERY_NODE_COLUMN) { - SColumnNode* pColNode = (SColumnNode*)pNode->pExpr; - - SSchema* pSchema = &pqSw->pSchema[pqSw->nCols++]; - pSchema->colId = pColNode->colId; - pSchema->type = pColNode->node.resType.type; - pSchema->bytes = pColNode->node.resType.bytes; - tstrncpy(pSchema->name, pColNode->colName, tListLen(pSchema->name)); - } - } - - return pqSw; -} - -static void cleanupTableSchemaInfo(SSchemaInfo* pSchemaInfo) { - taosMemoryFreeClear(pSchemaInfo->dbname); - taosMemoryFreeClear(pSchemaInfo->tablename); - tDeleteSSchemaWrapper(pSchemaInfo->sw); - tDeleteSSchemaWrapper(pSchemaInfo->qsw); -} - -static void cleanupStreamInfo(SStreamTaskInfo* pStreamInfo) { tDeleteSSchemaWrapper(pStreamInfo->schema); } - bool groupbyTbname(SNodeList* pGroupList) { bool bytbname = false; if (LIST_LENGTH(pGroupList) == 1) { @@ -1268,306 +986,6 @@ bool groupbyTbname(SNodeList* pGroupList) { return bytbname; } -SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle, SNode* pTagCond, - SNode* pTagIndexCond, const char* pUser) { - int32_t type = nodeType(pPhyNode); - const char* idstr = GET_TASKID(pTaskInfo); - - if (pPhyNode->pChildren == NULL || LIST_LENGTH(pPhyNode->pChildren) == 0) { - SOperatorInfo* pOperator = NULL; - if (QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN == type) { - STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pPhyNode; - - // NOTE: this is an patch to fix the physical plan - // TODO remove it later - if (pTableScanNode->scan.node.pLimit != NULL) { - pTableScanNode->groupSort = true; - } - - STableListInfo* pTableListInfo = tableListCreate(); - int32_t code = - createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, pTableScanNode->groupSort, pHandle, - pTableListInfo, pTagCond, pTagIndexCond, pTaskInfo); - if (code) { - pTaskInfo->code = code; - tableListDestroy(pTableListInfo); - qError("failed to createScanTableListInfo, code:%s, %s", tstrerror(code), idstr); - return NULL; - } - - code = extractTableSchemaInfo(pHandle, &pTableScanNode->scan, pTaskInfo); - if (code) { - pTaskInfo->code = terrno; - tableListDestroy(pTableListInfo); - return NULL; - } - - pOperator = createTableScanOperatorInfo(pTableScanNode, pHandle, pTableListInfo, pTaskInfo); - if (NULL == pOperator) { - pTaskInfo->code = terrno; - return NULL; - } - - STableScanInfo* pScanInfo = pOperator->info; - pTaskInfo->cost.pRecoder = &pScanInfo->base.readRecorder; - } else if (QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN == type) { - STableMergeScanPhysiNode* pTableScanNode = (STableMergeScanPhysiNode*)pPhyNode; - STableListInfo* pTableListInfo = tableListCreate(); - - int32_t code = createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, true, pHandle, - pTableListInfo, pTagCond, pTagIndexCond, pTaskInfo); - if (code) { - pTaskInfo->code = code; - tableListDestroy(pTableListInfo); - qError("failed to createScanTableListInfo, code: %s", tstrerror(code)); - return NULL; - } - - code = extractTableSchemaInfo(pHandle, &pTableScanNode->scan, pTaskInfo); - if (code) { - pTaskInfo->code = terrno; - tableListDestroy(pTableListInfo); - return NULL; - } - - pOperator = createTableMergeScanOperatorInfo(pTableScanNode, pHandle, pTableListInfo, pTaskInfo); - if (NULL == pOperator) { - pTaskInfo->code = terrno; - tableListDestroy(pTableListInfo); - return NULL; - } - - STableScanInfo* pScanInfo = pOperator->info; - pTaskInfo->cost.pRecoder = &pScanInfo->base.readRecorder; - } else if (QUERY_NODE_PHYSICAL_PLAN_EXCHANGE == type) { - pOperator = createExchangeOperatorInfo(pHandle ? pHandle->pMsgCb->clientRpc : NULL, (SExchangePhysiNode*)pPhyNode, - pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN == type) { - STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pPhyNode; - STableListInfo* pTableListInfo = tableListCreate(); - - if (pHandle->vnode) { - int32_t code = - createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, pTableScanNode->groupSort, - pHandle, pTableListInfo, pTagCond, pTagIndexCond, pTaskInfo); - if (code) { - pTaskInfo->code = code; - tableListDestroy(pTableListInfo); - qError("failed to createScanTableListInfo, code: %s", tstrerror(code)); - return NULL; - } - } - - pTaskInfo->schemaInfo.qsw = extractQueriedColumnSchema(&pTableScanNode->scan); - pOperator = createStreamScanOperatorInfo(pHandle, pTableScanNode, pTagCond, pTableListInfo, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN == type) { - SSystemTableScanPhysiNode* pSysScanPhyNode = (SSystemTableScanPhysiNode*)pPhyNode; - pOperator = createSysTableScanOperatorInfo(pHandle, pSysScanPhyNode, pUser, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_TABLE_COUNT_SCAN == type) { - STableCountScanPhysiNode* pTblCountScanNode = (STableCountScanPhysiNode*)pPhyNode; - pOperator = createTableCountScanOperatorInfo(pHandle, pTblCountScanNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN == type) { - STagScanPhysiNode* pScanPhyNode = (STagScanPhysiNode*)pPhyNode; - STableListInfo* pTableListInfo = tableListCreate(); - int32_t code = createScanTableListInfo(pScanPhyNode, NULL, false, pHandle, pTableListInfo, pTagCond, - pTagIndexCond, pTaskInfo); - if (code != TSDB_CODE_SUCCESS) { - pTaskInfo->code = code; - qError("failed to getTableList, code: %s", tstrerror(code)); - return NULL; - } - - pOperator = createTagScanOperatorInfo(pHandle, pScanPhyNode, pTableListInfo, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_BLOCK_DIST_SCAN == type) { - SBlockDistScanPhysiNode* pBlockNode = (SBlockDistScanPhysiNode*)pPhyNode; - STableListInfo* pTableListInfo = tableListCreate(); - - if (pBlockNode->tableType == TSDB_SUPER_TABLE) { - SArray* pList = taosArrayInit(4, sizeof(STableKeyInfo)); - int32_t code = vnodeGetAllTableList(pHandle->vnode, pBlockNode->uid, pList); - if (code != TSDB_CODE_SUCCESS) { - pTaskInfo->code = terrno; - return NULL; - } - - size_t num = taosArrayGetSize(pList); - for (int32_t i = 0; i < num; ++i) { - STableKeyInfo* p = taosArrayGet(pList, i); - tableListAddTableInfo(pTableListInfo, p->uid, 0); - } - - taosArrayDestroy(pList); - } else { // Create group with only one table - tableListAddTableInfo(pTableListInfo, pBlockNode->uid, 0); - } - - pOperator = createDataBlockInfoScanOperator(pHandle, pBlockNode, pTableListInfo, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_LAST_ROW_SCAN == type) { - SLastRowScanPhysiNode* pScanNode = (SLastRowScanPhysiNode*)pPhyNode; - STableListInfo* pTableListInfo = tableListCreate(); - - int32_t code = createScanTableListInfo(&pScanNode->scan, pScanNode->pGroupTags, true, pHandle, pTableListInfo, - pTagCond, pTagIndexCond, pTaskInfo); - if (code != TSDB_CODE_SUCCESS) { - pTaskInfo->code = code; - return NULL; - } - - code = extractTableSchemaInfo(pHandle, &pScanNode->scan, pTaskInfo); - if (code != TSDB_CODE_SUCCESS) { - pTaskInfo->code = code; - return NULL; - } - - pOperator = createCacherowsScanOperator(pScanNode, pHandle, pTableListInfo, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_PROJECT == type) { - pOperator = createProjectOperatorInfo(NULL, (SProjectPhysiNode*)pPhyNode, pTaskInfo); - } else { - terrno = TSDB_CODE_INVALID_PARA; - return NULL; - } - - if (pOperator != NULL) { // todo moved away - pOperator->resultDataBlockId = pPhyNode->pOutputDataBlockDesc->dataBlockId; - } - - return pOperator; - } - - size_t size = LIST_LENGTH(pPhyNode->pChildren); - SOperatorInfo** ops = taosMemoryCalloc(size, POINTER_BYTES); - if (ops == NULL) { - return NULL; - } - - for (int32_t i = 0; i < size; ++i) { - SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, i); - ops[i] = createOperatorTree(pChildNode, pTaskInfo, pHandle, pTagCond, pTagIndexCond, pUser); - if (ops[i] == NULL) { - taosMemoryFree(ops); - return NULL; - } - } - - SOperatorInfo* pOptr = NULL; - if (QUERY_NODE_PHYSICAL_PLAN_PROJECT == type) { - pOptr = createProjectOperatorInfo(ops[0], (SProjectPhysiNode*)pPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_HASH_AGG == type) { - SAggPhysiNode* pAggNode = (SAggPhysiNode*)pPhyNode; - if (pAggNode->pGroupKeys != NULL) { - pOptr = createGroupOperatorInfo(ops[0], pAggNode, pTaskInfo); - } else { - pOptr = createAggregateOperatorInfo(ops[0], pAggNode, pTaskInfo); - } - } else if (QUERY_NODE_PHYSICAL_PLAN_HASH_INTERVAL == type) { - SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; - pOptr = createIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type) { - pOptr = createStreamIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_ALIGNED_INTERVAL == type) { - SMergeAlignedIntervalPhysiNode* pIntervalPhyNode = (SMergeAlignedIntervalPhysiNode*)pPhyNode; - pOptr = createMergeAlignedIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_INTERVAL == type) { - SMergeIntervalPhysiNode* pIntervalPhyNode = (SMergeIntervalPhysiNode*)pPhyNode; - pOptr = createMergeIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL == type) { - int32_t children = 0; - pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); - } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL == type) { - int32_t children = pHandle->numOfVgroups; - pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); - } else if (QUERY_NODE_PHYSICAL_PLAN_SORT == type) { - pOptr = createSortOperatorInfo(ops[0], (SSortPhysiNode*)pPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_GROUP_SORT == type) { - pOptr = createGroupSortOperatorInfo(ops[0], (SGroupSortPhysiNode*)pPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE == type) { - SMergePhysiNode* pMergePhyNode = (SMergePhysiNode*)pPhyNode; - pOptr = createMultiwayMergeOperatorInfo(ops, size, pMergePhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_SESSION == type) { - SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode; - pOptr = createSessionAggOperatorInfo(ops[0], pSessionNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION == type) { - pOptr = createStreamSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION == type) { - int32_t children = 0; - pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); - } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION == type) { - int32_t children = pHandle->numOfVgroups; - pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); - } else if (QUERY_NODE_PHYSICAL_PLAN_PARTITION == type) { - pOptr = createPartitionOperatorInfo(ops[0], (SPartitionPhysiNode*)pPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION == type) { - pOptr = createStreamPartitionOperatorInfo(ops[0], (SStreamPartitionPhysiNode*)pPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_STATE == type) { - SStateWinodwPhysiNode* pStateNode = (SStateWinodwPhysiNode*)pPhyNode; - pOptr = createStatewindowOperatorInfo(ops[0], pStateNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE == type) { - pOptr = createStreamStateAggOperatorInfo(ops[0], pPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN == type) { - pOptr = createMergeJoinOperatorInfo(ops, size, (SSortMergeJoinPhysiNode*)pPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_FILL == type) { - pOptr = createFillOperatorInfo(ops[0], (SFillPhysiNode*)pPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FILL == type) { - pOptr = createStreamFillOperatorInfo(ops[0], (SStreamFillPhysiNode*)pPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_INDEF_ROWS_FUNC == type) { - pOptr = createIndefinitOutputOperatorInfo(ops[0], pPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_INTERP_FUNC == type) { - pOptr = createTimeSliceOperatorInfo(ops[0], pPhyNode, pTaskInfo); - } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_EVENT == type) { - pOptr = createEventwindowOperatorInfo(ops[0], pPhyNode, pTaskInfo); - } else { - terrno = TSDB_CODE_INVALID_PARA; - taosMemoryFree(ops); - return NULL; - } - - taosMemoryFree(ops); - if (pOptr) { - pOptr->resultDataBlockId = pPhyNode->pOutputDataBlockDesc->dataBlockId; - } - - return pOptr; -} - -static int32_t extractTbscanInStreamOpTree(SOperatorInfo* pOperator, STableScanInfo** ppInfo) { - if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { - if (pOperator->numOfDownstream == 0) { - qError("failed to find stream scan operator"); - return TSDB_CODE_APP_ERROR; - } - - if (pOperator->numOfDownstream > 1) { - qError("join not supported for stream block scan"); - return TSDB_CODE_APP_ERROR; - } - return extractTbscanInStreamOpTree(pOperator->pDownstream[0], ppInfo); - } else { - SStreamScanInfo* pInfo = pOperator->info; - *ppInfo = pInfo->pTableScanOp->info; - return 0; - } -} - -int32_t extractTableScanNode(SPhysiNode* pNode, STableScanPhysiNode** ppNode) { - if (pNode->pChildren == NULL || LIST_LENGTH(pNode->pChildren) == 0) { - if (QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN == pNode->type) { - *ppNode = (STableScanPhysiNode*)pNode; - return 0; - } else { - terrno = TSDB_CODE_APP_ERROR; - return -1; - } - } else { - if (LIST_LENGTH(pNode->pChildren) != 1) { - terrno = TSDB_CODE_APP_ERROR; - return -1; - } - SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pNode->pChildren, 0); - return extractTableScanNode(pChildNode, ppNode); - } - return -1; -} - int32_t createDataSinkParam(SDataSinkNode* pNode, void** pParam, SExecTaskInfo* pTask, SReadHandle* readHandle) { switch (pNode->type) { case QUERY_NODE_PHYSICAL_PLAN_QUERY_INSERT: { @@ -1615,178 +1033,6 @@ int32_t createDataSinkParam(SDataSinkNode* pNode, void** pParam, SExecTaskInfo* return TSDB_CODE_SUCCESS; } -int32_t createExecTaskInfo(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SReadHandle* pHandle, uint64_t taskId, - int32_t vgId, char* sql, EOPTR_EXEC_MODEL model) { - *pTaskInfo = doCreateExecTaskInfo(pPlan->id.queryId, taskId, vgId, model, pPlan->dbFName); - if (*pTaskInfo == NULL) { - goto _complete; - } - - if (pHandle) { - if (pHandle->pStateBackend) { - (*pTaskInfo)->streamInfo.pState = pHandle->pStateBackend; - } - } - - (*pTaskInfo)->sql = sql; - sql = NULL; - - (*pTaskInfo)->pSubplan = pPlan; - (*pTaskInfo)->pRoot = - createOperatorTree(pPlan->pNode, *pTaskInfo, pHandle, pPlan->pTagCond, pPlan->pTagIndexCond, pPlan->user); - - if (NULL == (*pTaskInfo)->pRoot) { - terrno = (*pTaskInfo)->code; - goto _complete; - } - - return TSDB_CODE_SUCCESS; - -_complete: - taosMemoryFree(sql); - doDestroyTask(*pTaskInfo); - return terrno; -} - -static void freeBlock(void* pParam) { - SSDataBlock* pBlock = *(SSDataBlock**)pParam; - blockDataDestroy(pBlock); -} - -void doDestroyTask(SExecTaskInfo* pTaskInfo) { - qDebug("%s execTask is freed", GET_TASKID(pTaskInfo)); - destroyOperatorInfo(pTaskInfo->pRoot); - cleanupTableSchemaInfo(&pTaskInfo->schemaInfo); - cleanupStreamInfo(&pTaskInfo->streamInfo); - - if (!pTaskInfo->localFetch.localExec) { - nodesDestroyNode((SNode*)pTaskInfo->pSubplan); - } - - taosArrayDestroyEx(pTaskInfo->pResultBlockList, freeBlock); - taosArrayDestroy(pTaskInfo->stopInfo.pStopInfo); - taosMemoryFreeClear(pTaskInfo->sql); - taosMemoryFreeClear(pTaskInfo->id.str); - taosMemoryFreeClear(pTaskInfo); -} - -static int64_t getQuerySupportBufSize(size_t numOfTables) { - size_t s1 = sizeof(STableQueryInfo); - // size_t s3 = sizeof(STableCheckInfo); buffer consumption in tsdb - return (int64_t)(s1 * 1.5 * numOfTables); -} - -int32_t checkForQueryBuf(size_t numOfTables) { - int64_t t = getQuerySupportBufSize(numOfTables); - if (tsQueryBufferSizeBytes < 0) { - return TSDB_CODE_SUCCESS; - } else if (tsQueryBufferSizeBytes > 0) { - while (1) { - int64_t s = tsQueryBufferSizeBytes; - int64_t remain = s - t; - if (remain >= 0) { - if (atomic_val_compare_exchange_64(&tsQueryBufferSizeBytes, s, remain) == s) { - return TSDB_CODE_SUCCESS; - } - } else { - return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER; - } - } - } - - // disable query processing if the value of tsQueryBufferSize is zero. - return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER; -} - -void releaseQueryBuf(size_t numOfTables) { - if (tsQueryBufferSizeBytes < 0) { - return; - } - - int64_t t = getQuerySupportBufSize(numOfTables); - - // restore value is not enough buffer available - atomic_add_fetch_64(&tsQueryBufferSizeBytes, t); -} - -int32_t getOperatorExplainExecInfo(SOperatorInfo* operatorInfo, SArray* pExecInfoList) { - SExplainExecInfo execInfo = {0}; - SExplainExecInfo* pExplainInfo = taosArrayPush(pExecInfoList, &execInfo); - - pExplainInfo->numOfRows = operatorInfo->resultInfo.totalRows; - pExplainInfo->startupCost = operatorInfo->cost.openCost; - pExplainInfo->totalCost = operatorInfo->cost.totalCost; - pExplainInfo->verboseLen = 0; - pExplainInfo->verboseInfo = NULL; - - if (operatorInfo->fpSet.getExplainFn) { - int32_t code = - operatorInfo->fpSet.getExplainFn(operatorInfo, &pExplainInfo->verboseInfo, &pExplainInfo->verboseLen); - if (code) { - qError("%s operator getExplainFn failed, code:%s", GET_TASKID(operatorInfo->pTaskInfo), tstrerror(code)); - return code; - } - } - - int32_t code = 0; - for (int32_t i = 0; i < operatorInfo->numOfDownstream; ++i) { - code = getOperatorExplainExecInfo(operatorInfo->pDownstream[i], pExecInfoList); - if (code != TSDB_CODE_SUCCESS) { - // taosMemoryFreeClear(*pRes); - return TSDB_CODE_OUT_OF_MEMORY; - } - } - - return TSDB_CODE_SUCCESS; -} -void resultRowToString(void* row, int32_t size, char* buf) { - SResultRow* p = row; - int32_t len = 0; - len += sprintf(buf + len, - "pageId:%d, offset:%d, startInterp:%d, endInterp:%d, closed:%d, numOfRows:%d, skey:%" PRId64 - ", ekey:%" PRId64, - p->pageId, p->offset, p->startInterp, p->endInterp, p->closed, p->numOfRows, p->win.skey, p->win.ekey); - - int32_t numOfEntryInfo = (size - sizeof(SResultRow)) / sizeof(struct SResultRowEntryInfo); - len += sprintf(buf + len, ", entryInfo size:%d", numOfEntryInfo); - for (int i = 0; i < numOfEntryInfo; i++) { - SResultRowEntryInfo* pInfo = &p->pEntryInfo[i]; - if (len >= 200 * size - 64 || i >= 5) { - break; - } - len += sprintf(buf + len, "[inited:%d, complete:%d, nullRes:%d, numOfRes:%d]", pInfo->initialized, pInfo->complete, - pInfo->isNullRes, pInfo->numOfRes); - } -} -/* - * - */ - -int32_t resultRowEncode(void* k, int32_t* size, char* buf) { - // SResultRow* key = k; - // int len = 0; - // int struLen = *size; - // len += taosEncodeFixedI32((void**)&buf, key->pageId); - - // uint32_t offset = key->offset; - // len += taosEncodeFixedU32((void**)&buf, offset); - - // len += taosEncodeFixedI8((void**)&buf, key->startInterp); - // len += taosEncodeFixedI8((void**)&buf, key->endInterp); - // len += taosEncodeFixedI8((void**)&buf, key->closed); - // len += taosEncodeFixedU32((void**)&buf, key->numOfRows); - - // len += taosEncodeFixedI64((void**)&buf, key->win.skey); - // len += taosEncodeFixedI64((void**)&buf, key->win.ekey); - - // int32_t numOfEntryInfo = (struLen - sizeof(SResultRow)) / sizeof(struct SResultRowEntryInfo); - // len += taosEncodeFixedI32((void**)&buf, numOfEntryInfo); - // for (int i = 0; i < numOfEntryInfo; i++) { - // SResultRowEntryInfo* p = &key->pEntryInfo[i]; - - // uint8_t value = p->initialized ? 1 : 0; - // len += taosEncodeFixedU8((void**)&buf, value); - // value = p->complete ? 1 : 0; // len += taosEncodeFixedU8((void**)&buf, value); @@ -1999,25 +1245,3 @@ void qStreamCloseTsdbReader(void* task) { } } } - -static void extractTableList(SArray* pList, const SOperatorInfo* pOperator) { - if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { - SStreamScanInfo* pScanInfo = pOperator->info; - STableScanInfo* pTableScanInfo = pScanInfo->pTableScanOp->info; - taosArrayPush(pList, &pTableScanInfo->base.pTableListInfo); - } else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { - STableScanInfo* pScanInfo = pOperator->info; - taosArrayPush(pList, &pScanInfo->base.pTableListInfo); - } else { - if (pOperator->pDownstream != NULL && pOperator->pDownstream[0] != NULL) { - extractTableList(pList, pOperator->pDownstream[0]); - } - } -} - -SArray* getTableListInfo(const SExecTaskInfo* pTaskInfo) { - SArray* pArray = taosArrayInit(0, POINTER_BYTES); - SOperatorInfo* pOperator = pTaskInfo->pRoot; - extractTableList(pArray, pOperator); - return pArray; -} \ No newline at end of file diff --git a/source/libs/executor/src/filloperator.c b/source/libs/executor/src/filloperator.c index 98beb30c30..5101e36992 100644 --- a/source/libs/executor/src/filloperator.c +++ b/source/libs/executor/src/filloperator.c @@ -20,16 +20,18 @@ #include "tmsg.h" #include "ttypes.h" -#include "executorimpl.h" +#include "executorInt.h" #include "tcommon.h" #include "thash.h" #include "ttime.h" -#include "executorInt.h" #include "function.h" #include "querynodes.h" #include "tdatablock.h" #include "tfill.h" +#include "operator.h" +#include "querytask.h" + #define FILL_POS_INVALID 0 #define FILL_POS_START 1 diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index e1ac70cf0d..280edf8b53 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -22,7 +22,8 @@ #include "tmsg.h" #include "executorInt.h" -#include "executorimpl.h" +#include "operator.h" +#include "querytask.h" #include "tcompare.h" #include "thash.h" #include "ttypes.h" diff --git a/source/libs/executor/src/joinoperator.c b/source/libs/executor/src/joinoperator.c index 31ff11eec5..754b5f4737 100644 --- a/source/libs/executor/src/joinoperator.c +++ b/source/libs/executor/src/joinoperator.c @@ -13,11 +13,13 @@ * along with this program. If not, see . */ +#include "executorInt.h" #include "filter.h" -#include "executorimpl.h" #include "function.h" +#include "operator.h" #include "os.h" #include "querynodes.h" +#include "querytask.h" #include "tcompare.h" #include "tdatablock.h" #include "thash.h" diff --git a/source/libs/executor/src/operator.c b/source/libs/executor/src/operator.c new file mode 100644 index 0000000000..729178dc60 --- /dev/null +++ b/source/libs/executor/src/operator.c @@ -0,0 +1,578 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "filter.h" +#include "function.h" +#include "os.h" +#include "tname.h" + +#include "tglobal.h" + +#include "executorInt.h" +#include "index.h" +#include "operator.h" +#include "query.h" +#include "querytask.h" +#include "vnode.h" + +SOperatorFpSet createOperatorFpSet(__optr_open_fn_t openFn, __optr_fn_t nextFn, __optr_fn_t cleanup, + __optr_close_fn_t closeFn, __optr_reqBuf_fn_t reqBufFn, + __optr_explain_fn_t explain) { + SOperatorFpSet fpSet = { + ._openFn = openFn, + .getNextFn = nextFn, + .cleanupFn = cleanup, + .closeFn = closeFn, + .reqBufFn = reqBufFn, + .getExplainFn = explain, + }; + + return fpSet; +} + +int32_t optrDummyOpenFn(SOperatorInfo* pOperator) { + OPTR_SET_OPENED(pOperator); + pOperator->cost.openCost = 0; + return TSDB_CODE_SUCCESS; +} + +int32_t appendDownstream(SOperatorInfo* p, SOperatorInfo** pDownstream, int32_t num) { + p->pDownstream = taosMemoryCalloc(1, num * POINTER_BYTES); + if (p->pDownstream == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + memcpy(p->pDownstream, pDownstream, num * POINTER_BYTES); + p->numOfDownstream = num; + return TSDB_CODE_SUCCESS; +} + +void setOperatorCompleted(SOperatorInfo* pOperator) { + pOperator->status = OP_EXEC_DONE; + pOperator->cost.totalCost = (taosGetTimestampUs() - pOperator->pTaskInfo->cost.start) / 1000.0; + setTaskStatus(pOperator->pTaskInfo, TASK_COMPLETED); +} + +void setOperatorInfo(SOperatorInfo* pOperator, const char* name, int32_t type, bool blocking, int32_t status, + void* pInfo, SExecTaskInfo* pTaskInfo) { + pOperator->name = (char*)name; + pOperator->operatorType = type; + pOperator->blocking = blocking; + pOperator->status = status; + pOperator->info = pInfo; + pOperator->pTaskInfo = pTaskInfo; +} + +// each operator should be set their own function to return total cost buffer +int32_t optrDefaultBufFn(SOperatorInfo* pOperator) { + if (pOperator->blocking) { + return -1; + } else { + return 0; + } +} + +static int64_t getQuerySupportBufSize(size_t numOfTables) { + size_t s1 = sizeof(STableQueryInfo); + // size_t s3 = sizeof(STableCheckInfo); buffer consumption in tsdb + return (int64_t)(s1 * 1.5 * numOfTables); +} + +int32_t checkForQueryBuf(size_t numOfTables) { + int64_t t = getQuerySupportBufSize(numOfTables); + if (tsQueryBufferSizeBytes < 0) { + return TSDB_CODE_SUCCESS; + } else if (tsQueryBufferSizeBytes > 0) { + while (1) { + int64_t s = tsQueryBufferSizeBytes; + int64_t remain = s - t; + if (remain >= 0) { + if (atomic_val_compare_exchange_64(&tsQueryBufferSizeBytes, s, remain) == s) { + return TSDB_CODE_SUCCESS; + } + } else { + return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER; + } + } + } + + // disable query processing if the value of tsQueryBufferSize is zero. + return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER; +} + +void releaseQueryBuf(size_t numOfTables) { + if (tsQueryBufferSizeBytes < 0) { + return; + } + + int64_t t = getQuerySupportBufSize(numOfTables); + + // restore value is not enough buffer available + atomic_add_fetch_64(&tsQueryBufferSizeBytes, t); +} + +typedef enum { + OPTR_FN_RET_CONTINUE = 0x1, + OPTR_FN_RET_ABORT = 0x2, +} ERetType; + +typedef struct STraverParam { + void* pRet; + int32_t code; + void* pParam; +} STraverParam; + +// iterate the operator tree helper +typedef ERetType (*optr_fn_t)(SOperatorInfo *pOperator, STraverParam *pParam, const char* pIdstr); + +void traverseOperatorTree(SOperatorInfo* pOperator, optr_fn_t fn, STraverParam* pParam, const char* id) { + if (pOperator == NULL) { + return; + } + + ERetType ret = fn(pOperator, pParam, id); + if (ret == OPTR_FN_RET_ABORT || pParam->code != TSDB_CODE_SUCCESS) { + return; + } + + for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) { + traverseOperatorTree(pOperator->pDownstream[i], fn, pParam, id); + if (pParam->code != 0) { + break; + } + } +} + +ERetType extractOperatorInfo(SOperatorInfo* pOperator, STraverParam* pParam, const char* pIdStr) { + STraverParam* p = pParam; + if (pOperator->operatorType == *(int32_t*)p->pParam) { + p->pRet = pOperator; + return OPTR_FN_RET_ABORT; + } else { + return OPTR_FN_RET_CONTINUE; + } +} + +// QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN +SOperatorInfo* extractOperatorInTree(SOperatorInfo* pOperator, int32_t type, const char* id) { + if (pOperator == NULL) { + qError("invalid operator, failed to find tableScanOperator %s", id); + terrno = TSDB_CODE_PAR_INTERNAL_ERROR; + return NULL; + } + + STraverParam p = {.pParam = &type, .pRet = NULL}; + traverseOperatorTree(pOperator, extractOperatorInfo, &p, id); + if (p.code != 0) { + terrno = p.code; + return NULL; + } else { + return p.pRet; + } +} + +typedef struct SExtScanInfo { + int32_t order; + int32_t scanFlag; + int32_t inheritUsOrder; +} SExtScanInfo; + +static ERetType extractScanInfo(SOperatorInfo* pOperator, STraverParam* pParam, const char* pIdStr) { + int32_t type = pOperator->operatorType; + SExtScanInfo* pInfo = pParam->pParam; + + if (type == QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN || type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN || + type == QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN || type == QUERY_NODE_PHYSICAL_PLAN_BLOCK_DIST_SCAN || + type == QUERY_NODE_PHYSICAL_PLAN_LAST_ROW_SCAN || type == QUERY_NODE_PHYSICAL_PLAN_TABLE_COUNT_SCAN) { + pInfo->order = TSDB_ORDER_ASC; + pInfo->scanFlag= MAIN_SCAN; + return OPTR_FN_RET_ABORT; + } else if (type == QUERY_NODE_PHYSICAL_PLAN_EXCHANGE) { + if (!pInfo->inheritUsOrder) { + pInfo->order = TSDB_ORDER_ASC; + } + pInfo->scanFlag= MAIN_SCAN; + return OPTR_FN_RET_ABORT; + } else if (type == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { + STableScanInfo* pTableScanInfo = pOperator->info; + pInfo->order = pTableScanInfo->base.cond.order; + pInfo->scanFlag= pTableScanInfo->base.scanFlag; + return OPTR_FN_RET_ABORT; + } else if (type == QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN) { + STableMergeScanInfo* pTableScanInfo = pOperator->info; + pInfo->order = pTableScanInfo->base.cond.order; + pInfo->scanFlag= pTableScanInfo->base.scanFlag; + return OPTR_FN_RET_ABORT; + } else { + return OPTR_FN_RET_CONTINUE; + } +} + +int32_t getTableScanInfo(SOperatorInfo* pOperator, int32_t* order, int32_t* scanFlag, bool inheritUsOrder) { + SExtScanInfo info = {.inheritUsOrder = inheritUsOrder, .order = *order}; + STraverParam p = {.pParam = &info}; + + traverseOperatorTree(pOperator, extractScanInfo, &p, NULL); + *order = info.order; + *scanFlag = info.scanFlag; + + ASSERT(*order == TSDB_ORDER_ASC || *order == TSDB_ORDER_DESC); + return p.code; +} + +static ERetType doStopDataReader(SOperatorInfo* pOperator, STraverParam* pParam, const char* pIdStr) { + if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { + STableScanInfo* pInfo = pOperator->info; + + if (pInfo->base.dataReader != NULL) { + tsdbReaderSetCloseFlag(pInfo->base.dataReader); + } + return OPTR_FN_RET_ABORT; + } else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { + SStreamScanInfo* pInfo = pOperator->info; + + if (pInfo->pTableScanOp != NULL) { + STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; + if (pTableScanInfo != NULL && pTableScanInfo->base.dataReader != NULL) { + tsdbReaderSetCloseFlag(pTableScanInfo->base.dataReader); + } + } + + return OPTR_FN_RET_ABORT; + } + + return OPTR_FN_RET_CONTINUE; +} + +int32_t stopTableScanOperator(SOperatorInfo* pOperator, const char* pIdStr) { + STraverParam p = {0}; + traverseOperatorTree(pOperator, doStopDataReader, &p, pIdStr); + return p.code; +} + +SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle, SNode* pTagCond, + SNode* pTagIndexCond, const char* pUser, const char* dbname) { + int32_t type = nodeType(pPhyNode); + const char* idstr = GET_TASKID(pTaskInfo); + + if (pPhyNode->pChildren == NULL || LIST_LENGTH(pPhyNode->pChildren) == 0) { + SOperatorInfo* pOperator = NULL; + if (QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN == type) { + STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pPhyNode; + + // NOTE: this is an patch to fix the physical plan + // TODO remove it later + if (pTableScanNode->scan.node.pLimit != NULL) { + pTableScanNode->groupSort = true; + } + + STableListInfo* pTableListInfo = tableListCreate(); + int32_t code = + createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, pTableScanNode->groupSort, pHandle, + pTableListInfo, pTagCond, pTagIndexCond, pTaskInfo); + if (code) { + pTaskInfo->code = code; + tableListDestroy(pTableListInfo); + qError("failed to createScanTableListInfo, code:%s, %s", tstrerror(code), idstr); + return NULL; + } + + code = initQueriedTableSchemaInfo(pHandle, &pTableScanNode->scan, dbname, pTaskInfo); + if (code) { + pTaskInfo->code = code; + tableListDestroy(pTableListInfo); + return NULL; + } + + pOperator = createTableScanOperatorInfo(pTableScanNode, pHandle, pTableListInfo, pTaskInfo); + if (NULL == pOperator) { + pTaskInfo->code = terrno; + tableListDestroy(pTableListInfo); + return NULL; + } + + STableScanInfo* pScanInfo = pOperator->info; + pTaskInfo->cost.pRecoder = &pScanInfo->base.readRecorder; + } else if (QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN == type) { + STableMergeScanPhysiNode* pTableScanNode = (STableMergeScanPhysiNode*)pPhyNode; + STableListInfo* pTableListInfo = tableListCreate(); + + int32_t code = createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, true, pHandle, + pTableListInfo, pTagCond, pTagIndexCond, pTaskInfo); + if (code) { + pTaskInfo->code = code; + tableListDestroy(pTableListInfo); + qError("failed to createScanTableListInfo, code: %s", tstrerror(code)); + return NULL; + } + + code = initQueriedTableSchemaInfo(pHandle, &pTableScanNode->scan, dbname, pTaskInfo); + if (code) { + pTaskInfo->code = terrno; + tableListDestroy(pTableListInfo); + return NULL; + } + + pOperator = createTableMergeScanOperatorInfo(pTableScanNode, pHandle, pTableListInfo, pTaskInfo); + if (NULL == pOperator) { + pTaskInfo->code = terrno; + tableListDestroy(pTableListInfo); + return NULL; + } + + STableScanInfo* pScanInfo = pOperator->info; + pTaskInfo->cost.pRecoder = &pScanInfo->base.readRecorder; + } else if (QUERY_NODE_PHYSICAL_PLAN_EXCHANGE == type) { + pOperator = createExchangeOperatorInfo(pHandle ? pHandle->pMsgCb->clientRpc : NULL, (SExchangePhysiNode*)pPhyNode, + pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN == type) { + STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pPhyNode; + STableListInfo* pTableListInfo = tableListCreate(); + + if (pHandle->vnode) { + int32_t code = + createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, pTableScanNode->groupSort, + pHandle, pTableListInfo, pTagCond, pTagIndexCond, pTaskInfo); + if (code) { + pTaskInfo->code = code; + tableListDestroy(pTableListInfo); + qError("failed to createScanTableListInfo, code: %s", tstrerror(code)); + return NULL; + } + } + + pTaskInfo->schemaInfo.qsw = extractQueriedColumnSchema(&pTableScanNode->scan); + pOperator = createStreamScanOperatorInfo(pHandle, pTableScanNode, pTagCond, pTableListInfo, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN == type) { + SSystemTableScanPhysiNode* pSysScanPhyNode = (SSystemTableScanPhysiNode*)pPhyNode; + pOperator = createSysTableScanOperatorInfo(pHandle, pSysScanPhyNode, pUser, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_TABLE_COUNT_SCAN == type) { + STableCountScanPhysiNode* pTblCountScanNode = (STableCountScanPhysiNode*)pPhyNode; + pOperator = createTableCountScanOperatorInfo(pHandle, pTblCountScanNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN == type) { + STagScanPhysiNode* pScanPhyNode = (STagScanPhysiNode*)pPhyNode; + STableListInfo* pTableListInfo = tableListCreate(); + int32_t code = createScanTableListInfo(pScanPhyNode, NULL, false, pHandle, pTableListInfo, pTagCond, + pTagIndexCond, pTaskInfo); + if (code != TSDB_CODE_SUCCESS) { + pTaskInfo->code = code; + qError("failed to getTableList, code: %s", tstrerror(code)); + return NULL; + } + + pOperator = createTagScanOperatorInfo(pHandle, pScanPhyNode, pTableListInfo, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_BLOCK_DIST_SCAN == type) { + SBlockDistScanPhysiNode* pBlockNode = (SBlockDistScanPhysiNode*)pPhyNode; + STableListInfo* pTableListInfo = tableListCreate(); + + if (pBlockNode->tableType == TSDB_SUPER_TABLE) { + SArray* pList = taosArrayInit(4, sizeof(STableKeyInfo)); + int32_t code = vnodeGetAllTableList(pHandle->vnode, pBlockNode->uid, pList); + if (code != TSDB_CODE_SUCCESS) { + pTaskInfo->code = terrno; + return NULL; + } + + size_t num = taosArrayGetSize(pList); + for (int32_t i = 0; i < num; ++i) { + STableKeyInfo* p = taosArrayGet(pList, i); + tableListAddTableInfo(pTableListInfo, p->uid, 0); + } + + taosArrayDestroy(pList); + } else { // Create group with only one table + tableListAddTableInfo(pTableListInfo, pBlockNode->uid, 0); + } + + pOperator = createDataBlockInfoScanOperator(pHandle, pBlockNode, pTableListInfo, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_LAST_ROW_SCAN == type) { + SLastRowScanPhysiNode* pScanNode = (SLastRowScanPhysiNode*)pPhyNode; + STableListInfo* pTableListInfo = tableListCreate(); + + int32_t code = createScanTableListInfo(&pScanNode->scan, pScanNode->pGroupTags, true, pHandle, pTableListInfo, + pTagCond, pTagIndexCond, pTaskInfo); + if (code != TSDB_CODE_SUCCESS) { + pTaskInfo->code = code; + return NULL; + } + + code = initQueriedTableSchemaInfo(pHandle, &pScanNode->scan, dbname, pTaskInfo); + if (code != TSDB_CODE_SUCCESS) { + pTaskInfo->code = code; + return NULL; + } + + pOperator = createCacherowsScanOperator(pScanNode, pHandle, pTableListInfo, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_PROJECT == type) { + pOperator = createProjectOperatorInfo(NULL, (SProjectPhysiNode*)pPhyNode, pTaskInfo); + } else { + terrno = TSDB_CODE_INVALID_PARA; + return NULL; + } + + if (pOperator != NULL) { // todo moved away + pOperator->resultDataBlockId = pPhyNode->pOutputDataBlockDesc->dataBlockId; + } + + return pOperator; + } + + size_t size = LIST_LENGTH(pPhyNode->pChildren); + SOperatorInfo** ops = taosMemoryCalloc(size, POINTER_BYTES); + if (ops == NULL) { + return NULL; + } + + for (int32_t i = 0; i < size; ++i) { + SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, i); + ops[i] = createOperator(pChildNode, pTaskInfo, pHandle, pTagCond, pTagIndexCond, pUser, dbname); + if (ops[i] == NULL) { + taosMemoryFree(ops); + return NULL; + } + } + + SOperatorInfo* pOptr = NULL; + if (QUERY_NODE_PHYSICAL_PLAN_PROJECT == type) { + pOptr = createProjectOperatorInfo(ops[0], (SProjectPhysiNode*)pPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_HASH_AGG == type) { + SAggPhysiNode* pAggNode = (SAggPhysiNode*)pPhyNode; + if (pAggNode->pGroupKeys != NULL) { + pOptr = createGroupOperatorInfo(ops[0], pAggNode, pTaskInfo); + } else { + pOptr = createAggregateOperatorInfo(ops[0], pAggNode, pTaskInfo); + } + } else if (QUERY_NODE_PHYSICAL_PLAN_HASH_INTERVAL == type) { + SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; + pOptr = createIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type) { + pOptr = createStreamIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_ALIGNED_INTERVAL == type) { + SMergeAlignedIntervalPhysiNode* pIntervalPhyNode = (SMergeAlignedIntervalPhysiNode*)pPhyNode; + pOptr = createMergeAlignedIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_INTERVAL == type) { + SMergeIntervalPhysiNode* pIntervalPhyNode = (SMergeIntervalPhysiNode*)pPhyNode; + pOptr = createMergeIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL == type) { + int32_t children = 0; + pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); + } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL == type) { + int32_t children = pHandle->numOfVgroups; + pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); + } else if (QUERY_NODE_PHYSICAL_PLAN_SORT == type) { + pOptr = createSortOperatorInfo(ops[0], (SSortPhysiNode*)pPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_GROUP_SORT == type) { + pOptr = createGroupSortOperatorInfo(ops[0], (SGroupSortPhysiNode*)pPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE == type) { + SMergePhysiNode* pMergePhyNode = (SMergePhysiNode*)pPhyNode; + pOptr = createMultiwayMergeOperatorInfo(ops, size, pMergePhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_SESSION == type) { + SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode; + pOptr = createSessionAggOperatorInfo(ops[0], pSessionNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION == type) { + pOptr = createStreamSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION == type) { + int32_t children = 0; + pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); + } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION == type) { + int32_t children = pHandle->numOfVgroups; + pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); + } else if (QUERY_NODE_PHYSICAL_PLAN_PARTITION == type) { + pOptr = createPartitionOperatorInfo(ops[0], (SPartitionPhysiNode*)pPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION == type) { + pOptr = createStreamPartitionOperatorInfo(ops[0], (SStreamPartitionPhysiNode*)pPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_STATE == type) { + SStateWinodwPhysiNode* pStateNode = (SStateWinodwPhysiNode*)pPhyNode; + pOptr = createStatewindowOperatorInfo(ops[0], pStateNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE == type) { + pOptr = createStreamStateAggOperatorInfo(ops[0], pPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN == type) { + pOptr = createMergeJoinOperatorInfo(ops, size, (SSortMergeJoinPhysiNode*)pPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_FILL == type) { + pOptr = createFillOperatorInfo(ops[0], (SFillPhysiNode*)pPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FILL == type) { + pOptr = createStreamFillOperatorInfo(ops[0], (SStreamFillPhysiNode*)pPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_INDEF_ROWS_FUNC == type) { + pOptr = createIndefinitOutputOperatorInfo(ops[0], pPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_INTERP_FUNC == type) { + pOptr = createTimeSliceOperatorInfo(ops[0], pPhyNode, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_EVENT == type) { + pOptr = createEventwindowOperatorInfo(ops[0], pPhyNode, pTaskInfo); + } else { + terrno = TSDB_CODE_INVALID_PARA; + taosMemoryFree(ops); + return NULL; + } + + taosMemoryFree(ops); + if (pOptr) { + pOptr->resultDataBlockId = pPhyNode->pOutputDataBlockDesc->dataBlockId; + } + + return pOptr; +} + +void destroyOperator(SOperatorInfo* pOperator) { + if (pOperator == NULL) { + return; + } + + if (pOperator->fpSet.closeFn != NULL) { + pOperator->fpSet.closeFn(pOperator->info); + } + + if (pOperator->pDownstream != NULL) { + for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) { + destroyOperator(pOperator->pDownstream[i]); + } + + taosMemoryFreeClear(pOperator->pDownstream); + pOperator->numOfDownstream = 0; + } + + cleanupExprSupp(&pOperator->exprSupp); + taosMemoryFreeClear(pOperator); +} + +int32_t getOperatorExplainExecInfo(SOperatorInfo* operatorInfo, SArray* pExecInfoList) { + SExplainExecInfo execInfo = {0}; + SExplainExecInfo* pExplainInfo = taosArrayPush(pExecInfoList, &execInfo); + + pExplainInfo->numOfRows = operatorInfo->resultInfo.totalRows; + pExplainInfo->startupCost = operatorInfo->cost.openCost; + pExplainInfo->totalCost = operatorInfo->cost.totalCost; + pExplainInfo->verboseLen = 0; + pExplainInfo->verboseInfo = NULL; + + if (operatorInfo->fpSet.getExplainFn) { + int32_t code = + operatorInfo->fpSet.getExplainFn(operatorInfo, &pExplainInfo->verboseInfo, &pExplainInfo->verboseLen); + if (code) { + qError("%s operator getExplainFn failed, code:%s", GET_TASKID(operatorInfo->pTaskInfo), tstrerror(code)); + return code; + } + } + + int32_t code = 0; + for (int32_t i = 0; i < operatorInfo->numOfDownstream; ++i) { + code = getOperatorExplainExecInfo(operatorInfo->pDownstream[i], pExecInfoList); + if (code != TSDB_CODE_SUCCESS) { + // taosMemoryFreeClear(*pRes); + return TSDB_CODE_OUT_OF_MEMORY; + } + } + + return TSDB_CODE_SUCCESS; +} diff --git a/source/libs/executor/src/projectoperator.c b/source/libs/executor/src/projectoperator.c index 86c49e0fc8..02f504bef0 100644 --- a/source/libs/executor/src/projectoperator.c +++ b/source/libs/executor/src/projectoperator.c @@ -13,9 +13,11 @@ * along with this program. If not, see . */ -#include "executorimpl.h" +#include "executorInt.h" #include "filter.h" #include "functionMgt.h" +#include "operator.h" +#include "querytask.h" typedef struct SProjectOperatorInfo { SOptrBasicInfo binfo; diff --git a/source/libs/executor/src/querytask.c b/source/libs/executor/src/querytask.c new file mode 100644 index 0000000000..b6b250a325 --- /dev/null +++ b/source/libs/executor/src/querytask.c @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "filter.h" +#include "function.h" +#include "functionMgt.h" +#include "os.h" +#include "querynodes.h" +#include "tfill.h" +#include "tname.h" + +#include "tdatablock.h" +#include "tmsg.h" + +#include "executorInt.h" +#include "index.h" +#include "operator.h" +#include "query.h" +#include "querytask.h" +#include "thash.h" +#include "ttypes.h" +#include "vnode.h" + +#define CLEAR_QUERY_STATUS(q, st) ((q)->status &= (~(st))) + +SExecTaskInfo* doCreateTask(uint64_t queryId, uint64_t taskId, int32_t vgId, EOPTR_EXEC_MODEL model) { + SExecTaskInfo* pTaskInfo = taosMemoryCalloc(1, sizeof(SExecTaskInfo)); + if (pTaskInfo == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + setTaskStatus(pTaskInfo, TASK_NOT_COMPLETED); + pTaskInfo->cost.created = taosGetTimestampUs(); + + pTaskInfo->execModel = model; + pTaskInfo->stopInfo.pStopInfo = taosArrayInit(4, sizeof(SExchangeOpStopInfo)); + pTaskInfo->pResultBlockList = taosArrayInit(128, POINTER_BYTES); + + taosInitRWLatch(&pTaskInfo->lock); + + pTaskInfo->id.vgId = vgId; + pTaskInfo->id.queryId = queryId; + pTaskInfo->id.str = taosMemoryMalloc(64); + buildTaskId(taskId, queryId, pTaskInfo->id.str); + + return pTaskInfo; +} + +bool isTaskKilled(SExecTaskInfo* pTaskInfo) { return (0 != pTaskInfo->code); } + +void setTaskKilled(SExecTaskInfo* pTaskInfo, int32_t rspCode) { + pTaskInfo->code = rspCode; + stopTableScanOperator(pTaskInfo->pRoot, pTaskInfo->id.str); +} + +void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status) { + if (status == TASK_NOT_COMPLETED) { + pTaskInfo->status = status; + } else { + // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first + CLEAR_QUERY_STATUS(pTaskInfo, TASK_NOT_COMPLETED); + pTaskInfo->status |= status; + } +} + +int32_t createExecTaskInfo(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SReadHandle* pHandle, uint64_t taskId, + int32_t vgId, char* sql, EOPTR_EXEC_MODEL model) { + *pTaskInfo = doCreateTask(pPlan->id.queryId, taskId, vgId, model); + if (*pTaskInfo == NULL) { + taosMemoryFree(sql); + return terrno; + } + + if (pHandle) { + if (pHandle->pStateBackend) { + (*pTaskInfo)->streamInfo.pState = pHandle->pStateBackend; + } + } + + TSWAP((*pTaskInfo)->sql, sql); + + (*pTaskInfo)->pSubplan = pPlan; + (*pTaskInfo)->pRoot = createOperator(pPlan->pNode, *pTaskInfo, pHandle, pPlan->pTagCond, pPlan->pTagIndexCond, + pPlan->user, pPlan->dbFName); + + if (NULL == (*pTaskInfo)->pRoot) { + int32_t code = (*pTaskInfo)->code; + doDestroyTask(*pTaskInfo); + return code; + } else { + return TSDB_CODE_SUCCESS; + } +} + +void cleanupQueriedTableScanInfo(SSchemaInfo* pSchemaInfo) { + taosMemoryFreeClear(pSchemaInfo->dbname); + taosMemoryFreeClear(pSchemaInfo->tablename); + tDeleteSSchemaWrapper(pSchemaInfo->sw); + tDeleteSSchemaWrapper(pSchemaInfo->qsw); +} + +int32_t initQueriedTableSchemaInfo(SReadHandle* pHandle, SScanPhysiNode* pScanNode, const char* dbName, SExecTaskInfo* pTaskInfo) { + SMetaReader mr = {0}; + if (pHandle == NULL) { + terrno = TSDB_CODE_INVALID_PARA; + return terrno; + } + + metaReaderInit(&mr, pHandle->meta, 0); + int32_t code = metaGetTableEntryByUidCache(&mr, pScanNode->uid); + if (code != TSDB_CODE_SUCCESS) { + qError("failed to get the table meta, uid:0x%" PRIx64 ", suid:0x%" PRIx64 ", %s", pScanNode->uid, pScanNode->suid, + GET_TASKID(pTaskInfo)); + + metaReaderClear(&mr); + return terrno; + } + + SSchemaInfo* pSchemaInfo = &pTaskInfo->schemaInfo; + + pSchemaInfo->tablename = taosStrdup(mr.me.name); + pSchemaInfo->dbname = taosStrdup(dbName); + + if (mr.me.type == TSDB_SUPER_TABLE) { + pSchemaInfo->sw = tCloneSSchemaWrapper(&mr.me.stbEntry.schemaRow); + pSchemaInfo->tversion = mr.me.stbEntry.schemaTag.version; + } else if (mr.me.type == TSDB_CHILD_TABLE) { + tDecoderClear(&mr.coder); + + tb_uid_t suid = mr.me.ctbEntry.suid; + code = metaGetTableEntryByUidCache(&mr, suid); + if (code != TSDB_CODE_SUCCESS) { + metaReaderClear(&mr); + return terrno; + } + + pSchemaInfo->sw = tCloneSSchemaWrapper(&mr.me.stbEntry.schemaRow); + pSchemaInfo->tversion = mr.me.stbEntry.schemaTag.version; + } else { + pSchemaInfo->sw = tCloneSSchemaWrapper(&mr.me.ntbEntry.schemaRow); + } + + metaReaderClear(&mr); + + pSchemaInfo->qsw = extractQueriedColumnSchema(pScanNode); + return TSDB_CODE_SUCCESS; +} + +SSchemaWrapper* extractQueriedColumnSchema(SScanPhysiNode* pScanNode) { + int32_t numOfCols = LIST_LENGTH(pScanNode->pScanCols); + int32_t numOfTags = LIST_LENGTH(pScanNode->pScanPseudoCols); + + SSchemaWrapper* pqSw = taosMemoryCalloc(1, sizeof(SSchemaWrapper)); + pqSw->pSchema = taosMemoryCalloc(numOfCols + numOfTags, sizeof(SSchema)); + + for (int32_t i = 0; i < numOfCols; ++i) { + STargetNode* pNode = (STargetNode*)nodesListGetNode(pScanNode->pScanCols, i); + SColumnNode* pColNode = (SColumnNode*)pNode->pExpr; + + SSchema* pSchema = &pqSw->pSchema[pqSw->nCols++]; + pSchema->colId = pColNode->colId; + pSchema->type = pColNode->node.resType.type; + pSchema->bytes = pColNode->node.resType.bytes; + tstrncpy(pSchema->name, pColNode->colName, tListLen(pSchema->name)); + } + + // this the tags and pseudo function columns, we only keep the tag columns + for (int32_t i = 0; i < numOfTags; ++i) { + STargetNode* pNode = (STargetNode*)nodesListGetNode(pScanNode->pScanPseudoCols, i); + + int32_t type = nodeType(pNode->pExpr); + if (type == QUERY_NODE_COLUMN) { + SColumnNode* pColNode = (SColumnNode*)pNode->pExpr; + + SSchema* pSchema = &pqSw->pSchema[pqSw->nCols++]; + pSchema->colId = pColNode->colId; + pSchema->type = pColNode->node.resType.type; + pSchema->bytes = pColNode->node.resType.bytes; + tstrncpy(pSchema->name, pColNode->colName, tListLen(pSchema->name)); + } + } + + return pqSw; +} + +static void cleanupStreamInfo(SStreamTaskInfo* pStreamInfo) { tDeleteSSchemaWrapper(pStreamInfo->schema); } + +static void freeBlock(void* pParam) { + SSDataBlock* pBlock = *(SSDataBlock**)pParam; + blockDataDestroy(pBlock); +} + +void doDestroyTask(SExecTaskInfo* pTaskInfo) { + qDebug("%s execTask is freed", GET_TASKID(pTaskInfo)); + destroyOperator(pTaskInfo->pRoot); + cleanupQueriedTableScanInfo(&pTaskInfo->schemaInfo); + cleanupStreamInfo(&pTaskInfo->streamInfo); + + if (!pTaskInfo->localFetch.localExec) { + nodesDestroyNode((SNode*)pTaskInfo->pSubplan); + } + + taosArrayDestroyEx(pTaskInfo->pResultBlockList, freeBlock); + taosArrayDestroy(pTaskInfo->stopInfo.pStopInfo); + taosMemoryFreeClear(pTaskInfo->sql); + taosMemoryFreeClear(pTaskInfo->id.str); + taosMemoryFreeClear(pTaskInfo); +} + +void buildTaskId(uint64_t taskId, uint64_t queryId, char* dst) { + char* p = dst; + + int32_t offset = 6; + memcpy(p, "TID:0x", offset); + offset += tintToHex(taskId, &p[offset]); + + memcpy(&p[offset], " QID:0x", 7); + offset += 7; + offset += tintToHex(queryId, &p[offset]); + + p[offset] = 0; +} diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 6c7d7f39dc..7933e26282 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -13,7 +13,7 @@ * along with this program. If not, see . */ -#include "executorimpl.h" +#include "executorInt.h" #include "filter.h" #include "function.h" #include "functionMgt.h" @@ -30,6 +30,8 @@ #include "tcompare.h" #include "thash.h" #include "ttypes.h" +#include "operator.h" +#include "querytask.h" int32_t scanDebug = 0; @@ -2300,7 +2302,7 @@ static void destroyStreamScanOperatorInfo(void* param) { SStreamScanInfo* pStreamScan = (SStreamScanInfo*)param; if (pStreamScan->pTableScanOp && pStreamScan->pTableScanOp->info) { - destroyOperatorInfo(pStreamScan->pTableScanOp); + destroyOperator(pStreamScan->pTableScanOp); } if (pStreamScan->tqReader) { diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index cb0f1aa068..10933f285c 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -13,8 +13,10 @@ * along with this program. If not, see . */ +#include "executorInt.h" #include "filter.h" -#include "executorimpl.h" +#include "operator.h" +#include "querytask.h" #include "tdatablock.h" typedef struct SSortOperatorInfo { diff --git a/source/libs/executor/src/sysscanoperator.c b/source/libs/executor/src/sysscanoperator.c index c78e6002cd..c75c49fe77 100644 --- a/source/libs/executor/src/sysscanoperator.c +++ b/source/libs/executor/src/sysscanoperator.c @@ -13,7 +13,7 @@ * along with this program. If not, see . */ -#include "executorimpl.h" +#include "executorInt.h" #include "filter.h" #include "function.h" #include "functionMgt.h" @@ -31,6 +31,9 @@ #include "thash.h" #include "ttypes.h" #include "vnode.h" +#include "operator.h" +#include "querytask.h" + typedef int (*__optSysFilter)(void* a, void* b, int16_t dtype); typedef int32_t (*__sys_filte)(void* pMeta, SNode* cond, SArray* result); diff --git a/source/libs/executor/src/tfill.c b/source/libs/executor/src/tfill.c index 8376caace0..fc4e82b57f 100644 --- a/source/libs/executor/src/tfill.c +++ b/source/libs/executor/src/tfill.c @@ -20,7 +20,7 @@ #include "tmsg.h" #include "ttypes.h" -#include "executorimpl.h" +#include "executorInt.h" #include "tcommon.h" #include "thash.h" #include "ttime.h" diff --git a/source/libs/executor/src/timesliceoperator.c b/source/libs/executor/src/timesliceoperator.c index f0e25d8cc5..29e3668ec4 100644 --- a/source/libs/executor/src/timesliceoperator.c +++ b/source/libs/executor/src/timesliceoperator.c @@ -12,10 +12,12 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ -#include "executorimpl.h" +#include "executorInt.h" #include "filter.h" #include "function.h" #include "functionMgt.h" +#include "operator.h" +#include "querytask.h" #include "tcommon.h" #include "tcompare.h" #include "tdatablock.h" diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 8993681ef9..700ca92881 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -12,10 +12,12 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ -#include "executorimpl.h" +#include "executorInt.h" #include "filter.h" #include "function.h" #include "functionMgt.h" +#include "operator.h" +#include "querytask.h" #include "tcommon.h" #include "tcompare.h" #include "tdatablock.h" @@ -27,6 +29,11 @@ #define IS_FINAL_OP(op) ((op)->isFinal) #define DEAULT_DELETE_MARK (1000LL * 60LL * 60LL * 24LL * 365LL * 10LL); +typedef struct SStateWindowInfo { + SResultWindowInfo winInfo; + SStateKeys* pStateKey; +} SStateWindowInfo; + typedef struct SSessionAggOperatorInfo { SOptrBasicInfo binfo; @@ -2798,7 +2805,7 @@ void destroyStreamSessionAggOperatorInfo(void* param) { int32_t size = taosArrayGetSize(pInfo->pChildren); for (int32_t i = 0; i < size; i++) { SOperatorInfo* pChild = taosArrayGetP(pInfo->pChildren, i); - destroyOperatorInfo(pChild); + destroyOperator(pChild); } taosArrayDestroy(pInfo->pChildren); } @@ -3776,7 +3783,7 @@ void destroyStreamStateOperatorInfo(void* param) { int32_t size = taosArrayGetSize(pInfo->pChildren); for (int32_t i = 0; i < size; i++) { SOperatorInfo* pChild = taosArrayGetP(pInfo->pChildren, i); - destroyOperatorInfo(pChild); + destroyOperator(pChild); } taosArrayDestroy(pInfo->pChildren); } diff --git a/source/libs/executor/test/executorTests.cpp b/source/libs/executor/test/executorTests.cpp index b9a696170a..cefe12990d 100644 --- a/source/libs/executor/test/executorTests.cpp +++ b/source/libs/executor/test/executorTests.cpp @@ -24,15 +24,12 @@ #include "os.h" #include "executor.h" -#include "executorimpl.h" +#include "executorInt.h" #include "function.h" +#include "operator.h" #include "taos.h" #include "tdatablock.h" #include "tdef.h" -#include "tglobal.h" -#include "tmsg.h" -#include "tname.h" -#include "trpc.h" #include "tvariant.h" namespace { diff --git a/source/libs/executor/test/lhashTests.cpp b/source/libs/executor/test/lhashTests.cpp index 24570ff788..92f7652d8d 100644 --- a/source/libs/executor/test/lhashTests.cpp +++ b/source/libs/executor/test/lhashTests.cpp @@ -15,7 +15,7 @@ #include #include -#include "executorimpl.h" +#include "executorInt.h" #include "tlinearhash.h" #pragma GCC diagnostic push diff --git a/source/libs/executor/test/sortTests.cpp b/source/libs/executor/test/sortTests.cpp index f35d07804e..8122d7d6a9 100644 --- a/source/libs/executor/test/sortTests.cpp +++ b/source/libs/executor/test/sortTests.cpp @@ -26,7 +26,7 @@ #include "os.h" #include "executor.h" -#include "executorimpl.h" +#include "executorInt.h" #include "taos.h" #include "tcompare.h" #include "tdatablock.h" diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index c71bff24bf..7ea093973a 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -167,20 +167,24 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { batchCnt++; - qDebug("task %d scan exec block num %d, block limit %d", pTask->id.taskId, batchCnt, batchSz); + qDebug("s-task:%s scan exec block num %d, block limit %d", pTask->id.idStr, batchCnt, batchSz); - if (batchCnt >= batchSz) break; + if (batchCnt >= batchSz) { + break; + } } + if (taosArrayGetSize(pRes) == 0) { if (finished) { taosArrayDestroy(pRes); - qDebug("task %d finish recover exec task ", pTask->id.taskId); + qDebug("s-task:%s finish recover exec task ", pTask->id.idStr); break; } else { - qDebug("task %d continue recover exec task ", pTask->id.taskId); + qDebug("s-task:%s continue recover exec task ", pTask->id.idStr); continue; } } + SStreamDataBlock* qRes = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); if (qRes == NULL) { taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 55c745e417..0d1440fbde 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -20,6 +20,8 @@ int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) { if (pTask->taskLevel == TASK_LEVEL__SOURCE) { atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__RECOVER_PREPARE); + qDebug("s-task:%s set task status:%d and start recover", pTask->id.idStr, pTask->status.taskStatus); + streamSetParamForRecover(pTask); streamSourceRecoverPrepareStep1(pTask, version); @@ -197,7 +199,6 @@ int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamRecoverStep1Req* } int32_t streamSourceRecoverScanStep1(SStreamTask* pTask) { - // return streamScanExec(pTask, 100); } @@ -210,8 +211,11 @@ int32_t streamBuildSourceRecover2Req(SStreamTask* pTask, SStreamRecoverStep2Req* int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver) { void* exec = pTask->exec.pExecutor; + + qDebug("s-task:%s recover step2(blocking stage) started", pTask->id.idStr); if (qStreamSourceRecoverStep2(exec, ver) < 0) { } + return streamScanExec(pTask, 100); }