Merge remote-tracking branch 'origin/3.0' into feature/dnode3

This commit is contained in:
Shengliang Guan 2022-01-09 18:50:01 -08:00
commit 95347ff6e1
23 changed files with 1554 additions and 669 deletions

View File

@ -20,16 +20,16 @@
extern "C" {
#endif
typedef void* qinfo_t;
typedef void* qTaskInfo_t;
/**
* create the qinfo object according to QueryTableMsg
* @param tsdb
* @param pQueryTableMsg
* @param qinfo
* @param pTaskInfo
* @return
*/
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableInfo* pQueryTableMsg, qinfo_t* qinfo, uint64_t qId);
int32_t qCreateTask(void* tsdb, int32_t vgId, void* pQueryTableMsg, qTaskInfo_t* pTaskInfo, uint64_t qId);
/**
* the main query execution function, including query on both table and multiple tables,
@ -38,7 +38,7 @@ int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableInfo* pQueryTableM
* @param qinfo
* @return
*/
bool qTableQuery(qinfo_t qinfo, uint64_t *qId);
bool qExecTask(qTaskInfo_t qinfo, uint64_t *qId);
/**
* Retrieve the produced results information, if current query is not paused or completed,
@ -48,7 +48,7 @@ bool qTableQuery(qinfo_t qinfo, uint64_t *qId);
* @param qinfo
* @return
*/
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext);
int32_t qRetrieveQueryResultInfo(qTaskInfo_t qinfo, bool* buildRes, void* pRspContext);
/**
*
@ -60,41 +60,41 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContex
* @param contLen payload length
* @return
*/
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp** pRsp, int32_t* contLen, bool* continueExec);
int32_t qDumpRetrieveResult(qTaskInfo_t qinfo, SRetrieveTableRsp** pRsp, int32_t* contLen, bool* continueExec);
/**
* return the transporter context (RPC)
* @param qinfo
* @return
*/
void* qGetResultRetrieveMsg(qinfo_t qinfo);
void* qGetResultRetrieveMsg(qTaskInfo_t qinfo);
/**
* kill the ongoing query and free the query handle and corresponding resources automatically
* @param qinfo qhandle
* @return
*/
int32_t qKillQuery(qinfo_t qinfo);
int32_t qKillTask(qTaskInfo_t qinfo);
/**
* return whether query is completed or not
* @param qinfo
* @return
*/
int32_t qIsQueryCompleted(qinfo_t qinfo);
int32_t qIsQueryCompleted(qTaskInfo_t qinfo);
/**
* destroy query info structure
* @param qHandle
*/
void qDestroyQueryInfo(qinfo_t qHandle);
void qDestroyTask(qTaskInfo_t qHandle);
/**
* Get the queried table uid
* @param qHandle
* @return
*/
int64_t qGetQueriedTableUid(qinfo_t qHandle);
int64_t qGetQueriedTableUid(qTaskInfo_t qHandle);
/**
* Extract the qualified table id list, and then pass it to the TSDB driver to load the required table data blocks.
@ -121,7 +121,7 @@ int32_t qCreateTableGroupByGroupExpr(SArray* pTableIdList, TSKEY skey, STableGro
* @param type operation type: ADD|DROP
* @return
*/
int32_t qUpdateQueriedTableIdList(qinfo_t qinfo, int64_t uid, int32_t type);
int32_t qUpdateQueriedTableIdList(qTaskInfo_t qinfo, int64_t uid, int32_t type);
//================================================================================================
// query handle management
@ -130,13 +130,13 @@ int32_t qUpdateQueriedTableIdList(qinfo_t qinfo, int64_t uid, int32_t type);
* @param vgId
* @return
*/
void* qOpenQueryMgmt(int32_t vgId);
void* qOpenTaskMgmt(int32_t vgId);
/**
* Broadcast the close notification and wait for all queries to stop.
* @param pExecutor
*/
void qQueryMgmtNotifyClosed(void* pExecutor);
void qTaskMgmtNotifyClosing(void* pExecutor);
/**
* Re-open the query handle management module when opening the vnode again.
@ -148,7 +148,7 @@ void qQueryMgmtReOpen(void *pExecutor);
* Close query mgmt and clean up resources.
* @param pExecutor
*/
void qCleanupQueryMgmt(void* pExecutor);
void qCleanupTaskMgmt(void* pExecutor);
/**
* Add the query into the query mgmt object
@ -157,7 +157,7 @@ void qCleanupQueryMgmt(void* pExecutor);
* @param qInfo
* @return
*/
void** qRegisterQInfo(void* pMgmt, uint64_t qId, void *qInfo);
void** qRegisterTask(void* pMgmt, uint64_t qId, void *qInfo);
/**
* acquire the query handle according to the key from query mgmt object.
@ -165,7 +165,7 @@ void** qRegisterQInfo(void* pMgmt, uint64_t qId, void *qInfo);
* @param key
* @return
*/
void** qAcquireQInfo(void* pMgmt, uint64_t key);
void** qAcquireTask(void* pMgmt, uint64_t key);
/**
* release the query handle and decrease the reference count in cache
@ -174,7 +174,7 @@ void** qAcquireQInfo(void* pMgmt, uint64_t key);
* @param freeHandle
* @return
*/
void** qReleaseQInfo(void* pMgmt, void* pQInfo);
void** qReleaseTask(void* pMgmt, void* pQInfo, bool freeHandle);
/**
* De-register the query handle from the management module and free it immediately.

View File

@ -89,7 +89,7 @@ enum {
};
enum {
MASTER_SCAN = 0x0u,
MAIN_SCAN = 0x0u,
REVERSE_SCAN = 0x1u,
REPEAT_SCAN = 0x2u, //repeat scan belongs to the master scan
MERGE_STAGE = 0x20u,
@ -183,7 +183,6 @@ typedef struct tExprNode {
struct {// function node
char functionName[FUNCTIONS_NAME_MAX_LENGTH];
// int32_t functionId;
int32_t num;
// Note that the attribute of pChild is not the parameter of function, it is the columns that involved in the

View File

@ -155,7 +155,7 @@ int32_t qCreateQueryDag(const struct SQueryNode* pQueryInfo, struct SQueryDag**
// @subplan subplan to be schedule
// @templateId templateId of a group of datasource subplans of this @subplan
// @ep one execution location of this group of datasource subplans
int32_t qSetSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep);
void qSetSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep);
int32_t qExplainQuery(const struct SQueryNode* pQueryInfo, struct SEpSet* pQnode, char** str);

View File

@ -88,37 +88,37 @@ typedef struct SResultRowPool {
SArray* pData; // SArray<void*>
} SResultRowPool;
struct SQueryAttr;
struct SQueryRuntimeEnv;
struct STaskAttr;
struct STaskRuntimeEnv;
struct SUdfInfo;
int32_t getOutputInterResultBufSize(struct SQueryAttr* pQueryAttr);
int32_t getOutputInterResultBufSize(struct STaskAttr* pQueryAttr);
size_t getResultRowSize(struct SQueryRuntimeEnv* pRuntimeEnv);
size_t getResultRowSize(struct STaskRuntimeEnv* pRuntimeEnv);
int32_t initResultRowInfo(SResultRowInfo* pResultRowInfo, int32_t size, int16_t type);
void cleanupResultRowInfo(SResultRowInfo* pResultRowInfo);
void resetResultRowInfo(struct SQueryRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo);
void resetResultRowInfo(struct STaskRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo);
int32_t numOfClosedResultRows(SResultRowInfo* pResultRowInfo);
void closeAllResultRows(SResultRowInfo* pResultRowInfo);
int32_t initResultRow(SResultRow *pResultRow);
void closeResultRow(SResultRowInfo* pResultRowInfo, int32_t slot);
bool isResultRowClosed(SResultRowInfo *pResultRowInfo, int32_t slot);
void clearResultRow(struct SQueryRuntimeEnv* pRuntimeEnv, SResultRow* pResultRow, int16_t type);
void clearResultRow(struct STaskRuntimeEnv* pRuntimeEnv, SResultRow* pResultRow, int16_t type);
struct SResultRowEntryInfo* getResultCell(const SResultRow* pRow, int32_t index, int32_t* offset);
void* destroyQueryFuncExpr(SExprInfo* pExprInfo, int32_t numOfExpr);
void* freeColumnInfo(SColumnInfo* pColumnInfo, int32_t numOfCols);
int32_t getRowNumForMultioutput(struct SQueryAttr* pQueryAttr, bool topBottomQuery, bool stable);
int32_t getRowNumForMultioutput(struct STaskAttr* pQueryAttr, bool topBottomQuery, bool stable);
/**
 * Look up the result row stored at position `slot` of a result-row container.
 *
 * The container must be non-NULL and `slot` must lie in [0, size); both
 * conditions are checked by assert only, so they are not enforced when
 * asserts are compiled out.
 *
 * @param pResultRowInfo  container whose pResult array is indexed
 * @param slot            index of the wanted row
 * @return the entry pResult[slot]
 */
static FORCE_INLINE SResultRow *getResultRow(SResultRowInfo *pResultRowInfo, int32_t slot) {
  assert(pResultRowInfo != NULL && slot >= 0 && slot < pResultRowInfo->size);

  SResultRow *pRow = pResultRowInfo->pResult[slot];
  return pRow;
}
static FORCE_INLINE char* getPosInResultPage(struct SQueryAttr* pQueryAttr, SFilePage* page, int32_t rowOffset,
static FORCE_INLINE char* getPosInResultPage(struct STaskAttr* pQueryAttr, SFilePage* page, int32_t rowOffset,
int32_t offset) {
assert(rowOffset >= 0 && pQueryAttr != NULL);
@ -155,7 +155,7 @@ bool hasRemainData(SGroupResInfo* pGroupResInfo);
bool incNextGroup(SGroupResInfo* pGroupResInfo);
int32_t getNumOfTotalRes(SGroupResInfo* pGroupResInfo);
int32_t mergeIntoGroupResult(SGroupResInfo* pGroupResInfo, struct SQueryRuntimeEnv *pRuntimeEnv, int32_t* offset);
int32_t mergeIntoGroupResult(SGroupResInfo* pGroupResInfo, struct STaskRuntimeEnv *pRuntimeEnv, int32_t* offset);
int32_t initUdfInfo(struct SUdfInfo* pUdfInfo);

View File

@ -21,13 +21,14 @@
#include "tvariant.h"
#include "thash.h"
//#include "parser.h"
#include "executil.h"
#include "taosdef.h"
#include "tarray.h"
#include "tfilter.h"
#include "tlockfree.h"
#include "tpagedfile.h"
#include "planner.h"
struct SColumnFilterElem;
@ -65,7 +66,6 @@ enum {
QUERY_OVER = 0x4u,
};
typedef struct SResultRowCell {
uint64_t groupId;
SResultRow *pRow;
@ -100,7 +100,7 @@ typedef struct STableQueryInfo {
TSKEY lastKey;
int32_t groupIndex; // group id in table list
SVariant tag;
STimeWindow win;
STimeWindow win; // todo remove it later
STSCursor cur;
void* pTable; // for retrieve the page id list
SResultRowInfo resInfo;
@ -128,31 +128,34 @@ typedef struct {
int64_t sumRunTimes;
} SOperatorProfResult;
typedef struct SQueryCostInfo {
uint64_t loadStatisTime;
uint64_t loadFileBlockTime;
uint64_t loadDataInCacheTime;
uint64_t loadStatisSize;
uint64_t loadFileBlockSize;
uint64_t loadDataInCacheSize;
uint64_t loadDataTime;
uint64_t totalRows;
uint64_t totalCheckedRows;
uint32_t totalBlocks;
uint32_t loadBlocks;
uint32_t loadBlockStatis;
uint32_t discardBlocks;
uint64_t elapsedTime;
uint64_t firstStageMergeTime;
uint64_t winInfoSize;
uint64_t tableInfoSize;
uint64_t hashSize;
uint64_t numOfTimeWindows;
typedef struct STaskCostInfo {
int64_t start;
int64_t end;
SArray* queryProfEvents; //SArray<SQueryProfEvent>
SHashObj* operatorProfResults; //map<operator_type, SQueryProfEvent>
} SQueryCostInfo;
uint64_t loadStatisTime;
uint64_t loadFileBlockTime;
uint64_t loadDataInCacheTime;
uint64_t loadStatisSize;
uint64_t loadFileBlockSize;
uint64_t loadDataInCacheSize;
uint64_t loadDataTime;
uint64_t totalRows;
uint64_t totalCheckedRows;
uint32_t totalBlocks;
uint32_t loadBlocks;
uint32_t loadBlockStatis;
uint32_t discardBlocks;
uint64_t elapsedTime;
uint64_t firstStageMergeTime;
uint64_t winInfoSize;
uint64_t tableInfoSize;
uint64_t hashSize;
uint64_t numOfTimeWindows;
SArray *queryProfEvents; //SArray<SQueryProfEvent>
SHashObj *operatorProfResults; //map<operator_type, SQueryProfEvent>
} STaskCostInfo;
typedef struct {
int64_t vgroupLimit;
@ -166,7 +169,7 @@ typedef struct {
// The basic query information extracted from the SQueryInfo tree to support the
// execution of query in a data node.
typedef struct SQueryAttr {
typedef struct STaskAttr {
SLimit limit;
SLimit slimit;
@ -229,16 +232,40 @@ typedef struct SQueryAttr {
STableGroupInfo tableGroupInfo; // table <tid, last_key> list SArray<STableKeyInfo>
int32_t vgId;
SArray *pUdfInfo; // no need to free
} SQueryAttr;
} STaskAttr;
typedef SSDataBlock* (*__operator_fn_t)(void* param, bool* newgroup);
typedef void (*__optr_cleanup_fn_t)(void* param, int32_t num);
struct SOperatorInfo;
typedef struct SQueryRuntimeEnv {
// Group of 64-bit identifiers carried by a task.
// NOTE(review): the comment on taskId says it is a subplan id while a separate
// subplanId field also exists -- confirm which one is authoritative.
typedef struct STaskIdInfo {
  uint64_t queryId;     // this is also a request id
  uint64_t subplanId;
  uint64_t templateId;
  uint64_t taskId;      // this is a subplan id
} STaskIdInfo;
// State kept for one task: its identifiers, status, time window, cost
// counters, table group list, a mutex and a jump buffer.
typedef struct STaskInfo {
  STaskIdInfo      id;                   // identifiers of this task
  char            *content;              // NOTE(review): meaning not evident from this header -- confirm
  uint32_t         status;
  STimeWindow      window;
  STaskCostInfo    cost;
  int64_t          owner;                // if it is in execution
  STableGroupInfo  tableqinfoGroupInfo;  // this is a group array list, including SArray<STableQueryInfo*> structure
  pthread_mutex_t  lock;                 // used to synchronize the rsp/query threads
  // Fields currently disabled:
  //   tsem_t   ready;
  //   int32_t  dataReady;   // denote if query result is ready or not
  //   void*    rspContext;  // response context
  char            *sql;                  // query sql string
  jmp_buf          env;                  // jump buffer (setjmp/longjmp context)
} STaskInfo;
typedef struct STaskRuntimeEnv {
jmp_buf env;
SQueryAttr* pQueryAttr;
STaskAttr* pQueryAttr;
uint32_t status; // query status
void* qinfo;
uint8_t scanFlag; // denotes reversed scan of data or not
@ -271,7 +298,7 @@ typedef struct SQueryRuntimeEnv {
SRspResultInfo resultInfo;
SHashObj *pTableRetrieveTsMap;
struct SUdfInfo *pUdfInfo;
} SQueryRuntimeEnv;
} STaskRuntimeEnv;
enum {
OP_IN_EXECUTING = 1,
@ -287,10 +314,11 @@ typedef struct SOperatorInfo {
char *name; // name, used to show the query execution plan
void *info; // extension attribution
SExprInfo *pExpr;
SQueryRuntimeEnv *pRuntimeEnv;
STaskRuntimeEnv *pRuntimeEnv;
STaskInfo *pTaskInfo;
struct SOperatorInfo **upstream; // upstream pointer list
int32_t numOfUpstream; // number of upstream. The value is always ONE expect for join operator
struct SOperatorInfo **pDownstream; // downstram pointer list
int32_t numOfDownstream; // number of downstream. The value is always ONE expect for join operator
__operator_fn_t exec;
__optr_cleanup_fn_t cleanup;
} SOperatorInfo;
@ -312,8 +340,8 @@ typedef struct SQInfo {
int32_t code; // error code to returned to client
int64_t owner; // if it is in execution
SQueryRuntimeEnv runtimeEnv;
SQueryAttr query;
STaskRuntimeEnv runtimeEnv;
STaskAttr query;
void* pBuf; // allocated buffer for STableQueryInfo, sizeof(STableQueryInfo)*numOfTables;
pthread_mutex_t lock; // used to synchronize the rsp/query threads
@ -322,10 +350,10 @@ typedef struct SQInfo {
void* rspContext; // response context
int64_t startExecTs; // start to exec timestamp
char* sql; // query sql string
SQueryCostInfo summary;
STaskCostInfo summary;
} SQInfo;
typedef struct SQueryParam {
typedef struct STaskParam {
char *sql;
char *tagCond;
char *colCond;
@ -345,7 +373,7 @@ typedef struct SQueryParam {
int32_t tableScanOperator;
SArray *pOperator;
struct SUdfInfo *pUdfInfo;
} SQueryParam;
} STaskParam;
typedef struct STableScanInfo {
void *pQueryHandle;
@ -366,9 +394,12 @@ typedef struct STableScanInfo {
SSDataBlock block;
int32_t numOfOutput;
int64_t elapsedTime;
int32_t tableIndex;
int32_t prevGroupId; // previous table group id
int32_t prevGroupId; // previous table group id
int32_t scanFlag; // table scan flag to denote if it is a repeat/reverse/main scan
STimeWindow window;
} STableScanInfo;
typedef struct STagScanInfo {
@ -512,34 +543,34 @@ typedef struct SOrderOperatorInfo {
void appendUpstream(SOperatorInfo* p, SOperatorInfo* pUpstream);
SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv, int32_t repeatTime, int32_t reverseTime);
SOperatorInfo* createTableScanOperator(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv, int32_t repeatTime);
SOperatorInfo* createTableSeqScanOperator(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv);
SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbQueryHandle, STaskRuntimeEnv* pRuntimeEnv, int32_t repeatTime, int32_t reverseTime);
SOperatorInfo* createTableScanOperator(void* pTsdbQueryHandle, int32_t order, int32_t numOfOutput, int32_t repeatTime);
SOperatorInfo* createTableSeqScanOperator(void* pTsdbQueryHandle, STaskRuntimeEnv* pRuntimeEnv);
SOperatorInfo* createAggregateOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createProjectOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createLimitOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream);
SOperatorInfo* createTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createAllTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createSWindowOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createFillOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, bool multigroupResult);
SOperatorInfo* createGroupbyOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createMultiTableAggOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createMultiTableTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createAllMultiTableTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createTagScanOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createDistinctOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createTableBlockInfoScanOperator(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv);
SOperatorInfo* createMultiwaySortOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput,
SOperatorInfo* createAggregateOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createProjectOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createLimitOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream);
SOperatorInfo* createTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createAllTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createSWindowOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createFillOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, bool multigroupResult);
SOperatorInfo* createGroupbyOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createMultiTableAggOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createMultiTableTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createAllMultiTableTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createTagScanOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createDistinctOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createTableBlockInfoScanOperator(void* pTsdbQueryHandle, STaskRuntimeEnv* pRuntimeEnv);
SOperatorInfo* createMultiwaySortOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput,
int32_t numOfRows, void* merger);
SOperatorInfo* createGlobalAggregateOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, void* param, SArray* pUdfInfo, bool groupResultMixedUp);
SOperatorInfo* createStatewindowOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createSLimitOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, void* merger, bool multigroupResult);
SOperatorInfo* createFilterOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr,
SOperatorInfo* createGlobalAggregateOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, void* param, SArray* pUdfInfo, bool groupResultMixedUp);
SOperatorInfo* createStatewindowOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput);
SOperatorInfo* createSLimitOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, void* merger, bool multigroupResult);
SOperatorInfo* createFilterOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr,
int32_t numOfOutput, SColumnInfo* pCols, int32_t numOfFilter);
SOperatorInfo* createJoinOperatorInfo(SOperatorInfo** pUpstream, int32_t numOfUpstream, SSchema* pSchema, int32_t numOfOutput);
SOperatorInfo* createOrderOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, SOrder* pOrderVal);
SOperatorInfo* createOrderOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, SOrder* pOrderVal);
SSDataBlock* doGlobalAggregate(void* param, bool* newgroup);
SSDataBlock* doMultiwayMergeSort(void* param, bool* newgroup);
@ -561,8 +592,8 @@ void updateOutputBuf(SOptrBasicInfo* pBInfo, int32_t *bufCapacity, int32_t numOf
void clearOutputBuf(SOptrBasicInfo* pBInfo, int32_t *bufCapacity);
void copyTsColoum(SSDataBlock* pRes, SQLFunctionCtx* pCtx, int32_t numOfOutput);
void freeParam(SQueryParam *param);
int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SQueryParam* param);
void freeParam(STaskParam *param);
int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, STaskParam* param);
int32_t createQueryFunc(SQueriedTableInfo* pTableInfo, int32_t numOfOutput, SExprInfo** pExprInfo,
SSqlExpr** pExprMsg, SColumnInfo* pTagCols, int32_t queryType, void* pMsg, struct SUdfInfo* pUdfInfo);
@ -575,13 +606,13 @@ SGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pCo
SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
SExprInfo *pSecExprs, STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, SFilterInfo* pFilters, int32_t vgId, char* sql, uint64_t qId, struct SUdfInfo* pUdfInfo);
int32_t initQInfo(STsBufInfo* pTsBufInfo, void* tsdb, void* sourceOptr, SQInfo* pQInfo, SQueryParam* param, char* start,
int32_t initQInfo(STsBufInfo* pTsBufInfo, void* tsdb, void* sourceOptr, SQInfo* pQInfo, STaskParam* param, char* start,
int32_t prevResultLen, void* merger);
int32_t createFilterInfo(SQueryAttr* pQueryAttr, uint64_t qId);
int32_t createFilterInfo(STaskAttr* pQueryAttr, uint64_t qId);
void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters);
STableQueryInfo *createTableQueryInfo(SQueryAttr* pQueryAttr, void* pTable, bool groupbyColumn, STimeWindow win, void* buf);
STableQueryInfo *createTableQueryInfo(STaskAttr* pQueryAttr, void* pTable, bool groupbyColumn, STimeWindow win, void* buf);
STableQueryInfo* createTmpTableQueryInfo(STimeWindow win);
int32_t buildArithmeticExprFromMsg(SExprInfo *pArithExprInfo, void *pQueryMsg);
@ -590,9 +621,9 @@ bool isQueryKilled(SQInfo *pQInfo);
int32_t checkForQueryBuf(size_t numOfTables);
bool checkNeedToCompressQueryCol(SQInfo *pQInfo);
bool doBuildResCheck(SQInfo* pQInfo);
void setQueryStatus(SQueryRuntimeEnv *pRuntimeEnv, int8_t status);
void setQueryStatus(STaskRuntimeEnv *pRuntimeEnv, int8_t status);
bool onlyQueryTags(SQueryAttr* pQueryAttr);
bool onlyQueryTags(STaskAttr* pQueryAttr);
void destroyUdfInfo(struct SUdfInfo* pUdfInfo);
bool isValidQInfo(void *param);
@ -607,8 +638,8 @@ void publishQueryAbortEvent(SQInfo* pQInfo, int32_t code);
void calculateOperatorProfResults(SQInfo* pQInfo);
void queryCostStatis(SQInfo *pQInfo);
void freeQInfo(SQInfo *pQInfo);
void freeQueryAttr(SQueryAttr *pQuery);
void doDestroyTask(SQInfo *pQInfo);
void freeQueryAttr(STaskAttr *pQuery);
int32_t getMaximumIdleDurationSec();

View File

@ -30,7 +30,7 @@ typedef struct SCompSupporter {
int32_t order;
} SCompSupporter;
int32_t getRowNumForMultioutput(SQueryAttr* pQueryAttr, bool topBottomQuery, bool stable) {
int32_t getRowNumForMultioutput(STaskAttr* pQueryAttr, bool topBottomQuery, bool stable) {
if (pQueryAttr && (!stable)) {
for (int16_t i = 0; i < pQueryAttr->numOfOutput; ++i) {
// if (pQueryAttr->pExpr1[i].base. == FUNCTION_TOP || pQueryAttr->pExpr1[i].base.functionId == FUNCTION_BOTTOM) {
@ -42,7 +42,7 @@ int32_t getRowNumForMultioutput(SQueryAttr* pQueryAttr, bool topBottomQuery, boo
return 1;
}
int32_t getOutputInterResultBufSize(SQueryAttr* pQueryAttr) {
int32_t getOutputInterResultBufSize(STaskAttr* pQueryAttr) {
int32_t size = 0;
for (int32_t i = 0; i < pQueryAttr->numOfOutput; ++i) {
@ -86,7 +86,7 @@ void cleanupResultRowInfo(SResultRowInfo *pResultRowInfo) {
tfree(pResultRowInfo->pResult);
}
void resetResultRowInfo(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo) {
void resetResultRowInfo(STaskRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo) {
if (pResultRowInfo == NULL || pResultRowInfo->capacity == 0) {
return;
}
@ -136,7 +136,7 @@ void closeResultRow(SResultRowInfo *pResultRowInfo, int32_t slot) {
getResultRow(pResultRowInfo, slot)->closed = true;
}
void clearResultRow(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResultRow, int16_t type) {
void clearResultRow(STaskRuntimeEnv *pRuntimeEnv, SResultRow *pResultRow, int16_t type) {
if (pResultRow == NULL) {
return;
}
@ -174,8 +174,8 @@ struct SResultRowEntryInfo* getResultCell(const SResultRow* pRow, int32_t index,
return NULL;
}
size_t getResultRowSize(SQueryRuntimeEnv* pRuntimeEnv) {
SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr;
size_t getResultRowSize(STaskRuntimeEnv* pRuntimeEnv) {
STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr;
return 0;
// return (pQueryAttr->numOfOutput * sizeof(SResultRowEntryInfo)) + pQueryAttr->interBufSize + sizeof(SResultRow);
}
@ -393,8 +393,8 @@ int32_t getNumOfTotalRes(SGroupResInfo* pGroupResInfo) {
return (int32_t) taosArrayGetSize(pGroupResInfo->pRows);
}
static int64_t getNumOfResultWindowRes(SQueryRuntimeEnv* pRuntimeEnv, SResultRow *pResultRow, int32_t* rowCellInfoOffset) {
SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr;
static int64_t getNumOfResultWindowRes(STaskRuntimeEnv* pRuntimeEnv, SResultRow *pResultRow, int32_t* rowCellInfoOffset) {
STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr;
for (int32_t j = 0; j < pQueryAttr->numOfOutput; ++j) {
int32_t functionId = 0;//pQueryAttr->pExpr1[j].base.functionId;
@ -488,7 +488,7 @@ int32_t tsDescOrder(const void* p1, const void* p2) {
}
}
void orderTheResultRows(SQueryRuntimeEnv* pRuntimeEnv) {
void orderTheResultRows(STaskRuntimeEnv* pRuntimeEnv) {
__compar_fn_t fn = NULL;
if (pRuntimeEnv->pQueryAttr->order.order == TSDB_ORDER_ASC) {
fn = tsAscOrder;
@ -499,7 +499,7 @@ void orderTheResultRows(SQueryRuntimeEnv* pRuntimeEnv) {
taosArraySort(pRuntimeEnv->pResultRowArrayList, fn);
}
static int32_t mergeIntoGroupResultImplRv(SQueryRuntimeEnv *pRuntimeEnv, SGroupResInfo* pGroupResInfo, uint64_t groupId, int32_t* rowCellInfoOffset) {
static int32_t mergeIntoGroupResultImplRv(STaskRuntimeEnv *pRuntimeEnv, SGroupResInfo* pGroupResInfo, uint64_t groupId, int32_t* rowCellInfoOffset) {
if (!pGroupResInfo->ordered) {
orderTheResultRows(pRuntimeEnv);
pGroupResInfo->ordered = true;
@ -528,7 +528,7 @@ static int32_t mergeIntoGroupResultImplRv(SQueryRuntimeEnv *pRuntimeEnv, SGroupR
return TSDB_CODE_SUCCESS;
}
static UNUSED_FUNC int32_t mergeIntoGroupResultImpl(SQueryRuntimeEnv *pRuntimeEnv, SGroupResInfo* pGroupResInfo, SArray *pTableList,
static UNUSED_FUNC int32_t mergeIntoGroupResultImpl(STaskRuntimeEnv *pRuntimeEnv, SGroupResInfo* pGroupResInfo, SArray *pTableList,
int32_t* rowCellInfoOffset) {
bool ascQuery = QUERY_IS_ASC_QUERY(pRuntimeEnv->pQueryAttr);
@ -630,7 +630,7 @@ static UNUSED_FUNC int32_t mergeIntoGroupResultImpl(SQueryRuntimeEnv *pRuntimeEn
return code;
}
int32_t mergeIntoGroupResult(SGroupResInfo* pGroupResInfo, SQueryRuntimeEnv* pRuntimeEnv, int32_t* offset) {
int32_t mergeIntoGroupResult(SGroupResInfo* pGroupResInfo, STaskRuntimeEnv* pRuntimeEnv, int32_t* offset) {
int64_t st = taosGetTimestampUs();
while (pGroupResInfo->currentGroup < pGroupResInfo->totalGroup) {

View File

@ -0,0 +1,579 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "os.h"
#include "tcache.h"
#include "tglobal.h"
#include "tmsg.h"
#include "exception.h"
#include "thash.h"
#include "executorimpl.h"
#include "executor.h"
#include "tlosertree.h"
#include "ttypes.h"
#include "query.h"
// Management object for task handles.
typedef struct STaskMgmt {
  pthread_mutex_t  lock;       // NOTE(review): presumably guards qinfoPool/closed -- confirm against users
  SCacheObj       *qinfoPool;  // query handle pool
  int32_t          vgId;       // id passed in when the mgmt object is opened (see qOpenTaskMgmt(int32_t vgId))
  bool             closed;     // NOTE(review): presumably set once closing has been announced -- confirm
} STaskMgmt;
/**
 * Kill the task whose handle is stored in the slot that `handle` points to.
 *
 * The slot is treated as a `void**`; the handle read from it is passed to
 * qKillTask(). A NULL slot pointer or a NULL stored handle is ignored, which
 * mirrors the guard used by freeqinfoFn() for the same kind of slot.
 *
 * @param handle  address of the slot holding the task handle
 * @param param1  unused
 */
static void taskMgmtKillTaskFn(void* handle, void* param1) {
  (void)param1;  // callback signature requires it; nothing to read from it here

  void** fp = (void**)handle;
  if (fp == NULL || *fp == NULL) {
    return;
  }

  qKillTask(*fp);
}
/**
 * Release the task whose handle is stored in the given slot.
 *
 * The task is first killed and then destroyed. Nothing happens when either
 * the slot pointer or the handle stored in it is NULL.
 *
 * @param qhandle  address of the slot holding the task handle (a void**)
 */
static void freeqinfoFn(void *qhandle) {
  void** ppTask = qhandle;

  if (ppTask != NULL && *ppTask != NULL) {
    qKillTask(*ppTask);
    qDestroyTask(*ppTask);
  }
}
/**
 * Release the buffers and arrays referenced by a STaskParam.
 *
 * Only the members listed below are released; the STaskParam object itself is
 * not freed. Members not listed here (for example colCond) are left to the
 * caller.
 *
 * @param param  parameter block to clean up; NULL is accepted and ignored
 */
void freeParam(STaskParam *param) {
  if (param == NULL) {
    return;
  }

  tfree(param->sql);
  tfree(param->tagCond);
  tfree(param->tbnameCond);

  // pTableIdList is accessed through taosArrayGetSize()/taosArrayGet() by its
  // users, i.e. it is an array object and has to be released with the array
  // destructor rather than with a plain free of the handle.
  if (param->pTableIdList != NULL) {
    taosArrayDestroy(param->pTableIdList);
    param->pTableIdList = NULL;
  }

  taosArrayDestroy(param->pOperator);
  param->pOperator = NULL;  // do not leave a dangling pointer behind

  tfree(param->pExprs);
  tfree(param->pSecExprs);

  tfree(param->pExpr);
  tfree(param->pSecExpr);

  tfree(param->pGroupColIndex);
  tfree(param->pTagColumnInfo);
  tfree(param->pGroupbyExpr);
  tfree(param->prevResult);
}
// todo parse json to get the operator tree.
int32_t qCreateTask(void* tsdb, int32_t vgId, void* pQueryMsg, qTaskInfo_t* pTaskInfo, uint64_t taskId) {
assert(pQueryMsg != NULL && tsdb != NULL);
int32_t code = TSDB_CODE_SUCCESS;
#if 0
STaskParam param = {0};
code = convertQueryMsg(pQueryMsg, &param);
if (code != TSDB_CODE_SUCCESS) {
goto _over;
}
if (pQueryMsg->numOfTables <= 0) {
qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
code = TSDB_CODE_QRY_INVALID_MSG;
goto _over;
}
if (param.pTableIdList == NULL || taosArrayGetSize(param.pTableIdList) == 0) {
qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
code = TSDB_CODE_QRY_INVALID_MSG;
goto _over;
}
SQueriedTableInfo info = { .numOfTags = pQueryMsg->numOfTags, .numOfCols = pQueryMsg->numOfCols, .colList = pQueryMsg->tableCols};
if ((code = createQueryFunc(&info, pQueryMsg->numOfOutput, &param.pExprs, param.pExpr, param.pTagColumnInfo,
pQueryMsg->queryType, pQueryMsg, param.pUdfInfo)) != TSDB_CODE_SUCCESS) {
goto _over;
}
if (param.pSecExpr != NULL) {
if ((code = createIndirectQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->secondStageOutput, &param.pSecExprs, param.pSecExpr, param.pExprs, param.pUdfInfo)) != TSDB_CODE_SUCCESS) {
goto _over;
}
}
if (param.colCond != NULL) {
if ((code = createQueryFilter(param.colCond, pQueryMsg->colCondLen, &param.pFilters)) != TSDB_CODE_SUCCESS) {
goto _over;
}
}
param.pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, param.pGroupColIndex, &code);
if ((param.pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
goto _over;
}
bool isSTableQuery = false;
STableGroupInfo tableGroupInfo = {0};
int64_t st = taosGetTimestampUs();
if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
STableIdInfo *id = taosArrayGet(param.pTableIdList, 0);
qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
goto _over;
}
} else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
isSTableQuery = true;
// also note there's possibility that only one table in the super table
if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
STableIdInfo *id = taosArrayGet(param.pTableIdList, 0);
// group by normal column, do not pass the group by condition to tsdb to group table into different group
int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(param.pGroupColIndex->flag)) {
numOfGroupByCols = 0;
}
qDebug("qmsg:%p query stable, uid:%"PRIu64", tid:%d", pQueryMsg, id->uid, id->tid);
code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, param.tagCond, pQueryMsg->tagCondLen,
pQueryMsg->tagNameRelType, param.tbnameCond, &tableGroupInfo, param.pGroupColIndex, numOfGroupByCols);
if (code != TSDB_CODE_SUCCESS) {
qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
goto _over;
}
} else {
code = tsdbGetTableGroupFromIdList(tsdb, param.pTableIdList, &tableGroupInfo);
if (code != TSDB_CODE_SUCCESS) {
goto _over;
}
qDebug("qmsg:%p query on %u tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
}
int64_t el = taosGetTimestampUs() - st;
qDebug("qmsg:%p tag filter completed, numOfTables:%u, elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
} else {
assert(0);
}
code = checkForQueryBuf(tableGroupInfo.numOfTables);
if (code != TSDB_CODE_SUCCESS) { // not enough query buffer, abort
goto _over;
}
assert(pQueryMsg->stableQuery == isSTableQuery);
(*pTaskInfo) = createQInfoImpl(pQueryMsg, param.pGroupbyExpr, param.pExprs, param.pSecExprs, &tableGroupInfo,
param.pTagColumnInfo, param.pFilters, vgId, param.sql, qId, param.pUdfInfo);
param.sql = NULL;
param.pExprs = NULL;
param.pSecExprs = NULL;
param.pGroupbyExpr = NULL;
param.pTagColumnInfo = NULL;
param.pFilters = NULL;
if ((*pTaskInfo) == NULL) {
code = TSDB_CODE_QRY_OUT_OF_MEMORY;
goto _over;
}
param.pUdfInfo = NULL;
code = initQInfo(&pQueryMsg->tsBuf, tsdb, NULL, *pTaskInfo, &param, (char*)pQueryMsg, pQueryMsg->prevResultLen, NULL);
_over:
if (param.pGroupbyExpr != NULL) {
taosArrayDestroy(param.pGroupbyExpr->columnInfo);
}
tfree(param.colCond);
destroyUdfInfo(param.pUdfInfo);
taosArrayDestroy(param.pTableIdList);
param.pTableIdList = NULL;
freeParam(&param);
for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
SColumnInfo* column = pQueryMsg->tableCols + i;
freeColumnFilterInfo(column->flist.filterInfo, column->flist.numOfFilters);
}
filterFreeInfo(param.pFilters);
//pTaskInfo already freed in initQInfo, but *pTaskInfo may not pointer to null;
if (code != TSDB_CODE_SUCCESS) {
*pTaskInfo = NULL;
}
#endif
// if failed to add ref for all tables in this query, abort current query
return code;
}
#ifdef TEST_IMPL
// wait moment
/*
 * Test-only hook: pause the calling thread for a duration encoded in the SQL text.
 *
 * The delay is read from the first " t_<number><unit>" marker in pQInfo->sql, where
 * <unit> is 'h', 'm' or 's'; any other character leaves the number in milliseconds.
 * SQL containing " count(*)" is never delayed.
 *
 * Returns 0 when no delay is applied (count(*) query or a zero delay) and 1 otherwise,
 * including when pQInfo->sql is NULL.
 */
int waitMoment(SQInfo* pQInfo) {
  const char* sql = pQInfo->sql;
  if (sql == NULL) {
    return 1;
  }

  if (strstr(sql, " count(*)") != NULL) {
    return 0;
  }

  int   delayMs = 0;
  char* marker = strstr(sql, " t_");
  if (marker != NULL) {
    char* cursor = marker + 3;
    delayMs = atoi(cursor);

    // step over the digits so that cursor lands on the unit suffix
    while (*cursor >= '0' && *cursor <= '9') {
      cursor++;
    }

    switch (*cursor) {
      case 'h':
        delayMs *= 3600*1000;
        break;
      case 'm':
        delayMs *= 60*1000;
        break;
      case 's':
        delayMs *= 1000;
        break;
      default:
        break;
    }
  }

  if (delayMs == 0) {
    return 0;
  }

  printf("test wait sleep %dms. sql=%s ...\n", delayMs, pQInfo->sql);

  if (delayMs < 1000) {
    taosMsleep(delayMs);
    return 1;
  }

  // long waits are sliced into one-second naps so that a kill request can interrupt them
  for (int sleptMs = 0; sleptMs < delayMs;) {
    taosMsleep(1000);
    sleptMs += 1000;
    if (isQueryKilled(pQInfo)) {
      printf("test check query is canceled, sleep break.%s\n", pQInfo->sql);
      break;
    }
  }

  return 1;
}
#endif
/**
 * Run one execution round of a query task on the calling thread.
 *
 * The handle is first claimed by swapping pQInfo->owner from 0 to the current thread id.
 * If the task was killed or has no tables, the round is skipped. Otherwise the root
 * operator is executed once, the elapsed time and row totals are accumulated, and the
 * outcome is logged.
 *
 * @param qinfo task handle; must be a valid SQInfo (asserted)
 * @param qId   output, receives pQInfo->qId once ownership has been acquired
 * @return false if another thread currently owns the handle (pQInfo->code is set to
 *         TSDB_CODE_QRY_IN_EXEC); in every other case the value of doBuildResCheck(pQInfo)
 */
bool qExecTask(qTaskInfo_t qinfo, uint64_t *qId) {
SQInfo *pQInfo = (SQInfo *)qinfo;
assert(pQInfo && pQInfo->signature == pQInfo);
int64_t threadId = taosGetSelfPthreadId();
int64_t curOwner = 0;
// claim the handle: a non-zero previous owner means it is already being executed elsewhere
if ((curOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, threadId)) != 0) {
qError("QInfo:0x%"PRIx64"-%p qhandle is now executed by thread:%p", pQInfo->qId, pQInfo, (void*) curOwner);
pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
return false;
}
*qId = pQInfo->qId;
// record the start time only on the first round
if(pQInfo->startExecTs == 0)
pQInfo->startExecTs = taosGetTimestampMs();
if (isQueryKilled(pQInfo)) {
qDebug("QInfo:0x%"PRIx64" it is already killed, abort", pQInfo->qId);
return doBuildResCheck(pQInfo);
}
STaskRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
if (pRuntimeEnv->tableqinfoGroupInfo.numOfTables == 0) {
qDebug("QInfo:0x%"PRIx64" no table exists for query, abort", pQInfo->qId);
// setTaskStatus(pRuntimeEnv, QUERY_COMPLETED);
return doBuildResCheck(pQInfo);
}
// error occurs, record the error code and return to client
// NOTE(review): presumably the operators longjmp back to this point with the error code - confirm
int32_t ret = setjmp(pQInfo->runtimeEnv.env);
if (ret != TSDB_CODE_SUCCESS) {
publishQueryAbortEvent(pQInfo, ret);
pQInfo->code = ret;
qDebug("QInfo:0x%"PRIx64" query abort due to error/cancel occurs, code:%s", pQInfo->qId, tstrerror(pQInfo->code));
return doBuildResCheck(pQInfo);
}
qDebug("QInfo:0x%"PRIx64" query task is launched", pQInfo->qId);
bool newgroup = false;
publishOperatorProfEvent(pRuntimeEnv->proot, QUERY_PROF_BEFORE_OPERATOR_EXEC);
// time only the root operator call; the result becomes the current output buffer
int64_t st = taosGetTimestampUs();
pRuntimeEnv->outputBuf = pRuntimeEnv->proot->exec(pRuntimeEnv->proot, &newgroup);
pQInfo->summary.elapsedTime += (taosGetTimestampUs() - st);
#ifdef TEST_IMPL
waitMoment(pQInfo);
#endif
publishOperatorProfEvent(pRuntimeEnv->proot, QUERY_PROF_AFTER_OPERATOR_EXEC);
pRuntimeEnv->resultInfo.total += GET_NUM_OF_RESULTS(pRuntimeEnv);
// the three branches below only differ in what is logged
if (isQueryKilled(pQInfo)) {
qDebug("QInfo:0x%"PRIx64" query is killed", pQInfo->qId);
} else if (GET_NUM_OF_RESULTS(pRuntimeEnv) == 0) {
qDebug("QInfo:0x%"PRIx64" over, %u tables queried, total %"PRId64" rows returned", pQInfo->qId, pRuntimeEnv->tableqinfoGroupInfo.numOfTables,
pRuntimeEnv->resultInfo.total);
} else {
qDebug("QInfo:0x%"PRIx64" query paused, %d rows returned, total:%" PRId64 " rows", pQInfo->qId,
GET_NUM_OF_RESULTS(pRuntimeEnv), pRuntimeEnv->resultInfo.total);
}
return doBuildResCheck(pQInfo);
}
/**
 * Report whether a result of the task can be built right now.
 *
 * In the blocking model (tsRetrieveBlockingModel) the call stores pRspContext, waits on
 * the task's `ready` semaphore and then always sets *buildRes to true. Otherwise it
 * checks `dataReady` under the task lock: when the result is ready *buildRes is set to
 * true, else pRspContext is stored on the task and *buildRes stays false.
 *
 * @param qinfo       task handle
 * @param buildRes    output flag, true when the caller may build the response
 * @param pRspContext response context stored on the task when it has to wait
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle, otherwise pQInfo->code
 */
int32_t qRetrieveQueryResultInfo(qTaskInfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo* pTask = (SQInfo*)qinfo;
  if (pTask == NULL || !isValidQInfo(pTask)) {
    qError("QInfo invalid qhandle");
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;
  if (IS_QUERY_KILLED(pTask)) {
    qDebug("QInfo:0x%"PRIx64" query is killed, code:0x%08x", pTask->qId, pTask->code);
    return pTask->code;
  }

  if (tsRetrieveBlockingModel) {
    pTask->rspContext = pRspContext;
    tsem_wait(&pTask->ready);
    *buildRes = true;
    return pTask->code;
  }

  STaskRuntimeEnv* pEnv = &pTask->runtimeEnv;
  STaskAttr*       pAttr = pTask->runtimeEnv.pQueryAttr;

  pthread_mutex_lock(&pTask->lock);
  assert(pTask->rspContext == NULL);

  const bool resultReady = (pTask->dataReady == QUERY_RESULT_READY);
  *buildRes = resultReady;
  if (resultReady) {
    qDebug("QInfo:0x%"PRIx64" retrieve result info, rowsize:%d, rows:%d, code:%s", pTask->qId, pAttr->resultRowSize,
           GET_NUM_OF_RESULTS(pEnv), tstrerror(pTask->code));
  } else {
    qDebug("QInfo:0x%"PRIx64" retrieve req set query return result after paused", pTask->qId);
    pTask->rspContext = pRspContext;
    assert(pTask->rspContext != NULL);
  }

  // read the code while the lock is still held
  int32_t status = pTask->code;
  pthread_mutex_unlock(&pTask->lock);
  return status;
}
/**
 * Return the response context currently stored on the task.
 *
 * @param qinfo task handle, must not be NULL (asserted)
 * @return the task's rspContext pointer
 */
void* qGetResultRetrieveMsg(qTaskInfo_t qinfo) {
  assert(qinfo != NULL);
  return ((SQInfo*)qinfo)->rspContext;
}
/**
 * Mark the task as killed and block until no thread owns the handle any more.
 *
 * @param qinfo task handle
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle, else TSDB_CODE_SUCCESS
 */
int32_t qKillTask(qTaskInfo_t qinfo) {
  SQInfo* pTask = (SQInfo*)qinfo;
  if (pTask == NULL || !isValidQInfo(pTask)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  qDebug("QInfo:0x%"PRIx64" query killed", pTask->qId);
  setQueryKilled(pTask);

  // Poll every 100ms until the executing thread has given up the handle (owner back to 0).
  // NOTE(review): owner is read here without an atomic load while qExecTask updates it
  // with an atomic compare-exchange - confirm this is intended.
  for (;;) {
    if (pTask->owner == 0) {
      break;
    }
    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}
/**
 * Tell whether the task has finished.
 *
 * @param qinfo task handle
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle; otherwise 1 when the
 *         task is killed or its runtime status matches QUERY_OVER, and 0 if neither holds
 */
int32_t qIsTaskCompleted(qTaskInfo_t qinfo) {
  SQInfo* pTask = (SQInfo*)qinfo;
  if (pTask == NULL || !isValidQInfo(pTask)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (isQueryKilled(pTask)) {
    return 1;
  }

  return Q_STATUS_EQUAL(pTask->runtimeEnv.status, QUERY_OVER) ? 1 : 0;
}
/**
 * Log the cost summary of a task and destroy it. Handles rejected by isValidQInfo()
 * are ignored.
 *
 * @param qHandle task handle
 */
void qDestroyTask(qTaskInfo_t qHandle) {
  SQInfo* pTask = (SQInfo*)qHandle;

  if (isValidQInfo(pTask)) {
    qDebug("QInfo:0x%"PRIx64" query completed", pTask->qId);
    queryCostStatis(pTask);   // print the query cost summary
    doDestroyTask(pTask);
  }
}
void* qOpenTaskMgmt(int32_t vgId) {
const int32_t refreshHandleInterval = 30; // every 30 seconds, refresh handle pool
char cacheName[128] = {0};
sprintf(cacheName, "qhandle_%d", vgId);
STaskMgmt* pTaskMgmt = calloc(1, sizeof(STaskMgmt));
if (pTaskMgmt == NULL) {
terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
return NULL;
}
pTaskMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, refreshHandleInterval, true, freeqinfoFn, cacheName);
pTaskMgmt->closed = false;
pTaskMgmt->vgId = vgId;
pthread_mutex_init(&pTaskMgmt->lock, NULL);
qDebug("vgId:%d, open queryTaskMgmt success", vgId);
return pTaskMgmt;
}
/**
 * Flag the task manager as closed (under its lock) and then refresh the handle cache
 * with taskMgmtKillTaskFn applied to the cached handles. A NULL manager is ignored.
 *
 * @param pQMgmt task manager returned by qOpenTaskMgmt
 */
void qTaskMgmtNotifyClosing(void* pQMgmt) {
  STaskMgmt* pMgmt = pQMgmt;
  if (pMgmt == NULL) {
    return;
  }

  qInfo("vgId:%d, set querymgmt closed, wait for all queries cancelled", pMgmt->vgId);

  pthread_mutex_lock(&pMgmt->lock);
  pMgmt->closed = true;
  pthread_mutex_unlock(&pMgmt->lock);

  taosCacheRefresh(pMgmt->qinfoPool, taskMgmtKillTaskFn, NULL);
}
/**
 * Clear the closed flag of the task manager under its lock. A NULL manager is ignored.
 *
 * @param pQMgmt task manager returned by qOpenTaskMgmt
 */
void qQueryMgmtReOpen(void *pQMgmt) {
  STaskMgmt* pMgmt = pQMgmt;
  if (pMgmt == NULL) {
    return;
  }

  qInfo("vgId:%d, set querymgmt reopen", pMgmt->vgId);

  pthread_mutex_lock(&pMgmt->lock);
  pMgmt->closed = false;
  pthread_mutex_unlock(&pMgmt->lock);
}
/**
 * Tear down a task manager: detach and clean up its handle cache, destroy its mutex and
 * free the manager itself. The manager must already be marked closed (asserted).
 * A NULL manager is ignored.
 *
 * @param pQMgmt task manager returned by qOpenTaskMgmt
 */
void qCleanupTaskMgmt(void* pQMgmt) {
  STaskMgmt* pMgmt = pQMgmt;
  if (pMgmt == NULL) {
    return;
  }

  // keep the id for the final log line, the manager is freed before it is printed
  const int32_t vgId = pMgmt->vgId;
  assert(pMgmt->closed);

  // detach the pool from the manager before cleaning it up
  SCacheObj* pPool = pMgmt->qinfoPool;
  pMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pMgmt->lock);
  tfree(pMgmt);

  qDebug("vgId:%d, queryMgmt cleanup completed", vgId);
}
/**
 * Put a task handle into the manager's handle cache, keyed by qId.
 *
 * The closed flag is checked and the handle is inserted while the manager lock is held.
 *
 * @param pMgmt task manager returned by qOpenTaskMgmt
 * @param qId   key of the task
 * @param qInfo task handle to store (the pointer value itself is cached)
 * @return the value returned by taosCachePut, or NULL with
 *         terrno = TSDB_CODE_VND_INVALID_VGROUP_ID when the manager is NULL, has no
 *         cache, or is closing
 */
void** qRegisterTask(void* pMgmt, uint64_t qId, void *qInfo) {
  if (pMgmt == NULL) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  STaskMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    qError("QInfo:0x%"PRIx64"-%p failed to add qhandle into qMgmt, since qMgmt is closed", qId, (void*)qInfo);
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  pthread_mutex_lock(&pQueryMgmt->lock);
  if (pQueryMgmt->closed) {
    pthread_mutex_unlock(&pQueryMgmt->lock);
    qError("QInfo:0x%"PRIx64"-%p failed to add qhandle into cache, since qMgmt is closing", qId, (void*)qInfo);
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  // the cache stores the pointer value, hence &qInfo with the size of a pointer entry
  void** handle = taosCachePut(pQueryMgmt->qinfoPool, &qId, sizeof(qId), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE),
                               (getMaximumIdleDurationSec()*1000));
  pthread_mutex_unlock(&pQueryMgmt->lock);
  return handle;
}
/**
 * Look up a task handle in the manager's cache and acquire a reference to it.
 *
 * @param pMgmt task manager returned by qOpenTaskMgmt
 * @param _key  key the task was registered with
 * @return the cache handle, or NULL with terrno set:
 *         TSDB_CODE_VND_INVALID_VGROUP_ID when the manager is NULL or closed,
 *         TSDB_CODE_QRY_INVALID_QHANDLE when there is no cache or no such entry
 */
void** qAcquireTask(void* pMgmt, uint64_t _key) {
  // same guard as qRegisterTask: a missing manager is reported instead of dereferenced
  if (pMgmt == NULL) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  STaskMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->closed) {
    terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
    return NULL;
  }

  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &_key, sizeof(_key));
  if (handle == NULL || *handle == NULL) {
    terrno = TSDB_CODE_QRY_INVALID_QHANDLE;
    return NULL;
  }

  return handle;
}
/**
 * Release a handle previously obtained from the manager's cache.
 *
 * @param pMgmt      task manager returned by qOpenTaskMgmt
 * @param pQInfo     handle passed through to taosCacheRelease
 * @param freeHandle passed through to taosCacheRelease as its last argument
 * @return always NULL
 */
void** qReleaseTask(void* pMgmt, void* pQInfo, bool freeHandle) {
  STaskMgmt *pQueryMgmt = pMgmt;

  // nothing to release when there is no manager or its cache is already gone
  if (pQueryMgmt == NULL || pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
  return NULL;
}
#if 0
//kill by qid
int32_t qKillQueryByQId(void* pMgmt, int64_t qId, int32_t waitMs, int32_t waitCount) {
int32_t error = TSDB_CODE_SUCCESS;
void** handle = qAcquireTask(pMgmt, qId);
if(handle == NULL) return terrno;
SQInfo* pQInfo = (SQInfo*)(*handle);
if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
return TSDB_CODE_QRY_INVALID_QHANDLE;
}
qWarn("QId:0x%"PRIx64" be killed(no memory commit).", pQInfo->qId);
setQueryKilled(pQInfo);
// wait query stop
int32_t loop = 0;
while (pQInfo->owner != 0) {
taosMsleep(waitMs);
if(loop++ > waitCount){
error = TSDB_CODE_FAILED;
break;
}
}
qReleaseTask(pMgmt, (void **)&handle, true);
return error;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -19,6 +19,7 @@
#include "index.h"
#include "index_fst.h"
#include "taos.h"
#include "tchecksum.h"
#include "thash.h"
#include "tlog.h"

View File

@ -34,6 +34,7 @@ typedef struct WriterCtx {
int (*read)(struct WriterCtx* ctx, uint8_t* buf, int len);
int (*flush)(struct WriterCtx* ctx);
int (*readFrom)(struct WriterCtx* ctx, uint8_t* buf, int len, int32_t offset);
int (*size)(struct WriterCtx* ctx);
WriterType type;
union {
struct {

View File

@ -34,7 +34,10 @@ void indexInit() {
// refactor later
indexQhandle = taosInitScheduler(INDEX_QUEUE_SIZE, INDEX_NUM_OF_THREADS, "index");
}
void indexCleanUp() { taosCleanUpScheduler(indexQhandle); }
void indexCleanUp() {
// refacto later
taosCleanUpScheduler(indexQhandle);
}
static int uidCompare(const void* a, const void* b) {
// add more version compare

View File

@ -21,7 +21,7 @@
#define MAX_INDEX_KEY_LEN 256 // test only, change later
#define MEM_TERM_LIMIT 10 * 10000
#define MEM_THRESHOLD 1024 * 1024 * 2
#define MEM_THRESHOLD 1024 * 1024
#define MEM_ESTIMATE_RADIO 1.5
static void indexMemRef(MemTable* tbl);

View File

@ -935,7 +935,10 @@ Fst* fstCreate(FstSlice* slice) {
uint32_t checkSum = 0;
len -= sizeof(checkSum);
taosDecodeFixedU32(buf + len, &checkSum);
if (taosCheckChecksum(buf, len, checkSum)) {
// verify fst
return NULL;
}
CompiledAddr rootAddr;
len -= sizeof(rootAddr);
taosDecodeFixedU64(buf + len, &rootAddr);

View File

@ -59,6 +59,13 @@ static int writeCtxDoReadFrom(WriterCtx* ctx, uint8_t* buf, int len, int32_t off
}
return nRead;
}
// Size callback of a WriterCtx: reports file.size for a read-only file context and 0 for
// every other kind of context.
static int writeCtxGetSize(WriterCtx* ctx) {
  // refactor later
  if (ctx->type != TFile || !ctx->file.readOnly) {
    return 0;
  }
  return ctx->file.size;
}
static int writeCtxDoFlush(WriterCtx* ctx) {
if (ctx->type == TFile) {
// taosFsyncFile(ctx->file.fd);
@ -109,6 +116,7 @@ WriterCtx* writerCtxCreate(WriterType type, const char* path, bool readOnly, int
ctx->read = writeCtxDoRead;
ctx->flush = writeCtxDoFlush;
ctx->readFrom = writeCtxDoReadFrom;
ctx->size = writeCtxGetSize;
ctx->offset = 0;
ctx->limit = capacity;
@ -159,6 +167,8 @@ int fstCountingWriterWrite(FstCountingWriter* write, uint8_t* buf, uint32_t len)
int nWrite = ctx->write(ctx, buf, len);
assert(nWrite == len);
write->count += len;
write->summer = taosCalcChecksum(write->summer, buf, len);
return len;
}
int fstCountingWriterRead(FstCountingWriter* write, uint8_t* buf, uint32_t len) {
@ -169,7 +179,10 @@ int fstCountingWriterRead(FstCountingWriter* write, uint8_t* buf, uint32_t len)
return nRead;
}
uint32_t fstCountingWriterMaskedCheckSum(FstCountingWriter* write) { return 0; }
uint32_t fstCountingWriterMaskedCheckSum(FstCountingWriter* write) {
// opt
return write->summer;
}
int fstCountingWriterFlush(FstCountingWriter* write) {
WriterCtx* ctx = write->wrt;

View File

@ -21,8 +21,11 @@ p *
#include "index_fst_counting_writer.h"
#include "index_util.h"
#include "taosdef.h"
#include "tcoding.h"
#include "tcompare.h"
const static uint64_t tfileMagicNumber = 0xdb4775248b80fb57ull;
typedef struct TFileFstIter {
FstStreamBuilder* fb;
StreamWithState* st;
@ -40,9 +43,12 @@ static void tfileSerialTableIdsToBuf(char* buf, SArray* tableIds);
static int tfileWriteHeader(TFileWriter* writer);
static int tfileWriteFstOffset(TFileWriter* tw, int32_t offset);
static int tfileWriteData(TFileWriter* write, TFileValue* tval);
static int tfileWriteFooter(TFileWriter* write);
// handle file corrupt later
static int tfileReaderLoadHeader(TFileReader* reader);
static int tfileReaderLoadFst(TFileReader* reader);
static int tfileReaderVerify(TFileReader* reader);
static int tfileReaderLoadTableIds(TFileReader* reader, int32_t offset, SArray* result);
static SArray* tfileGetFileList(const char* path);
@ -138,8 +144,14 @@ TFileReader* tfileReaderCreate(WriterCtx* ctx) {
TFileReader* reader = calloc(1, sizeof(TFileReader));
if (reader == NULL) { return NULL; }
// T_REF_INC(reader);
reader->ctx = ctx;
if (0 != tfileReaderVerify(reader)) {
tfileReaderDestroy(reader);
indexError("invalid tfile, suid: %" PRIu64 ", colName: %s", reader->header.suid, reader->header.colName);
return NULL;
}
// T_REF_INC(reader);
if (0 != tfileReaderLoadHeader(reader)) {
tfileReaderDestroy(reader);
indexError("failed to load index header, suid: %" PRIu64 ", colName: %s", reader->header.suid,
@ -296,6 +308,8 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) {
fstBuilderFinish(tw->fb);
fstBuilderDestroy(tw->fb);
tw->fb = NULL;
tfileWriteFooter(tw);
return 0;
}
void tfileWriterClose(TFileWriter* tw) {
@ -502,6 +516,14 @@ static int tfileWriteData(TFileWriter* write, TFileValue* tval) {
}
return 0;
}
// Append the tfile footer: the fixed-width encoding of tfileMagicNumber.
// Returns the number of bytes written, which is asserted to equal
// sizeof(tfileMagicNumber).
static int tfileWriteFooter(TFileWriter* write) {
  char  buf[sizeof(tfileMagicNumber) + 1] = {0};
  void* pBuf = (void*)buf;
  taosEncodeFixedU64(&pBuf, tfileMagicNumber);

  // The footer is binary data, so its length is the encoded width, not strlen(): a magic
  // number containing a zero byte would otherwise be written truncated.
  const int footerLen = (int)sizeof(tfileMagicNumber);
  int       nwrite = write->ctx->write(write->ctx, (uint8_t*)buf, footerLen);
  assert(nwrite == footerLen);
  return nwrite;
}
static int tfileReaderLoadHeader(TFileReader* reader) {
// TODO simple tfile header later
char buf[TFILE_HEADER_SIZE] = {0};
@ -527,9 +549,14 @@ static int tfileReaderLoadFst(TFileReader* reader) {
if (buf == NULL) { return -1; }
WriterCtx* ctx = reader->ctx;
int32_t nread = ctx->readFrom(ctx, buf, FST_MAX_SIZE, reader->header.fstOffset);
indexInfo("nread = %d, and fst offset=%d, filename: %s, size: %d ", nread, reader->header.fstOffset, ctx->file.buf,
ctx->file.size);
int size = ctx->size(ctx);
int64_t ts = taosGetTimestampUs();
int32_t nread =
ctx->readFrom(ctx, buf, size - reader->header.fstOffset - sizeof(tfileMagicNumber), reader->header.fstOffset);
int64_t cost = taosGetTimestampUs() - ts;
indexInfo("nread = %d, and fst offset=%d, filename: %s, size: %d, time cost: %" PRId64 "us", nread,
reader->header.fstOffset, ctx->file.buf, ctx->file.size, cost);
// we assuse fst size less than FST_MAX_SIZE
assert(nread > 0 && nread < FST_MAX_SIZE);
@ -558,6 +585,25 @@ static int tfileReaderLoadTableIds(TFileReader* reader, int32_t offset, SArray*
free(buf);
return 0;
}
// Check that the file behind the reader ends with the tfile magic number.
// Returns 0 when the footer matches, -1 when the file is too small, the footer cannot be
// read completely, or the magic number differs.
static int tfileReaderVerify(TFileReader* reader) {
  // just validate header and footer, file corruption should also be verified later
  WriterCtx* ctx = reader->ctx;

  uint64_t tMagicNumber = 0;
  char     buf[sizeof(tMagicNumber) + 1] = {0};
  const int footerLen = (int)sizeof(tMagicNumber);

  int size = ctx->size(ctx);
  // reject a negative size explicitly: compared against size_t it would be converted to
  // a huge unsigned value and slip through the checks below
  if (size < 0) {
    return -1;
  }
  if ((size_t)size < sizeof(tMagicNumber) || (size_t)size <= sizeof(reader->header)) {
    return -1;
  }

  if (ctx->readFrom(ctx, (uint8_t*)buf, footerLen, size - footerLen) != footerLen) {
    return -1;
  }

  taosDecodeFixedU64(buf, &tMagicNumber);
  return tMagicNumber == tfileMagicNumber ? 0 : -1;
}
void tfileReaderRef(TFileReader* reader) {
if (reader == NULL) { return; }
int ref = T_REF_INC(reader);

View File

@ -1,4 +1,5 @@
#include <algorithm>
#include <iostream>
#include <string>
#include <thread>
@ -12,7 +13,6 @@
#include "index_tfile.h"
#include "tskiplist.h"
#include "tutil.h"
void* callback(void* s) { return s; }
static std::string fileName = "/tmp/tindex.tindex";
@ -293,7 +293,7 @@ void validateTFile(char* arg) {
std::thread threads[NUM_OF_THREAD];
// std::vector<std::thread> threads;
TFileReader* reader = tfileReaderOpen(arg, 0, 999992, "tag1");
TFileReader* reader = tfileReaderOpen(arg, 0, 20000000, "tag1");
for (int i = 0; i < NUM_OF_THREAD; i++) {
threads[i] = std::thread(fst_get, reader->fst);
@ -306,13 +306,41 @@ void validateTFile(char* arg) {
}
tfCleanup();
}
void iterTFileReader(char* path, char* ver) {
tfInit();
int version = atoi(ver);
TFileReader* reader = tfileReaderOpen(path, 0, version, "tag1");
Iterate* iter = tfileIteratorCreate(reader);
bool tn = iter ? iter->next(iter) : false;
int count = 0;
int termCount = 0;
while (tn == true) {
count++;
IterateValue* cv = iter->getValue(iter);
termCount += (int)taosArrayGetSize(cv->val);
printf("col val: %s, size: %d\n", cv->colVal, (int)taosArrayGetSize(cv->val));
tn = iter->next(iter);
}
printf("total size: %d\n term count: %d\n", count, termCount);
tfileIteratorDestroy(iter);
tfCleanup();
}
int main(int argc, char* argv[]) {
// tool to check all kind of fst test
// if (argc > 1) { validateTFile(argv[1]); }
if (argc > 2) {
// opt
iterTFileReader(argv[1], argv[2]);
}
// checkFstCheckIterator();
// checkFstLongTerm();
// checkFstPrefixSearch();
checkMillonWriteAndReadOfFst();
// checkMillonWriteAndReadOfFst();
return 1;
}

View File

@ -665,14 +665,19 @@ class IndexObj {
size_t numOfTable = 100 * 10000) {
std::string tColVal = colVal;
size_t colValSize = tColVal.size();
int skip = 100;
numOfTable /= skip;
for (int i = 0; i < numOfTable; i++) {
tColVal[i % colValSize] = 'a' + i % 26;
for (int k = 0; k < 10 && k < colVal.size(); k++) {
// opt
tColVal[rand() % colValSize] = 'a' + k % 26;
}
SIndexTerm* term = indexTermCreate(0, ADD_VALUE, TSDB_DATA_TYPE_BINARY, colName.c_str(), colName.size(),
tColVal.c_str(), tColVal.size());
SIndexMultiTerm* terms = indexMultiTermCreate();
indexMultiTermAdd(terms, term);
for (size_t i = 0; i < 10; i++) {
int ret = Put(terms, i);
for (size_t j = 0; j < skip; j++) {
int ret = Put(terms, j);
assert(ret == 0);
}
indexMultiTermDestroy(terms);
@ -939,10 +944,11 @@ TEST_F(IndexEnv2, testIndex_read_performance) {
TEST_F(IndexEnv2, testIndexMultiTag) {
std::string path = "/tmp/multi_tag";
if (index->Init(path) != 0) {}
index->WriteMultiMillonData("tag1", "Hello", 100 * 10000);
index->WriteMultiMillonData("tag2", "Test", 100 * 10000);
index->WriteMultiMillonData("tag3", "Test", 100 * 10000);
index->WriteMultiMillonData("tag4", "Test", 100 * 10000);
int64_t st = taosGetTimestampUs();
int32_t num = 1000 * 10000;
index->WriteMultiMillonData("tag1", "xxxxxxxxxxxxxxx", num);
std::cout << "numOfRow: " << num << "\ttime cost:" << taosGetTimestampUs() - st << std::endl;
// index->WriteMultiMillonData("tag2", "xxxxxxxxxxxxxxxxxxxxxxxxx", 100 * 10000);
}
TEST_F(IndexEnv2, testLongComVal) {
std::string path = "/tmp/long_colVal";

View File

@ -3952,7 +3952,8 @@ int32_t qParserValidateSqlNode(SParseBasicCtx *pCtx, SSqlInfo* pInfo, SQueryStmt
pQueryInfo->pTableMetaInfo[0]->pTableMeta = pmt;
pQueryInfo->pTableMetaInfo[0]->name = *name;
pQueryInfo->numOfTables = 1;
pQueryInfo->pTableMetaInfo[0]->tagColList = taosArrayInit(4, POINTER_BYTES);
code = setTableVgroupList(pCtx, name, &pQueryInfo->pTableMetaInfo[0]->vgroupList);
if (code != TSDB_CODE_SUCCESS) {
taosArrayDestroy(data.pTableMeta);

View File

@ -106,7 +106,7 @@ int32_t queryPlanToString(struct SQueryPlanNode* pQueryNode, char** str);
int32_t queryPlanToSql(struct SQueryPlanNode* pQueryNode, char** sql);
int32_t createDag(SQueryPlanNode* pQueryNode, struct SCatalog* pCatalog, SQueryDag** pDag, uint64_t requestId);
int32_t setSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep);
void setSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep);
int32_t subPlanToString(const SSubplan *pPhyNode, char** str, int32_t* len);
int32_t stringToSubplan(const char* str, SSubplan** subplan);

View File

@ -75,24 +75,16 @@ int32_t dsinkNameToDsinkType(const char* name) {
return DSINK_Unknown;
}
static SDataSink* initDataSink(int32_t type, int32_t size) {
SDataSink* sink = (SDataSink*)validPointer(calloc(1, size));
sink->info.type = type;
sink->info.name = dsinkTypeToDsinkName(type);
return sink;
}
static SDataSink* createDataDispatcher(SPlanContext* pCxt, SQueryPlanNode* pPlanNode) {
SDataDispatcher* dispatcher = (SDataDispatcher*)initDataSink(DSINK_Dispatch, sizeof(SDataDispatcher));
return (SDataSink*)dispatcher;
}
static SDataSink* createDataInserter(SPlanContext* pCxt, SVgDataBlocks* pBlocks) {
SDataInserter* inserter = (SDataInserter*)initDataSink(DSINK_Insert, sizeof(SDataInserter));
inserter->numOfTables = pBlocks->numOfTables;
inserter->size = pBlocks->size;
SWAP(inserter->pData, pBlocks->pData, char*);
return (SDataSink*)inserter;
// Copy a data block schema: allocates dst->pSchema, copies the slot array of src and
// mirrors numOfCols, resultRowSize and precision. Returns false when the allocation
// fails, in which case only dst->pSchema has been touched.
static bool copySchema(SDataBlockSchema* dst, const SDataBlockSchema* src) {
  const size_t slotBytes = sizeof(SSlotSchema) * src->numOfCols;

  dst->pSchema = malloc(slotBytes);
  if (dst->pSchema == NULL) {
    return false;
  }
  memcpy(dst->pSchema, src->pSchema, slotBytes);

  dst->precision = src->precision;
  dst->resultRowSize = src->resultRowSize;
  dst->numOfCols = src->numOfCols;
  return true;
}
static bool toDataBlockSchema(SQueryPlanNode* pPlanNode, SDataBlockSchema* dataBlockSchema) {
@ -102,6 +94,10 @@ static bool toDataBlockSchema(SQueryPlanNode* pPlanNode, SDataBlockSchema* dataB
return false;
}
memcpy(dataBlockSchema->pSchema, pPlanNode->pSchema, sizeof(SSlotSchema) * pPlanNode->numOfCols);
dataBlockSchema->resultRowSize = 0;
for (int32_t i = 0; i < dataBlockSchema->numOfCols; ++i) {
dataBlockSchema->resultRowSize += dataBlockSchema->pSchema[i].bytes;
}
return true;
}
@ -120,13 +116,37 @@ static bool cloneExprArray(SArray** dst, SArray* src) {
return (TSDB_CODE_SUCCESS == copyAllExprInfo(*dst, src, true) ? true : false);
}
// Allocate a zeroed data sink of `size` bytes and fill in its type and name. When pRoot
// is given, its target schema is copied into the sink; if that copy fails the sink is
// freed and TSDB_CODE_TSC_OUT_OF_MEMORY is thrown.
static SDataSink* initDataSink(int32_t type, int32_t size, const SPhyNode* pRoot) {
  SDataSink* pSink = (SDataSink*)validPointer(calloc(1, size));
  pSink->info.type = type;
  pSink->info.name = dsinkTypeToDsinkName(type);

  // without a root node there is no schema to copy
  const bool schemaReady = (NULL == pRoot) || copySchema(&pSink->schema, &pRoot->targetSchema);
  if (!schemaReady) {
    tfree(pSink);
    THROW(TSDB_CODE_TSC_OUT_OF_MEMORY);
  }

  return pSink;
}
// Build an insert sink for one vgroup data block: the table count and size are copied
// and the payload pointer is swapped between the block and the sink.
static SDataSink* createDataInserter(SPlanContext* pCxt, SVgDataBlocks* pBlocks, const SPhyNode* pRoot) {
  SDataSink*     pSink = initDataSink(DSINK_Insert, sizeof(SDataInserter), pRoot);
  SDataInserter* pInserter = (SDataInserter*)pSink;

  SWAP(pInserter->pData, pBlocks->pData, char*);
  pInserter->size = pBlocks->size;
  pInserter->numOfTables = pBlocks->numOfTables;

  return pSink;
}
// Build a dispatch sink sized as SDataDispatcher; initDataSink copies the target schema
// of pRoot into it when pRoot is not NULL.
static SDataSink* createDataDispatcher(SPlanContext* pCxt, SQueryPlanNode* pPlanNode, const SPhyNode* pRoot) {
  return initDataSink(DSINK_Dispatch, sizeof(SDataDispatcher), pRoot);
}
static SPhyNode* initPhyNode(SQueryPlanNode* pPlanNode, int32_t type, int32_t size) {
SPhyNode* node = (SPhyNode*)validPointer(calloc(1, size));
node->info.type = type;
node->info.name = opTypeToOpName(type);
if (!cloneExprArray(&node->pTargets, pPlanNode->pExpr) || !toDataBlockSchema(pPlanNode, &(node->targetSchema))) {
free(node);
return NULL;
THROW(TSDB_CODE_TSC_OUT_OF_MEMORY);
}
return node;
}
@ -149,7 +169,7 @@ static SPhyNode* createTagScanNode(SQueryPlanNode* pPlanNode) {
static uint8_t getScanFlag(SQueryPlanNode* pPlanNode, SQueryTableInfo* pTable) {
// todo
return MASTER_SCAN;
return MAIN_SCAN;
}
static SPhyNode* createUserTableScanNode(SQueryPlanNode* pPlanNode, SQueryTableInfo* pTable, int32_t op) {
@ -237,9 +257,10 @@ static uint64_t splitSubplanByTable(SPlanContext* pCxt, SQueryPlanNode* pPlanNod
for (int32_t i = 0; i < pTable->pMeta->vgroupList->numOfVgroups; ++i) {
STORE_CURRENT_SUBPLAN(pCxt);
SSubplan* subplan = initSubplan(pCxt, QUERY_TYPE_SCAN);
subplan->msgType = TDMT_VND_QUERY;
vgroupMsgToEpSet(&(pTable->pMeta->vgroupList->vgroups[i]), &subplan->execNode);
subplan->pNode = createMultiTableScanNode(pPlanNode, pTable);
subplan->pDataSink = createDataDispatcher(pCxt, pPlanNode);
subplan->pDataSink = createDataDispatcher(pCxt, pPlanNode, subplan->pNode);
RECOVERY_CURRENT_SUBPLAN(pCxt);
}
return pCxt->nextId.templateId++;
@ -248,6 +269,7 @@ static uint64_t splitSubplanByTable(SPlanContext* pCxt, SQueryPlanNode* pPlanNod
static SPhyNode* createExchangeNode(SPlanContext* pCxt, SQueryPlanNode* pPlanNode, uint64_t srcTemplateId) {
SExchangePhyNode* node = (SExchangePhyNode*)initPhyNode(pPlanNode, OP_Exchange, sizeof(SExchangePhyNode));
node->srcTemplateId = srcTemplateId;
node->pSrcEndPoints = validPointer(taosArrayInit(TARRAY_MIN_SIZE, sizeof(SQueryNodeAddr)));
return (SPhyNode*)node;
}
@ -313,7 +335,7 @@ static void splitModificationOpSubPlan(SPlanContext* pCxt, SQueryPlanNode* pPlan
SVgDataBlocks* blocks = (SVgDataBlocks*)taosArrayGetP(pPayload->payload, i);
vgroupInfoToEpSet(&blocks->vg, &subplan->execNode);
subplan->pDataSink = createDataInserter(pCxt, blocks);
subplan->pDataSink = createDataInserter(pCxt, blocks, NULL);
subplan->pNode = NULL;
subplan->type = QUERY_TYPE_MODIFY;
subplan->msgType = pPayload->msgType;
@ -332,7 +354,7 @@ static void createSubplanByLevel(SPlanContext* pCxt, SQueryPlanNode* pRoot) {
subplan->msgType = TDMT_VND_QUERY;
subplan->pNode = createPhyNode(pCxt, pRoot);
subplan->pDataSink = createDataDispatcher(pCxt, pRoot);
subplan->pDataSink = createDataDispatcher(pCxt, pRoot, subplan->pNode);
}
// todo deal subquery
}
@ -359,6 +381,24 @@ int32_t createDag(SQueryPlanNode* pQueryNode, struct SCatalog* pCatalog, SQueryD
return TSDB_CODE_SUCCESS;
}
int32_t setSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep) {
//todo
// Walk the physical plan rooted at pNode and append pEp to the source end points of
// every exchange node whose srcTemplateId equals templateId. A NULL node is ignored.
void setExchangSourceNode(uint64_t templateId, SQueryNodeAddr* pEp, SPhyNode* pNode) {
  if (pNode == NULL) {
    return;
  }

  if (pNode->info.type == OP_Exchange) {
    SExchangePhyNode* pExchange = (SExchangePhyNode*)pNode;
    if (pExchange->srcTemplateId == templateId) {
      taosArrayPush(pExchange->pSrcEndPoints, pEp);
    }
  }

  if (pNode->pChildren == NULL) {
    return;
  }

  // recurse into each child
  size_t  numOfChildren = taosArrayGetSize(pNode->pChildren);
  int32_t idx = 0;
  while (idx < numOfChildren) {
    setExchangSourceNode(templateId, pEp, taosArrayGetP(pNode->pChildren, idx));
    ++idx;
  }
}
// Register pEp as a source end point on the matching exchange nodes of the subplan's
// physical plan (see setExchangSourceNode).
void setSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* pEp) {
  SPhyNode* pRoot = subplan->pNode;
  setExchangSourceNode(templateId, pEp, pRoot);
}

View File

@ -88,8 +88,8 @@ int32_t qCreateQueryDag(const struct SQueryNode* pNode, struct SQueryDag** pDag,
return TSDB_CODE_SUCCESS;
}
int32_t qSetSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep) {
return setSubplanExecutionNode(subplan, templateId, ep);
void qSetSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep) {
setSubplanExecutionNode(subplan, templateId, ep);
}
int32_t qSubPlanToString(const SSubplan *subplan, char** str, int32_t* len) {

View File

@ -96,7 +96,7 @@ typedef struct SSchJob {
SHashObj *succTasks; // succeed tasks, key:taskid, value:SQueryTask*
SHashObj *failTasks; // failed tasks, key:taskid, value:SQueryTask*
int8_t status;
int8_t status;
SQueryNodeAddr resNode;
tsem_t rspSem;
int32_t userFetch;
@ -113,10 +113,10 @@ typedef struct SSchJob {
#define SCH_IS_DATA_SRC_TASK(task) ((task)->plan->type == QUERY_TYPE_SCAN)
#define SCH_TASK_NEED_WAIT_ALL(task) ((task)->plan->type == QUERY_TYPE_MODIFY)
#define SCH_SET_TASK_STATUS(task, st) atomic_store_8(&(task)->status, st)
#define SCH_SET_TASK_STATUS(task, st) atomic_store_8(&(task)->status, st)
#define SCH_GET_TASK_STATUS(task) atomic_load_8(&(task)->status)
#define SCH_SET_JOB_STATUS(job, st) atomic_store_8(&(job)->status, st)
#define SCH_SET_JOB_STATUS(job, st) atomic_store_8(&(job)->status, st)
#define SCH_GET_JOB_STATUS(job) atomic_load_8(&(job)->status)
#define SCH_SET_JOB_TYPE(pAttr, type) (pAttr)->queryJob = ((type) != QUERY_TYPE_MODIFY)

View File

@ -20,6 +20,75 @@
static SSchedulerMgmt schMgmt = {0};
/**
 * Validate a job status transition from oriStatus to newStatus.
 *
 * The transition table below is currently commented out, so the active code accepts
 * every transition and always returns TSDB_CODE_SUCCESS. The `_return` error path is
 * only referenced from the disabled table (via SCH_ERR_JRET) and is not reached by the
 * active code.
 *
 * @param pJob      job whose status is about to change (used for logging on the error path)
 * @param oriStatus current status
 * @param newStatus requested status
 * @return TSDB_CODE_SUCCESS
 */
int32_t schValidateStatus(SSchJob *pJob, int8_t oriStatus, int8_t newStatus) {
int32_t code = 0;
/*
if (oriStatus == newStatus) {
SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR);
}
switch (oriStatus) {
case JOB_TASK_STATUS_NULL:
if (newStatus != JOB_TASK_STATUS_EXECUTING
&& newStatus != JOB_TASK_STATUS_FAILED
&& newStatus != JOB_TASK_STATUS_NOT_START) {
SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR);
}
break;
case JOB_TASK_STATUS_NOT_START:
if (newStatus != JOB_TASK_STATUS_CANCELLED) {
SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR);
}
break;
case JOB_TASK_STATUS_EXECUTING:
if (newStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED
&& newStatus != JOB_TASK_STATUS_FAILED
&& newStatus != JOB_TASK_STATUS_CANCELLING
&& newStatus != JOB_TASK_STATUS_CANCELLED
&& newStatus != JOB_TASK_STATUS_DROPPING) {
SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR);
}
break;
case JOB_TASK_STATUS_PARTIAL_SUCCEED:
if (newStatus != JOB_TASK_STATUS_EXECUTING
&& newStatus != JOB_TASK_STATUS_SUCCEED
&& newStatus != JOB_TASK_STATUS_CANCELLED) {
SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR);
}
break;
case JOB_TASK_STATUS_SUCCEED:
case JOB_TASK_STATUS_FAILED:
case JOB_TASK_STATUS_CANCELLING:
if (newStatus != JOB_TASK_STATUS_CANCELLED) {
SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR);
}
break;
case JOB_TASK_STATUS_CANCELLED:
case JOB_TASK_STATUS_DROPPING:
SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR);
break;
default:
qError("invalid task status:%d", oriStatus);
return TSDB_CODE_QRY_APP_ERROR;
}
*/
return TSDB_CODE_SUCCESS;
// error exit used by the disabled transition table above: logs the rejected transition
_return:
SCH_JOB_ELOG("invalid job status update, from %d to %d", oriStatus, newStatus);
SCH_ERR_RET(code);
}
int32_t schBuildTaskRalation(SSchJob *pJob, SHashObj *planToTask) {
for (int32_t i = 0; i < pJob->levelNum; ++i) {
SSchLevel *pLevel = taosArrayGet(pJob->levels, i);
@ -365,14 +434,21 @@ int32_t schProcessOnJobPartialSuccess(SSchJob *job) {
return TSDB_CODE_SUCCESS;
}
int32_t schProcessOnJobFailure(SSchJob *job, int32_t errCode) {
job->status = JOB_TASK_STATUS_FAILED;
job->errCode = errCode;
int32_t schProcessOnJobFailure(SSchJob *pJob, int32_t errCode) {
int8_t status = SCH_GET_JOB_STATUS(pJob);
atomic_val_compare_exchange_32(&job->remoteFetch, 1, 0);
if (schValidateStatus(pJob, status, JOB_TASK_STATUS_FAILED)) {
SCH_ERR_RET(atomic_load_32(&pJob->errCode));
}
SCH_SET_JOB_STATUS(pJob, JOB_TASK_STATUS_FAILED);
atomic_store_32(&pJob->errCode, errCode);
if (job->userFetch || ((!SCH_JOB_NEED_FETCH(&job->attr)) && job->attr.syncSchedule)) {
tsem_post(&job->rspSem);
atomic_val_compare_exchange_32(&pJob->remoteFetch, 1, 0);
if (pJob->userFetch || ((!SCH_JOB_NEED_FETCH(&pJob->attr)) && pJob->attr.syncSchedule)) {
tsem_post(&pJob->rspSem);
}
return TSDB_CODE_SUCCESS;
@ -387,6 +463,7 @@ int32_t schProcessOnDataFetched(SSchJob *job) {
int32_t schProcessOnTaskSuccess(SSchJob *pJob, SSchTask *pTask) {
bool moved = false;
int32_t code = 0;
SCH_ERR_RET(schMoveTaskToSuccList(pJob, pTask, &moved));
if (!moved) {
@ -448,7 +525,7 @@ int32_t schProcessOnTaskSuccess(SSchJob *pJob, SSchTask *pTask) {
atomic_add_fetch_32(&par->childReady, 1);
SCH_ERR_RET(qSetSubplanExecutionNode(par->plan, pTask->plan->id.templateId, &pTask->execAddr));
qSetSubplanExecutionNode(par->plan, pTask->plan->id.templateId, &pTask->execAddr);
if (SCH_TASK_READY_TO_LUNCH(par)) {
SCH_ERR_RET(schLaunchTask(pJob, par));
@ -465,7 +542,7 @@ int32_t schProcessOnTaskFailure(SSchJob *pJob, SSchTask *pTask, int32_t errCode)
SCH_ERR_RET(schTaskCheckAndSetRetry(pJob, pTask, errCode, &needRetry));
if (!needRetry) {
SCH_TASK_ELOG("task failed[%x], no more retry", errCode);
SCH_TASK_ELOG("task failed and no more retry, code:%x", errCode);
if (SCH_GET_TASK_STATUS(pTask) == JOB_TASK_STATUS_EXECUTING) {
SCH_ERR_RET(schMoveTaskToFailList(pJob, pTask, &moved));
@ -670,13 +747,13 @@ int32_t schAsyncSendMsg(void *transport, SEpSet* epSet, uint64_t qId, uint64_t t
int32_t code = 0;
SMsgSendInfo* pMsgSendInfo = calloc(1, sizeof(SMsgSendInfo));
if (NULL == pMsgSendInfo) {
qError("calloc %d failed", (int32_t)sizeof(SMsgSendInfo));
qError("QID:%"PRIx64 ",TID:%"PRIx64 " calloc %d failed", qId, tId, (int32_t)sizeof(SMsgSendInfo));
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
SSchCallbackParam *param = calloc(1, sizeof(SSchCallbackParam));
if (NULL == param) {
qError("calloc %d failed", (int32_t)sizeof(SSchCallbackParam));
qError("QID:%"PRIx64 ",TID:%"PRIx64 " calloc %d failed", qId, tId, (int32_t)sizeof(SSchCallbackParam));
SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
@ -694,11 +771,13 @@ int32_t schAsyncSendMsg(void *transport, SEpSet* epSet, uint64_t qId, uint64_t t
pMsgSendInfo->fp = fp;
int64_t transporterId = 0;
SCH_ERR_JRET(asyncSendMsgToServer(transport, epSet, &transporterId, pMsgSendInfo));
return TSDB_CODE_SUCCESS;
_return:
tfree(param);
tfree(pMsgSendInfo);
@ -720,35 +799,31 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType) {
uint32_t msgSize = 0;
void *msg = NULL;
int32_t code = 0;
SEpSet epSet;
SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx);
schConvertAddrToEpSet(addr, &epSet);
switch (msgType) {
case TDMT_VND_CREATE_TABLE:
case TDMT_VND_SUBMIT: {
if (NULL == pTask->msg || pTask->msgLen <= 0) {
qError("submit msg is NULL");
SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
}
msgSize = pTask->msgLen;
msg = pTask->msg;
break;
}
case TDMT_VND_QUERY: {
if (NULL == pTask->msg) {
qError("query msg is NULL");
SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
}
msgSize = sizeof(SSubQueryMsg) + pTask->msgLen;
msg = calloc(1, msgSize);
if (NULL == msg) {
qError("calloc %d failed", msgSize);
SCH_TASK_ELOG("calloc %d failed", msgSize);
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
SSubQueryMsg *pMsg = msg;
pMsg->header.vgId = htonl(pTask->plan->execNode.nodeId);
pMsg->header.vgId = htonl(addr->nodeId);
pMsg->sId = htobe64(schMgmt.sId);
pMsg->queryId = htobe64(pJob->queryId);
pMsg->taskId = htobe64(pTask->taskId);
@ -760,32 +835,31 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType) {
msgSize = sizeof(SResReadyMsg);
msg = calloc(1, msgSize);
if (NULL == msg) {
qError("calloc %d failed", msgSize);
SCH_TASK_ELOG("calloc %d failed", msgSize);
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
SResReadyMsg *pMsg = msg;
pMsg->header.vgId = htonl(pTask->plan->execNode.nodeId);
pMsg->header.vgId = htonl(addr->nodeId);
pMsg->sId = htobe64(schMgmt.sId);
pMsg->queryId = htobe64(pJob->queryId);
pMsg->taskId = htobe64(pTask->taskId);
break;
}
case TDMT_VND_FETCH: {
if (NULL == pTask) {
SCH_ERR_RET(TSDB_CODE_QRY_APP_ERROR);
}
msgSize = sizeof(SResFetchMsg);
msg = calloc(1, msgSize);
if (NULL == msg) {
qError("calloc %d failed", msgSize);
SCH_TASK_ELOG("calloc %d failed", msgSize);
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
SResFetchMsg *pMsg = msg;
pMsg->header.vgId = htonl(pTask->plan->execNode.nodeId);
pMsg->header.vgId = htonl(addr->nodeId);
pMsg->sId = htobe64(schMgmt.sId);
pMsg->queryId = htobe64(pJob->queryId);
pMsg->taskId = htobe64(pTask->taskId);
@ -795,28 +869,25 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType) {
msgSize = sizeof(STaskDropMsg);
msg = calloc(1, msgSize);
if (NULL == msg) {
qError("calloc %d failed", msgSize);
SCH_TASK_ELOG("calloc %d failed", msgSize);
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
STaskDropMsg *pMsg = msg;
pMsg->header.vgId = htonl(pTask->plan->execNode.nodeId);
pMsg->header.vgId = htonl(addr->nodeId);
pMsg->sId = htobe64(schMgmt.sId);
pMsg->queryId = htobe64(pJob->queryId);
pMsg->taskId = htobe64(pTask->taskId);
break;
}
default:
qError("unknown msg type:%d", msgType);
SCH_TASK_ELOG("unknown msg type:%d", msgType);
SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
break;
}
SEpSet epSet;
SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx);
schConvertAddrToEpSet(addr, &epSet);
SCH_ERR_JRET(schAsyncSendMsg(pJob->transport, &epSet, pJob->queryId, pTask->taskId, msgType, msg, msgSize));
@ -844,7 +915,7 @@ int32_t schLaunchTask(SSchJob *pJob, SSchTask *pTask) {
if (schJobNeedToStop(pJob, &status)) {
SCH_TASK_ELOG("no need to launch task cause of job status, job status:%d", status);
SCH_ERR_RET(pJob->errCode);
SCH_ERR_RET(atomic_load_32(&pJob->errCode));
}
SSubplan *plan = pTask->plan;
@ -860,9 +931,11 @@ int32_t schLaunchTask(SSchJob *pJob, SSchTask *pTask) {
SCH_ERR_JRET(schSetTaskCandidateAddrs(pJob, pTask));
// NOTE: race condition: the task should be put into the hash table before send msg to server
SCH_ERR_JRET(schPushTaskToExecList(pJob, pTask));
if (SCH_GET_TASK_STATUS(pTask) != JOB_TASK_STATUS_EXECUTING) {
SCH_ERR_JRET(schPushTaskToExecList(pJob, pTask));
SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_EXECUTING);
SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_EXECUTING);
}
SCH_ERR_JRET(schBuildAndSendMsg(pJob, pTask, plan->msgType));
@ -1031,7 +1104,7 @@ int32_t scheduleExecJob(void *transport, SArray *nodeList, SQueryDag* pDag, void
SSchJob *job = *(SSchJob **)pJob;
pRes->code = job->errCode;
pRes->code = atomic_load_32(&job->errCode);
pRes->numOfRows = job->resNumOfRows;
return TSDB_CODE_SUCCESS;
@ -1061,7 +1134,7 @@ int32_t scheduleFetchRows(void *pJob, void **data) {
if (job->status == JOB_TASK_STATUS_FAILED) {
job->res = NULL;
SCH_RET(job->errCode);
SCH_RET(atomic_load_32(&job->errCode));
}
if (job->status == JOB_TASK_STATUS_SUCCEED) {
@ -1081,7 +1154,7 @@ int32_t scheduleFetchRows(void *pJob, void **data) {
tsem_wait(&job->rspSem);
if (job->status == JOB_TASK_STATUS_FAILED) {
code = job->errCode;
code = atomic_load_32(&job->errCode);
}
if (job->res && ((SRetrieveTableRsp *)job->res)->completed) {