6258 lines
209 KiB
C
6258 lines
209 KiB
C
/*
|
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
|
*
|
|
* This program is free software: you can use, redistribute, and/or modify
|
|
* it under the terms of the GNU Affero General Public License, version 3
|
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
|
*
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
#include "os.h"
|
|
|
|
#include "hash.h"
|
|
#include "hashfunc.h"
|
|
#include "qast.h"
|
|
#include "qresultBuf.h"
|
|
#include "query.h"
|
|
#include "queryExecutor.h"
|
|
#include "queryLog.h"
|
|
#include "queryUtil.h"
|
|
#include "taosmsg.h"
|
|
#include "tlosertree.h"
|
|
#include "tscompression.h"
|
|
#include "tsdbMain.h" //todo use TableId instead of STable object
|
|
#include "ttime.h"
|
|
#include "tscUtil.h" // todo move the function to common module
|
|
|
|
#define DEFAULT_INTERN_BUF_SIZE 16384L
|
|
|
|
/**
|
|
* check if the primary column is load by default, otherwise, the program will
|
|
* forced to load primary column explicitly.
|
|
*/
|
|
#define PRIMARY_TSCOL_LOADED(query) ((query)->colList[0].data.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX)
|
|
|
|
#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0)
|
|
#define TSDB_COL_IS_TAG(f) (((f)&TSDB_COL_TAG) != 0)
|
|
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)
|
|
|
|
#define IS_MASTER_SCAN(runtime) (((runtime)->scanFlag & 1u) == MASTER_SCAN)
|
|
#define IS_SUPPLEMENT_SCAN(runtime) ((runtime)->scanFlag == SUPPLEMENTARY_SCAN)
|
|
#define SET_SUPPLEMENT_SCAN_FLAG(runtime) ((runtime)->scanFlag = SUPPLEMENTARY_SCAN)
|
|
#define SET_MASTER_SCAN_FLAG(runtime) ((runtime)->scanFlag = MASTER_SCAN)
|
|
|
|
#define GET_QINFO_ADDR(x) ((void *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))
|
|
|
|
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
|
|
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
|
|
|
|
/* get the qinfo struct address from the query struct address */
|
|
#define GET_COLUMN_BYTES(query, colidx) \
|
|
((query)->colList[(query)->pSelectExpr[colidx].pBase.colInfo.colIndex].bytes)
|
|
#define GET_COLUMN_TYPE(query, colidx) ((query)->colList[(query)->pSelectExpr[colidx].pBase.colInfo.colIndex].type)
|
|
|
|
typedef struct SPointInterpoSupporter {
|
|
int32_t numOfCols;
|
|
char ** pPrevPoint;
|
|
char ** pNextPoint;
|
|
} SPointInterpoSupporter;
|
|
|
|
typedef enum {
|
|
// when query starts to execute, this status will set
|
|
QUERY_NOT_COMPLETED = 0x1u,
|
|
|
|
/* result output buffer is full, current query is paused.
|
|
* this status is only exist in group-by clause and diff/add/division/multiply/ query.
|
|
*/
|
|
QUERY_RESBUF_FULL = 0x2u,
|
|
|
|
/* query is over
|
|
* 1. this status is used in one row result query process, e.g., count/sum/first/last/ avg...etc.
|
|
* 2. when all data within queried time window, it is also denoted as query_completed
|
|
*/
|
|
QUERY_COMPLETED = 0x4u,
|
|
|
|
/* when the result is not completed return to client, this status will be
|
|
* usually used in case of interval query with interpolation option
|
|
*/
|
|
QUERY_OVER = 0x8u,
|
|
} vnodeQueryStatus;
|
|
|
|
enum {
|
|
TS_JOIN_TS_EQUAL = 0,
|
|
TS_JOIN_TS_NOT_EQUALS = 1,
|
|
TS_JOIN_TAG_NOT_EQUALS = 2,
|
|
};
|
|
|
|
typedef struct {
|
|
int32_t status; // query status
|
|
TSKEY lastKey; // the lastKey value before query executed
|
|
STimeWindow w; // whole query time window
|
|
STimeWindow curWindow; // current query window
|
|
int32_t windowIndex; // index of active time window result for interval query
|
|
STSCursor cur;
|
|
} SQueryStatusInfo;
|
|
|
|
static void setQueryStatus(SQuery *pQuery, int8_t status);
|
|
bool isIntervalQuery(SQuery *pQuery) { return pQuery->intervalTime > 0; }
|
|
|
|
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
|
|
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
|
|
|
|
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
|
|
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
|
|
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow);
|
|
|
|
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void *inputData, TSKEY *tsCol, int32_t size,
|
|
int32_t functionId, SDataStatis *pStatis, bool hasNull, void *param, int32_t scanFlag);
|
|
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
|
|
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols);
|
|
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
|
|
static bool hasMainOutput(SQuery *pQuery);
|
|
static void createTableDataInfo(SQInfo *pQInfo);
|
|
|
|
static int32_t setAdditionalInfo(SQInfo *pQInfo, STable *pTable, STableQueryInfo *pTableQueryInfo);
|
|
static int32_t flushFromResultBuf(SQInfo *pQInfo);
|
|
|
|
bool getNeighborPoints(SQInfo *pQInfo, void *pMeterObj, SPointInterpoSupporter *pPointInterpSupporter) {
|
|
#if 0
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
if (!isPointInterpoQuery(pQuery)) {
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* for interpolate point query, points that are directly before/after the specified point are required
|
|
*/
|
|
if (isFirstLastRowQuery(pQuery)) {
|
|
assert(!QUERY_IS_ASC_QUERY(pQuery));
|
|
} else {
|
|
assert(QUERY_IS_ASC_QUERY(pQuery));
|
|
}
|
|
assert(pPointInterpSupporter != NULL && pQuery->skey == pQuery->ekey);
|
|
|
|
SCacheBlock *pBlock = NULL;
|
|
|
|
qTrace("QInfo:%p get next data point, fileId:%d, slot:%d, pos:%d", GET_QINFO_ADDR(pQuery), pQuery->fileId,
|
|
pQuery->slot, pQuery->pos);
|
|
|
|
// save the point that is directly after or equals to the specified point
|
|
getOneRowFromDataBlock(pRuntimeEnv, pPointInterpSupporter->pNextPoint, pQuery->pos);
|
|
|
|
/*
|
|
* 1. for last_row query, return immediately.
|
|
* 2. the specified timestamp equals to the required key, interpolation according to neighbor points is not necessary
|
|
* for interp query.
|
|
*/
|
|
TSKEY actualKey = *(TSKEY *)pPointInterpSupporter->pNextPoint[0];
|
|
if (isFirstLastRowQuery(pQuery) || actualKey == pQuery->skey) {
|
|
setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
|
|
|
|
/*
|
|
* the retrieved ts may not equals to pMeterObj->lastKey due to cache re-allocation
|
|
* set the pQuery->ekey/pQuery->skey/pQuery->lastKey to be the new value.
|
|
*/
|
|
if (pQuery->ekey != actualKey) {
|
|
pQuery->skey = actualKey;
|
|
pQuery->ekey = actualKey;
|
|
pQuery->lastKey = actualKey;
|
|
pSupporter->rawSKey = actualKey;
|
|
pSupporter->rawEKey = actualKey;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/* the qualified point is not the first point in data block */
|
|
if (pQuery->pos > 0) {
|
|
int32_t prevPos = pQuery->pos - 1;
|
|
|
|
/* save the point that is directly after the specified point */
|
|
getOneRowFromDataBlock(pRuntimeEnv, pPointInterpSupporter->pPrevPoint, prevPos);
|
|
} else {
|
|
__block_search_fn_t searchFn = vnodeSearchKeyFunc[pMeterObj->searchAlgorithm];
|
|
|
|
// savePointPosition(&pRuntimeEnv->startPos, pQuery->fileId, pQuery->slot, pQuery->pos);
|
|
|
|
// backwards movement would not set the pQuery->pos correct. We need to set it manually later.
|
|
moveToNextBlock(pRuntimeEnv, QUERY_DESC_FORWARD_STEP, searchFn, true);
|
|
|
|
/*
|
|
* no previous data exists.
|
|
* reset the status and load the data block that contains the qualified point
|
|
*/
|
|
if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK)) {
|
|
qTrace("QInfo:%p no previous data block, start fileId:%d, slot:%d, pos:%d, qrange:%" PRId64 "-%" PRId64
|
|
", out of range",
|
|
GET_QINFO_ADDR(pQuery), pRuntimeEnv->startPos.fileId, pRuntimeEnv->startPos.slot,
|
|
pRuntimeEnv->startPos.pos, pQuery->skey, pQuery->ekey);
|
|
|
|
// no result, return immediately
|
|
setQueryStatus(pQuery, QUERY_COMPLETED);
|
|
return false;
|
|
} else { // prev has been located
|
|
if (pQuery->fileId >= 0) {
|
|
pQuery->pos = pQuery->pBlock[pQuery->slot].numOfPoints - 1;
|
|
getOneRowFromDataBlock(pRuntimeEnv, pPointInterpSupporter->pPrevPoint, pQuery->pos);
|
|
|
|
qTrace("QInfo:%p get prev data point, fileId:%d, slot:%d, pos:%d, pQuery->pos:%d", GET_QINFO_ADDR(pQuery),
|
|
pQuery->fileId, pQuery->slot, pQuery->pos, pQuery->pos);
|
|
} else {
|
|
// moveToNextBlock make sure there is a available cache block, if exists
|
|
assert(vnodeIsDatablockLoaded(pRuntimeEnv, pMeterObj, -1, true) == DISK_BLOCK_NO_NEED_TO_LOAD);
|
|
pBlock = &pRuntimeEnv->cacheBlock;
|
|
|
|
pQuery->pos = pBlock->numOfPoints - 1;
|
|
getOneRowFromDataBlock(pRuntimeEnv, pPointInterpSupporter->pPrevPoint, pQuery->pos);
|
|
|
|
qTrace("QInfo:%p get prev data point, fileId:%d, slot:%d, pos:%d, pQuery->pos:%d", GET_QINFO_ADDR(pQuery),
|
|
pQuery->fileId, pQuery->slot, pBlock->numOfPoints - 1, pQuery->pos);
|
|
}
|
|
}
|
|
}
|
|
|
|
pQuery->skey = *(TSKEY *)pPointInterpSupporter->pPrevPoint[0];
|
|
pQuery->ekey = *(TSKEY *)pPointInterpSupporter->pNextPoint[0];
|
|
pQuery->lastKey = pQuery->skey;
|
|
#endif
|
|
return true;
|
|
}
|
|
|
|
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
|
|
for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
|
|
SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
|
|
|
|
char *pElem = pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;
|
|
if (isNull(pElem, pFilterInfo->info.type)) {
|
|
return false;
|
|
}
|
|
|
|
bool qualified = false;
|
|
for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
|
|
SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];
|
|
|
|
if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
|
|
qualified = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!qualified) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
bool hasMainFunction = hasMainOutput(pQuery);
|
|
|
|
int64_t maxOutput = 0;
|
|
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
|
|
int32_t functionId = pQuery->pSelectExpr[j].pBase.functionId;
|
|
|
|
/*
|
|
* ts, tag, tagprj function can not decide the output number of current query
|
|
* the number of output result is decided by main output
|
|
*/
|
|
if (hasMainFunction &&
|
|
(functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) {
|
|
continue;
|
|
}
|
|
|
|
SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
|
|
if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
|
|
maxOutput = pResInfo->numOfRes;
|
|
}
|
|
}
|
|
|
|
assert(maxOutput >= 0);
|
|
return maxOutput;
|
|
}
|
|
|
|
static int32_t getGroupResultId(int32_t groupIndex) {
|
|
int32_t base = 200000;
|
|
return base + (groupIndex * 10000);
|
|
}
|
|
|
|
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
|
|
if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
|
|
return false;
|
|
}
|
|
|
|
for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
|
|
SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
|
|
if (pColIndex->flag == TSDB_COL_NORMAL) {
|
|
/*
|
|
* make sure the normal column locates at the second position if tbname exists in group by clause
|
|
*/
|
|
if (pGroupbyExpr->numOfGroupCols > 1) {
|
|
assert(pColIndex->colIndex > 0);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
|
|
assert(pGroupbyExpr != NULL);
|
|
|
|
int32_t colId = -2;
|
|
int16_t type = TSDB_DATA_TYPE_NULL;
|
|
|
|
for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
|
|
SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
|
|
if (pColIndex->flag == TSDB_COL_NORMAL) {
|
|
colId = pColIndex->colId;
|
|
break;
|
|
}
|
|
}
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
|
|
if (colId == pQuery->colList[i].colId) {
|
|
type = pQuery->colList[i].type;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return type;
|
|
}
|
|
|
|
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
|
|
bool hasTags = false;
|
|
int32_t numOfSelectivity = 0;
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t functId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
|
|
hasTags = true;
|
|
continue;
|
|
}
|
|
|
|
if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
|
|
numOfSelectivity++;
|
|
}
|
|
}
|
|
|
|
if (numOfSelectivity > 0 && hasTags) {
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool isTSCompQuery(SQuery *pQuery) { return pQuery->pSelectExpr[0].pBase.functionId == TSDB_FUNC_TS_COMP; }
|
|
|
|
static bool limitResults(SQInfo *pQInfo) {
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
|
|
if ((pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit)) {
|
|
pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;
|
|
assert(pQuery->rec.rows > 0);
|
|
|
|
setQueryStatus(pQuery, QUERY_COMPLETED);
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static bool isTopBottomQuery(SQuery *pQuery) {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
if (functionId == TSDB_FUNC_TS) {
|
|
continue;
|
|
}
|
|
|
|
if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static SDataStatis *getStatisInfo(SQuery *pQuery, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo, int32_t index) {
|
|
// for a tag column, no corresponding field info
|
|
SColIndex *pColIndexEx = &pQuery->pSelectExpr[index].pBase.colInfo;
|
|
if (TSDB_COL_IS_TAG(pColIndexEx->flag)) {
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Choose the right column field info by field id, since the file block may be out of date,
|
|
* which means the newest table schema is not equalled to the schema of this block.
|
|
*/
|
|
for (int32_t i = 0; i < pDataBlockInfo->numOfCols; ++i) {
|
|
if (pColIndexEx->colId == pStatis[i].colId) {
|
|
return &pStatis[i];
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* @param pQuery
|
|
* @param col
|
|
* @param pDataBlockInfo
|
|
* @param pStatis
|
|
* @param pColStatis
|
|
* @return
|
|
*/
|
|
static bool hasNullValue(SQuery *pQuery, int32_t col, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
|
|
SDataStatis **pColStatis) {
|
|
SColIndex *pColIndex = &pQuery->pSelectExpr[col].pBase.colInfo;
|
|
if (TSDB_COL_IS_TAG(pColIndex->flag)) {
|
|
return false;
|
|
}
|
|
|
|
// query on primary timestamp column, not null value at all
|
|
if (pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
|
|
return false;
|
|
}
|
|
|
|
*pColStatis = NULL;
|
|
if (pStatis != NULL) {
|
|
*pColStatis = getStatisInfo(pQuery, pStatis, pDataBlockInfo, col);
|
|
}
|
|
|
|
if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
|
|
int16_t bytes) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
int32_t *p1 = (int32_t *)taosHashGet(pWindowResInfo->hashList, pData, bytes);
|
|
if (p1 != NULL) {
|
|
pWindowResInfo->curIndex = *p1;
|
|
} else { // more than the capacity, reallocate the resources
|
|
if (pWindowResInfo->size >= pWindowResInfo->capacity) {
|
|
int64_t newCap = pWindowResInfo->capacity * 2;
|
|
|
|
char *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
|
|
if (t != NULL) {
|
|
pWindowResInfo->pResult = (SWindowResult *)t;
|
|
memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * pWindowResInfo->capacity);
|
|
} else {
|
|
// todo
|
|
}
|
|
|
|
for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
|
|
SPosInfo pos = {-1, -1};
|
|
createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &pos);
|
|
}
|
|
|
|
pWindowResInfo->capacity = newCap;
|
|
}
|
|
|
|
// add a new result set for a new group
|
|
pWindowResInfo->curIndex = pWindowResInfo->size++;
|
|
taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
|
|
}
|
|
|
|
return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
|
|
}
|
|
|
|
// get the correct time window according to the handled timestamp
|
|
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
|
|
STimeWindow w = {0};
|
|
|
|
if (pWindowResInfo->curIndex == -1) { // the first window, from the previous stored value
|
|
w.skey = pWindowResInfo->prevSKey;
|
|
w.ekey = w.skey + pQuery->intervalTime - 1;
|
|
} else {
|
|
int32_t slot = curTimeWindow(pWindowResInfo);
|
|
w = getWindowResult(pWindowResInfo, slot)->window;
|
|
}
|
|
|
|
if (w.skey > ts || w.ekey < ts) {
|
|
int64_t st = w.skey;
|
|
|
|
if (st > ts) {
|
|
st -= ((st - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
|
|
}
|
|
|
|
int64_t et = st + pQuery->intervalTime - 1;
|
|
if (et < ts) {
|
|
st += ((ts - et + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
|
|
}
|
|
|
|
w.skey = st;
|
|
w.ekey = w.skey + pQuery->intervalTime - 1;
|
|
}
|
|
|
|
/*
|
|
* query border check, skey should not be bounded by the query time range, since the value skey will
|
|
* be used as the time window index value. So we only change ekey of time window accordingly.
|
|
*/
|
|
if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
|
|
w.ekey = pQuery->window.ekey;
|
|
}
|
|
|
|
assert(ts >= w.skey && ts <= w.ekey && w.skey != 0);
|
|
|
|
return w;
|
|
}
|
|
|
|
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
|
|
int32_t numOfRowsPerPage) {
|
|
if (pWindowRes->pos.pageId != -1) {
|
|
return 0;
|
|
}
|
|
|
|
tFilePage *pData = NULL;
|
|
|
|
// in the first scan, new space needed for results
|
|
int32_t pageId = -1;
|
|
SIDList list = getDataBufPagesIdList(pResultBuf, sid);
|
|
|
|
if (list.size == 0) {
|
|
pData = getNewDataBuf(pResultBuf, sid, &pageId);
|
|
} else {
|
|
pageId = getLastPageId(&list);
|
|
pData = getResultBufferPageById(pResultBuf, pageId);
|
|
|
|
if (pData->numOfElems >= numOfRowsPerPage) {
|
|
pData = getNewDataBuf(pResultBuf, sid, &pageId);
|
|
if (pData != NULL) {
|
|
assert(pData->numOfElems == 0); // number of elements must be 0 for new allocated buffer
|
|
}
|
|
}
|
|
}
|
|
|
|
if (pData == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
// set the number of rows in current disk page
|
|
if (pWindowRes->pos.pageId == -1) { // not allocated yet, allocate new buffer
|
|
pWindowRes->pos.pageId = pageId;
|
|
pWindowRes->pos.rowId = pData->numOfElems++;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
|
|
STimeWindow *win) {
|
|
assert(win->skey <= win->ekey);
|
|
SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
|
|
|
|
SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE);
|
|
if (pWindowRes == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
// not assign result buffer yet, add new result buffer
|
|
if (pWindowRes->pos.pageId == -1) {
|
|
int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
|
|
if (ret != 0) {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
// set time window for current result
|
|
pWindowRes->window = *win;
|
|
|
|
setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
|
|
initCtxOutputBuf(pRuntimeEnv);
|
|
|
|
return TSDB_CODE_SUCCESS;
|
|
}
|
|
|
|
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
|
|
assert(slot >= 0 && slot < pWindowResInfo->size);
|
|
return &pWindowResInfo->pResult[slot].status;
|
|
}
|
|
|
|
static int32_t getForwardStepsInBlock(int32_t numOfPoints, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
|
|
int16_t order, int64_t *pData) {
|
|
int32_t endPos = searchFn((char *)pData, numOfPoints, ekey, order);
|
|
int32_t forwardStep = 0;
|
|
|
|
if (endPos >= 0) {
|
|
forwardStep = (order == TSDB_ORDER_ASC) ? (endPos - pos) : (pos - endPos);
|
|
assert(forwardStep >= 0);
|
|
|
|
// endPos data is equalled to the key so, we do need to read the element in endPos
|
|
if (pData[endPos] == ekey) {
|
|
forwardStep += 1;
|
|
}
|
|
}
|
|
|
|
return forwardStep;
|
|
}
|
|
|
|
/**
|
|
* NOTE: the query status only set for the first scan of master scan.
|
|
*/
|
|
static void doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!isIntervalQuery(pQuery))) {
|
|
return;
|
|
}
|
|
|
|
// no qualified results exist, abort check
|
|
if (pWindowResInfo->size == 0) {
|
|
return;
|
|
}
|
|
|
|
// query completed
|
|
if ((lastKey >= pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
|
|
(lastKey <= pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
|
|
closeAllTimeWindow(pWindowResInfo);
|
|
|
|
pWindowResInfo->curIndex = pWindowResInfo->size - 1;
|
|
setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
|
|
} else { // set the current index to be the last unclosed window
|
|
int32_t i = 0;
|
|
int64_t skey = 0;
|
|
|
|
for (i = 0; i < pWindowResInfo->size; ++i) {
|
|
SWindowResult *pResult = &pWindowResInfo->pResult[i];
|
|
if (pResult->status.closed) {
|
|
continue;
|
|
}
|
|
|
|
if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
|
|
(pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
|
|
closeTimeWindow(pWindowResInfo, i);
|
|
} else {
|
|
skey = pResult->window.skey;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// all windows are closed, set the last one to be the skey
|
|
if (skey == 0) {
|
|
assert(i == pWindowResInfo->size);
|
|
pWindowResInfo->curIndex = pWindowResInfo->size - 1;
|
|
} else {
|
|
pWindowResInfo->curIndex = i;
|
|
}
|
|
|
|
pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
|
|
|
|
// the number of completed slots are larger than the threshold, dump to client immediately.
|
|
int32_t n = numOfClosedTimeWindow(pWindowResInfo);
|
|
if (n > pWindowResInfo->threshold) {
|
|
setQueryStatus(pQuery, QUERY_RESBUF_FULL);
|
|
}
|
|
|
|
qTrace("QInfo:%p total window:%d, closed:%d", GET_QINFO_ADDR(pQuery), pWindowResInfo->size, n);
|
|
}
|
|
|
|
assert(pWindowResInfo->prevSKey != 0);
|
|
}
|
|
|
|
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
|
|
int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn,
|
|
bool updateLastKey) {
|
|
assert(startPos >= 0 && startPos < pDataBlockInfo->rows);
|
|
|
|
int32_t num = -1;
|
|
int32_t order = pQuery->order.order;
|
|
|
|
int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);
|
|
|
|
if (QUERY_IS_ASC_QUERY(pQuery)) {
|
|
if (ekey < pDataBlockInfo->window.ekey) {
|
|
num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
|
|
if (num == 0) { // no qualified data in current block, do not update the lastKey value
|
|
assert(ekey < pPrimaryColumn[startPos]);
|
|
} else {
|
|
if (updateLastKey) {
|
|
pQuery->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
|
|
}
|
|
}
|
|
} else {
|
|
num = pDataBlockInfo->rows - startPos;
|
|
if (updateLastKey) {
|
|
pQuery->lastKey = pDataBlockInfo->window.ekey + step;
|
|
}
|
|
}
|
|
} else { // desc
|
|
if (ekey > pDataBlockInfo->window.skey) {
|
|
num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
|
|
if (num == 0) { // no qualified data in current block, do not update the lastKey value
|
|
assert(ekey > pPrimaryColumn[startPos]);
|
|
} else {
|
|
if (updateLastKey) {
|
|
pQuery->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
|
|
}
|
|
}
|
|
} else {
|
|
num = startPos + 1;
|
|
if (updateLastKey) {
|
|
pQuery->lastKey = pDataBlockInfo->window.skey + step;
|
|
}
|
|
}
|
|
}
|
|
|
|
assert(num >= 0);
|
|
return num;
|
|
}
|
|
|
|
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
|
|
int32_t startPos, int32_t forwardStep, TSKEY *tsBuf) {
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
|
|
|
|
if (IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed) {
|
|
for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
|
|
int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId;
|
|
|
|
pCtx[k].nStartQueryTimestamp = pWin->skey;
|
|
pCtx[k].size = forwardStep;
|
|
pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? startPos : startPos - (forwardStep - 1);
|
|
|
|
if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
|
|
pCtx[k].ptsList = &tsBuf[pCtx[k].startOffset];
|
|
}
|
|
|
|
if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
|
|
aAggs[functionId].xFunction(&pCtx[k]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
|
|
int32_t offset) {
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
|
|
|
|
if (IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed) {
|
|
for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
|
|
pCtx[k].nStartQueryTimestamp = pWin->skey;
|
|
|
|
int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId;
|
|
if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
|
|
aAggs[functionId].xFunctionF(&pCtx[k], offset);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNextWin,
|
|
SDataBlockInfo *pDataBlockInfo, TSKEY *primaryKeys,
|
|
__block_search_fn_t searchFn) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
while (1) {
|
|
if ((pNextWin->ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
|
|
(pNextWin->skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
|
|
return -1;
|
|
}
|
|
|
|
getNextTimeWindow(pQuery, pNextWin);
|
|
|
|
// next time window is not in current block
|
|
if ((pNextWin->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
|
|
(pNextWin->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
|
|
return -1;
|
|
}
|
|
|
|
TSKEY startKey = -1;
|
|
if (QUERY_IS_ASC_QUERY(pQuery)) {
|
|
startKey = pNextWin->skey;
|
|
if (startKey < pQuery->window.skey) {
|
|
startKey = pQuery->window.skey;
|
|
}
|
|
} else {
|
|
startKey = pNextWin->ekey;
|
|
if (startKey > pQuery->window.skey) {
|
|
startKey = pQuery->window.skey;
|
|
}
|
|
}
|
|
|
|
int32_t startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
|
|
|
|
/*
|
|
* This time window does not cover any data, try next time window,
|
|
* this case may happen when the time window is too small
|
|
*/
|
|
if ((primaryKeys[startPos] > pNextWin->ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
|
|
(primaryKeys[startPos] < pNextWin->skey && !QUERY_IS_ASC_QUERY(pQuery))) {
|
|
continue;
|
|
}
|
|
|
|
return startPos;
|
|
}
|
|
}
|
|
|
|
static TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
|
|
TSKEY ekey = -1;
|
|
if (QUERY_IS_ASC_QUERY(pQuery)) {
|
|
ekey = pWindow->ekey;
|
|
if (ekey > pQuery->window.ekey) {
|
|
ekey = pQuery->window.ekey;
|
|
}
|
|
} else {
|
|
ekey = pWindow->skey;
|
|
if (ekey < pQuery->window.ekey) {
|
|
ekey = pQuery->window.ekey;
|
|
}
|
|
}
|
|
|
|
return ekey;
|
|
}
|
|
|
|
char *getDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
|
|
SArray *pDataBlock) {
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
|
|
|
|
char *dataBlock = NULL;
|
|
|
|
int32_t functionId = pQuery->pSelectExpr[col].pBase.functionId;
|
|
|
|
if (functionId == TSDB_FUNC_ARITHM) {
|
|
sas->pArithExpr = &pQuery->pSelectExpr[col];
|
|
|
|
// set the start offset to be the lowest start position, no matter asc/desc query order
|
|
if (QUERY_IS_ASC_QUERY(pQuery)) {
|
|
pCtx->startOffset = pQuery->pos;
|
|
} else {
|
|
pCtx->startOffset = pQuery->pos - (size - 1);
|
|
}
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
|
|
SColumnInfo *pColMsg = &pQuery->colList[i];
|
|
assert(0);
|
|
// char * pData = doGetDataBlocks(pQuery, pRuntimeEnv->colDataBuffer, pQuery->colList[i].colIdxInBuf);
|
|
sas->elemSize[i] = pColMsg->bytes;
|
|
// sas->data[i] = pData + pCtx->startOffset * sas->elemSize[i]; // start from the offset
|
|
}
|
|
|
|
sas->numOfCols = pQuery->numOfCols;
|
|
sas->offset = 0;
|
|
} else { // other type of query function
|
|
SColIndex *pCol = &pQuery->pSelectExpr[col].pBase.colInfo;
|
|
if (TSDB_COL_IS_TAG(pCol->flag) || pDataBlock == NULL) {
|
|
dataBlock = NULL;
|
|
} else {
|
|
/*
|
|
* the colIndex is acquired from the first meter of all qualified meters in this vnode during query prepare
|
|
* stage, the remain meter may not have the required column in cache actually. So, the validation of required
|
|
* column in cache with the corresponding meter schema is reinforced.
|
|
*/
|
|
int32_t numOfCols = taosArrayGetSize(pDataBlock);
|
|
|
|
for (int32_t i = 0; i < numOfCols; ++i) {
|
|
SColumnInfoData *p = taosArrayGet(pDataBlock, i);
|
|
if (pCol->colId == p->info.colId) {
|
|
dataBlock = p->pData;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return dataBlock;
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @param pRuntimeEnv
|
|
* @param forwardStep
|
|
* @param primaryKeyCol
|
|
* @param pFields
|
|
* @param isDiskFileBlock
|
|
* @return the incremental number of output value, so it maybe 0 for fixed number of query,
|
|
* such as count/min/max etc.
|
|
*/
|
|
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
|
|
SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
|
|
__block_search_fn_t searchFn, SArray *pDataBlock) {
|
|
SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
SColumnInfoData *pColInfo = NULL;
|
|
TSKEY * primaryKeyCol = NULL;
|
|
|
|
if (pDataBlock != NULL) {
|
|
pColInfo = taosArrayGet(pDataBlock, 0);
|
|
primaryKeyCol = (TSKEY *)(pColInfo->pData);
|
|
}
|
|
|
|
SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
|
|
|
|
for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
|
|
int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId;
|
|
|
|
SDataStatis *tpField = NULL;
|
|
|
|
bool hasNull = hasNullValue(pQuery, k, pDataBlockInfo, pStatis, &tpField);
|
|
char *dataBlock = getDataBlocks(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
|
|
|
|
setExecParams(pQuery, &pCtx[k], dataBlock, primaryKeyCol, pDataBlockInfo->rows, functionId, tpField, hasNull,
|
|
&sasArray[k], pRuntimeEnv->scanFlag);
|
|
}
|
|
|
|
int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
|
|
if (isIntervalQuery(pQuery)) {
|
|
int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
|
|
TSKEY ts = primaryKeyCol[offset];
|
|
|
|
STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
|
|
if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->sid, &win) != TSDB_CODE_SUCCESS) {
|
|
return;
|
|
}
|
|
|
|
TSKEY ekey = reviseWindowEkey(pQuery, &win);
|
|
int32_t forwardStep =
|
|
getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, primaryKeyCol, pQuery->pos, ekey, searchFn, true);
|
|
|
|
SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
|
|
doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, pQuery->pos, forwardStep, primaryKeyCol);
|
|
|
|
int32_t index = pWindowResInfo->curIndex;
|
|
STimeWindow nextWin = win;
|
|
|
|
while (1) {
|
|
int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, primaryKeyCol, searchFn);
|
|
if (startPos < 0) {
|
|
break;
|
|
}
|
|
|
|
// null data, failed to allocate more memory buffer
|
|
if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->sid, &nextWin) != TSDB_CODE_SUCCESS) {
|
|
break;
|
|
}
|
|
|
|
ekey = reviseWindowEkey(pQuery, &nextWin);
|
|
forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, primaryKeyCol, startPos, ekey, searchFn, true);
|
|
|
|
pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
|
|
doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, primaryKeyCol);
|
|
}
|
|
|
|
pWindowResInfo->curIndex = index;
|
|
} else {
|
|
/*
|
|
* the sqlfunctionCtx parameters should be set done before all functions are invoked,
|
|
* since the selectivity + tag_prj query needs all parameters been set done.
|
|
* tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
|
|
*/
|
|
for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
|
|
int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId;
|
|
if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
|
|
aAggs[functionId].xFunction(&pCtx[k]);
|
|
}
|
|
}
|
|
}
|
|
|
|
tfree(sasArray);
|
|
}
|
|
|
|
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
|
|
if (isNull(pData, type)) { // ignore the null value
|
|
return -1;
|
|
}
|
|
|
|
int32_t GROUPRESULTID = 1;
|
|
|
|
SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
|
|
|
|
SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes);
|
|
if (pWindowRes == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
// not assign result buffer yet, add new result buffer
|
|
if (pWindowRes->pos.pageId == -1) {
|
|
int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage);
|
|
if (ret != 0) {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
|
|
initCtxOutputBuf(pRuntimeEnv);
|
|
return TSDB_CODE_SUCCESS;
|
|
}
|
|
|
|
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
|
|
SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;
|
|
|
|
for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) {
|
|
SColIndex* pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, k);
|
|
if (pColIndex->flag == TSDB_COL_TAG) {
|
|
continue;
|
|
}
|
|
|
|
int16_t colIndex = -1;
|
|
int32_t colId = pColIndex->colId;
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
|
|
if (pQuery->colList[i].colId == colId) {
|
|
colIndex = i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
assert(colIndex >= 0 && colIndex < pQuery->numOfCols);
|
|
|
|
*type = pQuery->colList[colIndex].type;
|
|
*bytes = pQuery->colList[colIndex].bytes;
|
|
/*
|
|
* the colIndex is acquired from the first meter of all qualified meters in this vnode during query prepare
|
|
* stage, the remain meter may not have the required column in cache actually. So, the validation of required
|
|
* column in cache with the corresponding meter schema is reinforced.
|
|
*/
|
|
int32_t numOfCols = taosArrayGetSize(pDataBlock);
|
|
|
|
for (int32_t i = 0; i < numOfCols; ++i) {
|
|
SColumnInfoData *p = taosArrayGet(pDataBlock, i);
|
|
if (pColIndex->colId == p->info.colId) {
|
|
return p->pData;
|
|
}
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
|
|
SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
|
|
|
|
// compare tag first
|
|
if (pCtx[0].tag.i64Key != elem.tag) {
|
|
return TS_JOIN_TAG_NOT_EQUALS;
|
|
}
|
|
|
|
TSKEY key = *(TSKEY *)(pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);
|
|
|
|
#if defined(_DEBUG_VIEW)
|
|
printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
|
|
elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
|
|
pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
|
|
#endif
|
|
|
|
if (QUERY_IS_ASC_QUERY(pQuery)) {
|
|
if (key < elem.ts) {
|
|
return TS_JOIN_TS_NOT_EQUALS;
|
|
} else if (key > elem.ts) {
|
|
assert(false);
|
|
}
|
|
} else {
|
|
if (key > elem.ts) {
|
|
return TS_JOIN_TS_NOT_EQUALS;
|
|
} else if (key < elem.ts) {
|
|
assert(false);
|
|
}
|
|
}
|
|
|
|
return TS_JOIN_TS_EQUAL;
|
|
}
|
|
|
|
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
|
|
SResultInfo *pResInfo = GET_RES_INFO(pCtx);
|
|
|
|
if (pResInfo->complete || functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
|
|
return false;
|
|
}
|
|
|
|
// in the supplementary scan, only the following functions need to be executed
|
|
if (IS_SUPPLEMENT_SCAN(pRuntimeEnv) &&
|
|
!(functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST ||
|
|
functionId == TSDB_FUNC_LAST || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TS)) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
|
|
SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
|
|
SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
|
|
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
TSKEY *primaryKeyCol = (TSKEY*) ((SColumnInfoData *)taosArrayGet(pDataBlock, 0))->pData;
|
|
|
|
bool groupbyStateValue = isGroupbyNormalCol(pQuery->pGroupbyExpr);
|
|
SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
|
|
|
|
int16_t type = 0;
|
|
int16_t bytes = 0;
|
|
|
|
char *groupbyColumnData = NULL;
|
|
if (groupbyStateValue) {
|
|
groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
|
|
}
|
|
|
|
for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
|
|
int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId;
|
|
|
|
SDataStatis *pColStatis = NULL;
|
|
|
|
bool hasNull = hasNullValue(pQuery, k, pDataBlockInfo, pStatis, &pColStatis);
|
|
char *dataBlock = getDataBlocks(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
|
|
|
|
setExecParams(pQuery, &pCtx[k], dataBlock, primaryKeyCol, pDataBlockInfo->rows, functionId, pColStatis, hasNull,
|
|
&sasArray[k], pRuntimeEnv->scanFlag);
|
|
}
|
|
|
|
// set the input column data
|
|
for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
|
|
SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
|
|
pFilterInfo->pData = getDataBlocks(pRuntimeEnv, &sasArray[k], pFilterInfo->info.colId, pDataBlockInfo->rows, pDataBlock);
|
|
}
|
|
|
|
int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
|
|
|
|
// from top to bottom in desc
|
|
// from bottom to top in asc order
|
|
if (pRuntimeEnv->pTSBuf != NULL) {
|
|
SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery);
|
|
qTrace("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
|
|
pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
|
|
}
|
|
|
|
int32_t j = 0;
|
|
TSKEY lastKey = -1;
|
|
|
|
for (j = 0; j < pDataBlockInfo->rows; ++j) {
|
|
int32_t offset = GET_COL_DATA_POS(pQuery, j, step);
|
|
|
|
if (pRuntimeEnv->pTSBuf != NULL) {
|
|
int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
|
|
if (r == TS_JOIN_TAG_NOT_EQUALS) {
|
|
break;
|
|
} else if (r == TS_JOIN_TS_NOT_EQUALS) {
|
|
continue;
|
|
} else {
|
|
assert(r == TS_JOIN_TS_EQUAL);
|
|
}
|
|
}
|
|
|
|
if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
|
|
continue;
|
|
}
|
|
|
|
// interval window query
|
|
if (isIntervalQuery(pQuery)) {
|
|
// decide the time window according to the primary timestamp
|
|
int64_t ts = primaryKeyCol[offset];
|
|
STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
|
|
|
|
int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->sid, &win);
|
|
if (ret != TSDB_CODE_SUCCESS) { // null data, too many state code
|
|
continue;
|
|
}
|
|
|
|
// all startOffset are identical
|
|
offset -= pCtx[0].startOffset;
|
|
|
|
SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
|
|
doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);
|
|
|
|
lastKey = ts;
|
|
STimeWindow nextWin = win;
|
|
int32_t index = pWindowResInfo->curIndex;
|
|
|
|
while (1) {
|
|
getNextTimeWindow(pQuery, &nextWin);
|
|
if (pWindowResInfo->startTime > nextWin.skey ||
|
|
(nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
|
|
(nextWin.skey > pQuery->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
|
|
break;
|
|
}
|
|
|
|
if (ts < nextWin.skey || ts > nextWin.ekey) {
|
|
break;
|
|
}
|
|
|
|
// null data, failed to allocate more memory buffer
|
|
if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->sid, &nextWin) != TSDB_CODE_SUCCESS) {
|
|
break;
|
|
}
|
|
|
|
pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
|
|
doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
|
|
}
|
|
|
|
pWindowResInfo->curIndex = index;
|
|
} else { // other queries
|
|
// decide which group this rows belongs to according to current state value
|
|
if (groupbyStateValue) {
|
|
char *stateVal = groupbyColumnData + bytes * offset;
|
|
|
|
int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, stateVal, type, bytes);
|
|
if (ret != TSDB_CODE_SUCCESS) { // null data, too many state code
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// update the lastKey
|
|
lastKey = primaryKeyCol[offset];
|
|
|
|
// all startOffset are identical
|
|
offset -= pCtx[0].startOffset;
|
|
|
|
for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
|
|
int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId;
|
|
if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
|
|
aAggs[functionId].xFunctionF(&pCtx[k], offset);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (pRuntimeEnv->pTSBuf != NULL) {
|
|
// if timestamp filter list is empty, quit current query
|
|
if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) {
|
|
setQueryStatus(pQuery, QUERY_COMPLETED);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
pQuery->lastKey = lastKey + step;
|
|
free(sasArray);
|
|
}
|
|
|
|
static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
|
|
SDataStatis *pStatis, __block_search_fn_t searchFn,
|
|
SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
|
|
rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
|
|
} else {
|
|
blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
|
|
}
|
|
|
|
TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
|
|
pQuery->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
|
|
|
|
doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
|
|
|
|
// interval query with limit applied
|
|
if (isIntervalQuery(pQuery) && pQuery->limit.limit > 0 &&
|
|
(pQuery->limit.limit + pQuery->limit.offset) <= numOfClosedTimeWindow(pWindowResInfo) &&
|
|
pRuntimeEnv->scanFlag == MASTER_SCAN) {
|
|
setQueryStatus(pQuery, QUERY_COMPLETED);
|
|
}
|
|
|
|
int32_t numOfRes = getNumOfResult(pRuntimeEnv);
|
|
|
|
// update the number of output result
|
|
if (numOfRes > 0 && pQuery->checkBuffer == 1) {
|
|
assert(numOfRes >= pQuery->rec.rows);
|
|
pQuery->rec.rows = numOfRes;
|
|
|
|
if (numOfRes >= pQuery->rec.threshold) {
|
|
setQueryStatus(pQuery, QUERY_RESBUF_FULL);
|
|
}
|
|
}
|
|
|
|
return numOfRes;
|
|
}
|
|
|
|
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void *inputData, TSKEY *tsCol, int32_t size,
|
|
int32_t functionId, SDataStatis *pStatis, bool hasNull, void *param, int32_t scanFlag) {
|
|
pCtx->scanFlag = scanFlag;
|
|
|
|
pCtx->aInputElemBuf = inputData;
|
|
pCtx->hasNull = hasNull;
|
|
|
|
if (pStatis != NULL) {
|
|
pCtx->preAggVals.isSet = true;
|
|
pCtx->preAggVals.size = size;
|
|
pCtx->preAggVals.statis = *pStatis;
|
|
} else {
|
|
pCtx->preAggVals.isSet = false;
|
|
}
|
|
|
|
pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos : 0;
|
|
pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? size - pQuery->pos : pQuery->pos + 1;
|
|
|
|
uint32_t status = aAggs[functionId].nStatus;
|
|
if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
|
|
pCtx->ptsList = &tsCol[pCtx->startOffset];
|
|
}
|
|
|
|
if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
|
|
// last_dist or first_dist function
|
|
// store the first&last timestamp into the intermediate buffer [1], the true
|
|
// value may be null but timestamp will never be null
|
|
// pCtx->ptsList = tsCol;
|
|
} else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
|
|
functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
|
|
/*
|
|
* leastsquares function needs two columns of input, currently, the x value of linear equation is set to
|
|
* timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
|
|
*
|
|
* top/bottom function needs timestamp to indicate when the
|
|
* top/bottom values emerge, so does diff function
|
|
*/
|
|
if (functionId == TSDB_FUNC_TWA) {
|
|
STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
|
|
pTWAInfo->SKey = pQuery->window.skey;
|
|
pTWAInfo->EKey = pQuery->window.ekey;
|
|
}
|
|
|
|
} else if (functionId == TSDB_FUNC_ARITHM) {
|
|
pCtx->param[1].pz = param;
|
|
}
|
|
|
|
#if defined(_DEBUG_VIEW)
|
|
// int64_t *tsList = (int64_t *)primaryColumnData;
|
|
// int64_t s = tsList[0];
|
|
// int64_t e = tsList[size - 1];
|
|
|
|
// if (IS_DATA_BLOCK_LOADED(blockStatus)) {
|
|
// qTrace("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
|
|
// functId:%d", GET_QINFO_ADDR(pQuery),
|
|
// s, e, startOffset, size, blockStatus, functionId);
|
|
// } else {
|
|
// qTrace("QInfo:%p block not loaded, bstatus:%d",
|
|
// GET_QINFO_ADDR(pQuery), blockStatus);
|
|
// }
|
|
#endif
|
|
}
|
|
|
|
// set the output buffer for the selectivity + tag query
|
|
static void setCtxTagColumnInfo(SQuery *pQuery, SQLFunctionCtx *pCtx) {
|
|
if (isSelectivityWithTagsQuery(pQuery)) {
|
|
int32_t num = 0;
|
|
SQLFunctionCtx *p = NULL;
|
|
|
|
int16_t tagLen = 0;
|
|
|
|
SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].pBase;
|
|
if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
|
|
tagLen += pCtx[i].outputBytes;
|
|
pTagCtx[num++] = &pCtx[i];
|
|
} else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
|
|
p = &pCtx[i];
|
|
} else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
|
|
// tag function may be the group by tag column
|
|
// ts may be the required primary timestamp column
|
|
continue;
|
|
} else {
|
|
// the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
|
|
}
|
|
}
|
|
|
|
p->tagInfo.pTagCtxList = pTagCtx;
|
|
p->tagInfo.numOfTagCols = num;
|
|
p->tagInfo.tagsLen = tagLen;
|
|
}
|
|
}
|
|
|
|
static void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery) {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
setResultInfoBuf(&pResultInfo[i], pQuery->pSelectExpr[i].interResBytes, isStableQuery);
|
|
}
|
|
}
|
|
|
|
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
|
|
qTrace("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
pRuntimeEnv->resultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
|
|
pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));
|
|
|
|
if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
|
|
goto _error_clean;
|
|
}
|
|
|
|
pRuntimeEnv->offset[0] = 0;
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].pBase;
|
|
|
|
SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
|
|
SColIndex* pIndex = &pSqlFuncMsg->colInfo;
|
|
|
|
int32_t index = pSqlFuncMsg->colInfo.colIndex;
|
|
if (TSDB_COL_IS_TAG(pIndex->flag)) {
|
|
if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {
|
|
pCtx->inputBytes = TSDB_TABLE_NAME_LEN;
|
|
pCtx->inputType = TSDB_DATA_TYPE_BINARY;
|
|
} else {
|
|
pCtx->inputBytes = pQuery->tagColList[index].bytes;
|
|
pCtx->inputType = pQuery->tagColList[index].type;
|
|
}
|
|
} else {
|
|
pCtx->inputBytes = pQuery->colList[index].bytes;
|
|
pCtx->inputType = pQuery->colList[index].type;
|
|
}
|
|
|
|
pCtx->ptsOutputBuf = NULL;
|
|
|
|
pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
|
|
pCtx->outputType = pQuery->pSelectExpr[i].type;
|
|
|
|
pCtx->order = pQuery->order.order;
|
|
pCtx->functionId = pSqlFuncMsg->functionId;
|
|
|
|
pCtx->numOfParams = pSqlFuncMsg->numOfParams;
|
|
for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
|
|
int16_t type = pSqlFuncMsg->arg[j].argType;
|
|
int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
|
|
if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
|
|
tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg->argValue.pz, bytes, type);
|
|
} else {
|
|
tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
|
|
}
|
|
}
|
|
|
|
// set the order information for top/bottom query
|
|
int32_t functionId = pCtx->functionId;
|
|
|
|
if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
|
|
int32_t f = pQuery->pSelectExpr[0].pBase.functionId;
|
|
assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);
|
|
|
|
pCtx->param[2].i64Key = order;
|
|
pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
|
|
pCtx->param[3].i64Key = functionId;
|
|
pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;
|
|
|
|
pCtx->param[1].i64Key = pQuery->order.orderColId;
|
|
}
|
|
|
|
if (i > 0) {
|
|
pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
|
|
}
|
|
}
|
|
|
|
// set the intermediate result output buffer
|
|
setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery);
|
|
|
|
// if it is group by normal column, do not set output buffer, the output buffer is pResult
|
|
if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !pRuntimeEnv->stableQuery) {
|
|
resetCtxOutputBuf(pRuntimeEnv);
|
|
}
|
|
|
|
setCtxTagColumnInfo(pQuery, pRuntimeEnv->pCtx);
|
|
return TSDB_CODE_SUCCESS;
|
|
|
|
_error_clean:
|
|
tfree(pRuntimeEnv->resultInfo);
|
|
tfree(pRuntimeEnv->pCtx);
|
|
|
|
return TSDB_CODE_SERV_OUT_OF_MEMORY;
|
|
}
|
|
|
|
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
if (pRuntimeEnv->pQuery == NULL) {
|
|
return;
|
|
}
|
|
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
qTrace("QInfo:%p teardown runtime env", GET_QINFO_ADDR(pQuery));
|
|
cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo, pQuery->numOfOutput);
|
|
|
|
if (pRuntimeEnv->pCtx != NULL) {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
|
|
|
|
for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
|
|
tVariantDestroy(&pCtx->param[j]);
|
|
}
|
|
|
|
tVariantDestroy(&pCtx->tag);
|
|
tfree(pCtx->tagInfo.pTagCtxList);
|
|
tfree(pRuntimeEnv->resultInfo[i].interResultBuf);
|
|
}
|
|
|
|
tfree(pRuntimeEnv->resultInfo);
|
|
tfree(pRuntimeEnv->pCtx);
|
|
}
|
|
|
|
taosDestoryInterpoInfo(&pRuntimeEnv->interpoInfo);
|
|
|
|
if (pRuntimeEnv->pInterpoBuf != NULL) {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
tfree(pRuntimeEnv->pInterpoBuf[i]);
|
|
}
|
|
|
|
tfree(pRuntimeEnv->pInterpoBuf);
|
|
}
|
|
|
|
destroyResultBuf(pRuntimeEnv->pResultBuf);
|
|
tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
|
|
tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
|
|
|
|
pRuntimeEnv->pTSBuf = tsBufDestory(pRuntimeEnv->pTSBuf);
|
|
}
|
|
|
|
static bool isQueryKilled(SQInfo *pQInfo) {
|
|
return (pQInfo->code == TSDB_CODE_QUERY_CANCELLED);
|
|
#if 0
|
|
/*
|
|
* check if the queried meter is going to be deleted.
|
|
* if it will be deleted soon, stop current query ASAP.
|
|
*/
|
|
SMeterObj *pMeterObj = pQInfo->pObj;
|
|
if (vnodeIsMeterState(pMeterObj, TSDB_METER_STATE_DROPPING)) {
|
|
pQInfo->killed = 1;
|
|
return true;
|
|
}
|
|
|
|
return (pQInfo->killed == 1);
|
|
#endif
|
|
}
|
|
|
|
static void setQueryKilled(SQInfo *pQInfo) { pQInfo->code = TSDB_CODE_QUERY_CANCELLED; }
|
|
|
|
bool isFixedOutputQuery(SQuery *pQuery) {
|
|
if (pQuery->intervalTime != 0) {
|
|
return false;
|
|
}
|
|
|
|
// Note:top/bottom query is fixed output query
|
|
if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
|
|
return true;
|
|
}
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
SSqlFuncMsg *pExprMsg = &pQuery->pSelectExpr[i].pBase;
|
|
|
|
// ignore the ts_comp function
|
|
if (i == 0 && pExprMsg->functionId == TSDB_FUNC_PRJ && pExprMsg->numOfParams == 1 &&
|
|
pExprMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
|
|
continue;
|
|
}
|
|
|
|
if (pExprMsg->functionId == TSDB_FUNC_TS || pExprMsg->functionId == TSDB_FUNC_TS_DUMMY) {
|
|
continue;
|
|
}
|
|
|
|
if (!IS_MULTIOUTPUT(aAggs[pExprMsg->functionId].nStatus)) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool isPointInterpoQuery(SQuery *pQuery) {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t functionID = pQuery->pSelectExpr[i].pBase.functionId;
|
|
if (functionID == TSDB_FUNC_INTERP || functionID == TSDB_FUNC_LAST_ROW) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
|
|
bool isSumAvgRateQuery(SQuery *pQuery) {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
if (functionId == TSDB_FUNC_TS) {
|
|
continue;
|
|
}
|
|
|
|
if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
|
|
functionId == TSDB_FUNC_AVG_IRATE) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool isFirstLastRowQuery(SQuery *pQuery) {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t functionID = pQuery->pSelectExpr[i].pBase.functionId;
|
|
if (functionID == TSDB_FUNC_LAST_ROW) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool notHasQueryTimeRange(SQuery *pQuery) {
|
|
return (pQuery->window.skey == 0 && pQuery->window.ekey == INT64_MAX && QUERY_IS_ASC_QUERY(pQuery)) ||
|
|
(pQuery->window.skey == INT64_MAX && pQuery->window.ekey == 0 && (!QUERY_IS_ASC_QUERY(pQuery)));
|
|
}
|
|
|
|
static bool needReverseScan(SQuery *pQuery) {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TS_DUMMY || functionId == TSDB_FUNC_TAG) {
|
|
continue;
|
|
}
|
|
|
|
if (((functionId == TSDB_FUNC_LAST || functionId == TSDB_FUNC_LAST_DST) && QUERY_IS_ASC_QUERY(pQuery)) ||
|
|
((functionId == TSDB_FUNC_FIRST || functionId == TSDB_FUNC_FIRST_DST) && !QUERY_IS_ASC_QUERY(pQuery))) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
/////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, int64_t *realSkey,
|
|
int64_t *realEkey, STimeWindow *win) {
|
|
assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);
|
|
|
|
win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision);
|
|
|
|
if (keyFirst > (INT64_MAX - pQuery->intervalTime)) {
|
|
/*
|
|
* if the realSkey > INT64_MAX - pQuery->intervalTime, the query duration between
|
|
* realSkey and realEkey must be less than one interval.Therefore, no need to adjust the query ranges.
|
|
*/
|
|
assert(keyLast - keyFirst < pQuery->intervalTime);
|
|
|
|
*realSkey = keyFirst;
|
|
*realEkey = keyLast;
|
|
|
|
win->ekey = INT64_MAX;
|
|
return;
|
|
}
|
|
|
|
win->ekey = win->skey + pQuery->intervalTime - 1;
|
|
|
|
if (win->skey < keyFirst) {
|
|
*realSkey = keyFirst;
|
|
} else {
|
|
*realSkey = win->skey;
|
|
}
|
|
|
|
if (win->ekey < keyLast) {
|
|
*realEkey = win->ekey;
|
|
} else {
|
|
*realEkey = keyLast;
|
|
}
|
|
}
|
|
|
|
static UNUSED_FUNC bool doGetQueryPos(TSKEY key, SQInfo *pQInfo, SPointInterpoSupporter *pPointInterpSupporter) {
|
|
#if 0
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
SMeterObj * pMeterObj = pRuntimeEnv->pTabObj;
|
|
|
|
/* key in query range. If not, no qualified in disk file */
|
|
if (key != -1 && key <= pQuery->window.ekey) {
|
|
if (isPointInterpoQuery(pQuery)) { /* no qualified data in this query range */
|
|
return getNeighborPoints(pQInfo, pMeterObj, pPointInterpSupporter);
|
|
} else {
|
|
return true;
|
|
}
|
|
} else { // key > pQuery->window.ekey, abort for normal query, continue for interp query
|
|
if (isPointInterpoQuery(pQuery)) {
|
|
return getNeighborPoints(pQInfo, pMeterObj, pPointInterpSupporter);
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
#endif
|
|
return true;
|
|
}
|
|
|
|
static UNUSED_FUNC bool doSetDataInfo(SQInfo *pQInfo, SPointInterpoSupporter *pPointInterpSupporter, void *pMeterObj,
|
|
TSKEY nextKey) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
if (isFirstLastRowQuery(pQuery)) {
|
|
/*
|
|
* if the pQuery->window.skey != pQuery->window.ekey for last_row query,
|
|
* the query range is existed, so set them both the value of nextKey
|
|
*/
|
|
if (pQuery->window.skey != pQuery->window.ekey) {
|
|
assert(pQuery->window.skey >= pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery) &&
|
|
nextKey >= pQuery->window.ekey && nextKey <= pQuery->window.skey);
|
|
|
|
pQuery->window.skey = nextKey;
|
|
pQuery->window.ekey = nextKey;
|
|
}
|
|
|
|
return getNeighborPoints(pQInfo, pMeterObj, pPointInterpSupporter);
|
|
} else {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// TODO refactor code, the best way to implement the last_row is utilizing the iterator
|
|
bool normalizeUnBoundLastRowQuery(SQInfo *pQInfo, SPointInterpoSupporter *pPointInterpSupporter) {
|
|
#if 0
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
SMeterObj *pMeterObj = pRuntimeEnv->pTabObj;
|
|
|
|
assert(!QUERY_IS_ASC_QUERY(pQuery) && notHasQueryTimeRange(pQuery));
|
|
__block_search_fn_t searchFn = vnodeSearchKeyFunc[pMeterObj->searchAlgorithm];
|
|
|
|
TSKEY lastKey = -1;
|
|
|
|
pQuery->fileId = -1;
|
|
vnodeFreeFieldsEx(pRuntimeEnv);
|
|
|
|
// keep in-memory cache status in local variables in case that it may be changed by write operation
|
|
getBasicCacheInfoSnapshot(pQuery, pMeterObj->pCache, pMeterObj->vnode);
|
|
|
|
SCacheInfo *pCacheInfo = (SCacheInfo *)pMeterObj->pCache;
|
|
if (pCacheInfo != NULL && pCacheInfo->cacheBlocks != NULL && pQuery->numOfBlocks > 0) {
|
|
pQuery->fileId = -1;
|
|
TSKEY key = pMeterObj->lastKey;
|
|
|
|
pQuery->window.skey = key;
|
|
pQuery->window.ekey = key;
|
|
pQuery->lastKey = pQuery->window.skey;
|
|
|
|
/*
|
|
* cache block may have been flushed to disk, and no data in cache anymore.
|
|
* So, copy cache block to local buffer is required.
|
|
*/
|
|
lastKey = getQueryStartPositionInCache(pRuntimeEnv, &pQuery->slot, &pQuery->pos, false);
|
|
if (lastKey < 0) { // data has been flushed to disk, try again search in file
|
|
lastKey = getQueryPositionForCacheInvalid(pRuntimeEnv, searchFn);
|
|
|
|
if (Q_STATUS_EQUAL(pQuery->status, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) {
|
|
return false;
|
|
}
|
|
}
|
|
} else { // no data in cache, try file
|
|
TSKEY key = pMeterObj->lastKeyOnFile;
|
|
|
|
pQuery->window.skey = key;
|
|
pQuery->window.ekey = key;
|
|
pQuery->lastKey = pQuery->window.skey;
|
|
|
|
bool ret = getQualifiedDataBlock(pMeterObj, pRuntimeEnv, QUERY_RANGE_LESS_EQUAL, searchFn);
|
|
if (!ret) { // no data in file, return false;
|
|
return false;
|
|
}
|
|
|
|
lastKey = getTimestampInDiskBlock(pRuntimeEnv, pQuery->pos);
|
|
}
|
|
|
|
assert(lastKey <= pQuery->window.skey);
|
|
|
|
pQuery->window.skey = lastKey;
|
|
pQuery->window.ekey = lastKey;
|
|
pQuery->lastKey = pQuery->window.skey;
|
|
|
|
return getNeighborPoints(pQInfo, pMeterObj, pPointInterpSupporter);
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
|
|
if (isTopBottomQuery(pQuery)) {
|
|
pQuery->checkBuffer = 0;
|
|
} else if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
|
|
pQuery->checkBuffer = 0;
|
|
} else {
|
|
bool hasMultioutput = false;
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
SSqlFuncMsg *pExprMsg = &pQuery->pSelectExpr[i].pBase;
|
|
if (pExprMsg->functionId == TSDB_FUNC_TS || pExprMsg->functionId == TSDB_FUNC_TS_DUMMY) {
|
|
continue;
|
|
}
|
|
|
|
hasMultioutput = IS_MULTIOUTPUT(aAggs[pExprMsg->functionId].nStatus);
|
|
if (!hasMultioutput) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
pQuery->checkBuffer = hasMultioutput ? 1 : 0;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* todo add more parameters to check soon..
|
|
*/
|
|
bool vnodeParametersSafetyCheck(SQuery *pQuery) {
|
|
// load data column information is incorrect
|
|
for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
|
|
if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
|
|
qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
|
|
// the scan order is not matter
|
|
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
|
|
if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TS_DUMMY || functionId == TSDB_FUNC_TAG ||
|
|
functionId == TSDB_FUNC_TAG_DUMMY) {
|
|
continue;
|
|
}
|
|
|
|
if (functionId != functId && functionId != functIdDst) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool onlyFirstQuery(SQuery *pQuery) { return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST); }
|
|
|
|
static bool onlyLastQuery(SQuery *pQuery) { return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST); }
|
|
|
|
static void changeExecuteScanOrder(SQuery *pQuery, bool metricQuery) {
|
|
// in case of point-interpolation query, use asc order scan
|
|
char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
|
|
"-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;
|
|
|
|
// todo handle the case the the order irrelevant query type mixed up with order critical query type
|
|
// descending order query for last_row query
|
|
if (isFirstLastRowQuery(pQuery)) {
|
|
qTrace("QInfo:%p scan order changed for last_row query, old:%d, new:%d", GET_QINFO_ADDR(pQuery),
|
|
pQuery->order.order, TSDB_ORDER_DESC);
|
|
|
|
pQuery->order.order = TSDB_ORDER_DESC;
|
|
|
|
int64_t skey = MIN(pQuery->window.skey, pQuery->window.ekey);
|
|
int64_t ekey = MAX(pQuery->window.skey, pQuery->window.ekey);
|
|
|
|
pQuery->window.skey = ekey;
|
|
pQuery->window.ekey = skey;
|
|
|
|
return;
|
|
}
|
|
|
|
if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
|
|
if (!QUERY_IS_ASC_QUERY(pQuery)) {
|
|
qTrace(msg, GET_QINFO_ADDR(pQuery), "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
|
|
pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
|
|
SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
|
|
}
|
|
|
|
pQuery->order.order = TSDB_ORDER_ASC;
|
|
return;
|
|
}
|
|
|
|
if (pQuery->intervalTime == 0) {
|
|
if (onlyFirstQuery(pQuery)) {
|
|
if (!QUERY_IS_ASC_QUERY(pQuery)) {
|
|
qTrace(msg, GET_QINFO_ADDR(pQuery), "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
|
|
pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
|
|
|
|
SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
|
|
}
|
|
|
|
pQuery->order.order = TSDB_ORDER_ASC;
|
|
} else if (onlyLastQuery(pQuery)) {
|
|
if (QUERY_IS_ASC_QUERY(pQuery)) {
|
|
qTrace(msg, GET_QINFO_ADDR(pQuery), "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
|
|
pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
|
|
|
|
SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
|
|
}
|
|
|
|
pQuery->order.order = TSDB_ORDER_DESC;
|
|
}
|
|
|
|
} else { // interval query
|
|
if (metricQuery) {
|
|
if (onlyFirstQuery(pQuery)) {
|
|
if (!QUERY_IS_ASC_QUERY(pQuery)) {
|
|
qTrace(msg, GET_QINFO_ADDR(pQuery), "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
|
|
pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
|
|
|
|
SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
|
|
}
|
|
|
|
pQuery->order.order = TSDB_ORDER_ASC;
|
|
} else if (onlyLastQuery(pQuery)) {
|
|
if (QUERY_IS_ASC_QUERY(pQuery)) {
|
|
qTrace(msg, GET_QINFO_ADDR(pQuery), "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
|
|
pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
|
|
|
|
SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
|
|
}
|
|
|
|
pQuery->order.order = TSDB_ORDER_DESC;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void doSetInterpVal(SQLFunctionCtx *pCtx, TSKEY ts, int16_t type, int32_t index, char *data) {
|
|
assert(pCtx->param[index].pz == NULL);
|
|
|
|
int32_t len = 0;
|
|
size_t t = 0;
|
|
|
|
if (type == TSDB_DATA_TYPE_BINARY) {
|
|
t = strlen(data);
|
|
|
|
len = t + 1 + TSDB_KEYSIZE;
|
|
pCtx->param[index].pz = calloc(1, len);
|
|
} else if (type == TSDB_DATA_TYPE_NCHAR) {
|
|
t = wcslen((const wchar_t *)data);
|
|
|
|
len = (t + 1) * TSDB_NCHAR_SIZE + TSDB_KEYSIZE;
|
|
pCtx->param[index].pz = calloc(1, len);
|
|
} else {
|
|
len = TSDB_KEYSIZE * 2;
|
|
pCtx->param[index].pz = malloc(len);
|
|
}
|
|
|
|
pCtx->param[index].nType = TSDB_DATA_TYPE_BINARY;
|
|
|
|
char *z = pCtx->param[index].pz;
|
|
*(TSKEY *)z = ts;
|
|
z += TSDB_KEYSIZE;
|
|
|
|
switch (type) {
|
|
case TSDB_DATA_TYPE_FLOAT:
|
|
*(double *)z = GET_FLOAT_VAL(data);
|
|
break;
|
|
case TSDB_DATA_TYPE_DOUBLE:
|
|
*(double *)z = GET_DOUBLE_VAL(data);
|
|
break;
|
|
case TSDB_DATA_TYPE_INT:
|
|
case TSDB_DATA_TYPE_BOOL:
|
|
case TSDB_DATA_TYPE_BIGINT:
|
|
case TSDB_DATA_TYPE_TINYINT:
|
|
case TSDB_DATA_TYPE_SMALLINT:
|
|
case TSDB_DATA_TYPE_TIMESTAMP:
|
|
*(int64_t *)z = GET_INT64_VAL(data);
|
|
break;
|
|
case TSDB_DATA_TYPE_BINARY:
|
|
strncpy(z, data, t);
|
|
break;
|
|
case TSDB_DATA_TYPE_NCHAR: {
|
|
wcsncpy((wchar_t *)z, (const wchar_t *)data, t);
|
|
} break;
|
|
default:
|
|
assert(0);
|
|
}
|
|
|
|
pCtx->param[index].nLen = len;
|
|
}
|
|
|
|
/**
|
|
* param[1]: default value/previous value of specified timestamp
|
|
* param[2]: next value of specified timestamp
|
|
* param[3]: denotes if the result is a precious result or interpolation results
|
|
*
|
|
* @param pQInfo
|
|
* @param pQInfo
|
|
* @param pInterpoRaw
|
|
*/
|
|
void pointInterpSupporterSetData(SQInfo *pQInfo, SPointInterpoSupporter *pPointInterpSupport) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
// not point interpolation query, abort
|
|
if (!isPointInterpoQuery(pQuery)) {
|
|
return;
|
|
}
|
|
|
|
int32_t count = 1;
|
|
TSKEY key = *(TSKEY *)pPointInterpSupport->pNextPoint[0];
|
|
|
|
if (key == pQuery->window.skey) {
|
|
// the queried timestamp has value, return it directly without interpolation
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
tVariantCreateFromBinary(&pRuntimeEnv->pCtx[i].param[3], (char *)&count, sizeof(count), TSDB_DATA_TYPE_INT);
|
|
|
|
pRuntimeEnv->pCtx[i].param[0].i64Key = key;
|
|
pRuntimeEnv->pCtx[i].param[0].nType = TSDB_DATA_TYPE_BIGINT;
|
|
}
|
|
} else {
|
|
// set the direct previous(next) point for process
|
|
count = 2;
|
|
|
|
if (pQuery->interpoType == TSDB_INTERPO_SET_VALUE) {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
|
|
|
|
// only the function of interp needs the corresponding information
|
|
if (pCtx->functionId != TSDB_FUNC_INTERP) {
|
|
continue;
|
|
}
|
|
|
|
pCtx->numOfParams = 4;
|
|
|
|
SInterpInfo *pInterpInfo = (SInterpInfo *)pRuntimeEnv->pCtx[i].aOutputBuf;
|
|
pInterpInfo->pInterpDetail = calloc(1, sizeof(SInterpInfoDetail));
|
|
|
|
SInterpInfoDetail *pInterpDetail = pInterpInfo->pInterpDetail;
|
|
|
|
// for primary timestamp column, set the flag
|
|
if (pQuery->pSelectExpr[i].pBase.colInfo.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
|
|
pInterpDetail->primaryCol = 1;
|
|
}
|
|
|
|
tVariantCreateFromBinary(&pCtx->param[3], (char *)&count, sizeof(count), TSDB_DATA_TYPE_INT);
|
|
|
|
if (isNull((char *)&pQuery->defaultVal[i], pCtx->inputType)) {
|
|
pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
|
|
} else {
|
|
tVariantCreateFromBinary(&pCtx->param[1], (char *)&pQuery->defaultVal[i], pCtx->inputBytes, pCtx->inputType);
|
|
}
|
|
|
|
pInterpDetail->ts = pQuery->window.skey;
|
|
pInterpDetail->type = pQuery->interpoType;
|
|
}
|
|
} else {
|
|
TSKEY prevKey = *(TSKEY *)pPointInterpSupport->pPrevPoint[0];
|
|
TSKEY nextKey = *(TSKEY *)pPointInterpSupport->pNextPoint[0];
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
|
|
|
|
// tag column does not need the interp environment
|
|
if (pQuery->pSelectExpr[i].pBase.functionId == TSDB_FUNC_TAG) {
|
|
continue;
|
|
}
|
|
|
|
int32_t colInBuf = 0; // pQuery->pSelectExpr[i].pBase.colInfo.colIdxInBuf;
|
|
SInterpInfo *pInterpInfo = (SInterpInfo *)pRuntimeEnv->pCtx[i].aOutputBuf;
|
|
|
|
pInterpInfo->pInterpDetail = calloc(1, sizeof(SInterpInfoDetail));
|
|
SInterpInfoDetail *pInterpDetail = pInterpInfo->pInterpDetail;
|
|
|
|
// int32_t type = GET_COLUMN_TYPE(pQuery, i);
|
|
int32_t type = 0;
|
|
assert(0);
|
|
|
|
// for primary timestamp column, set the flag
|
|
if (pQuery->pSelectExpr[i].pBase.colInfo.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
|
|
pInterpDetail->primaryCol = 1;
|
|
} else {
|
|
doSetInterpVal(pCtx, prevKey, type, 1, pPointInterpSupport->pPrevPoint[colInBuf]);
|
|
doSetInterpVal(pCtx, nextKey, type, 2, pPointInterpSupport->pNextPoint[colInBuf]);
|
|
}
|
|
|
|
tVariantCreateFromBinary(&pRuntimeEnv->pCtx[i].param[3], (char *)&count, sizeof(count), TSDB_DATA_TYPE_INT);
|
|
|
|
pInterpDetail->ts = pQInfo->runtimeEnv.pQuery->window.skey;
|
|
pInterpDetail->type = pQuery->interpoType;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void pointInterpSupporterInit(SQuery *pQuery, SPointInterpoSupporter *pInterpoSupport) {
|
|
if (isPointInterpoQuery(pQuery)) {
|
|
pInterpoSupport->pPrevPoint = malloc(pQuery->numOfCols * POINTER_BYTES);
|
|
pInterpoSupport->pNextPoint = malloc(pQuery->numOfCols * POINTER_BYTES);
|
|
|
|
pInterpoSupport->numOfCols = pQuery->numOfCols;
|
|
|
|
/* get appropriated size for one row data source*/
|
|
int32_t len = 0;
|
|
for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
|
|
len += pQuery->colList[i].bytes;
|
|
}
|
|
|
|
// assert(PRIMARY_TSCOL_LOADED(pQuery));
|
|
|
|
void *prev = calloc(1, len);
|
|
void *next = calloc(1, len);
|
|
|
|
int32_t offset = 0;
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
|
|
pInterpoSupport->pPrevPoint[i] = prev + offset;
|
|
pInterpoSupport->pNextPoint[i] = next + offset;
|
|
|
|
offset += pQuery->colList[i].bytes;
|
|
}
|
|
}
|
|
}
|
|
|
|
void pointInterpSupporterDestroy(SPointInterpoSupporter *pPointInterpSupport) {
|
|
if (pPointInterpSupport->numOfCols <= 0 || pPointInterpSupport->pPrevPoint == NULL) {
|
|
return;
|
|
}
|
|
|
|
tfree(pPointInterpSupport->pPrevPoint[0]);
|
|
tfree(pPointInterpSupport->pNextPoint[0]);
|
|
|
|
tfree(pPointInterpSupport->pPrevPoint);
|
|
tfree(pPointInterpSupport->pNextPoint);
|
|
|
|
pPointInterpSupport->numOfCols = 0;
|
|
}
|
|
|
|
static UNUSED_FUNC void allocMemForInterpo(SQInfo *pQInfo, SQuery *pQuery, void *pMeterObj) {
|
|
#if 0
|
|
if (pQuery->interpoType != TSDB_INTERPO_NONE) {
|
|
assert(isIntervalQuery(pQuery) || (pQuery->intervalTime == 0 && isPointInterpoQuery(pQuery)));
|
|
|
|
if (isIntervalQuery(pQuery)) {
|
|
pQInfo->runtimeEnv.pInterpoBuf = malloc(POINTER_BYTES * pQuery->numOfOutput);
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
pQInfo->runtimeEnv.pInterpoBuf[i] =
|
|
calloc(1, sizeof(tFilePage) + pQuery->pSelectExpr[i].bytes * pMeterObj->pointsPerFileBlock);
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static int32_t getInitialPageNum(SQInfo *pQInfo) {
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
int32_t INITIAL_RESULT_ROWS_VALUE = 16;
|
|
|
|
int32_t num = 0;
|
|
|
|
if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
|
|
num = 128;
|
|
} else if (isIntervalQuery(pQuery)) { // time window query, allocate one page for each table
|
|
size_t s = pQInfo->groupInfo.numOfTables;
|
|
num = MAX(s, INITIAL_RESULT_ROWS_VALUE);
|
|
} else { // for super table query, one page for each subset
|
|
num = 1; // pQInfo->pSidSet->numOfSubSet;
|
|
}
|
|
|
|
assert(num > 0);
|
|
return num;
|
|
}
|
|
|
|
static int32_t getRowParamForMultiRowsOutput(SQuery *pQuery, bool isSTableQuery) {
|
|
int32_t rowparam = 1;
|
|
|
|
if (isTopBottomQuery(pQuery) && (!isSTableQuery)) {
|
|
rowparam = pQuery->pSelectExpr[1].pBase.arg->argValue.i64;
|
|
}
|
|
|
|
return rowparam;
|
|
}
|
|
|
|
static int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool isSTableQuery) {
|
|
int32_t rowSize = pQuery->rowSize * getRowParamForMultiRowsOutput(pQuery, isSTableQuery);
|
|
return (DEFAULT_INTERN_BUF_SIZE - sizeof(tFilePage)) / rowSize;
|
|
}
|
|
|
|
char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) {
|
|
assert(pResult != NULL && pRuntimeEnv != NULL);
|
|
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
tFilePage *page = getResultBufferPageById(pRuntimeEnv->pResultBuf, pResult->pos.pageId);
|
|
|
|
int32_t numOfRows = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->stableQuery);
|
|
int32_t realRowId = pResult->pos.rowId * getRowParamForMultiRowsOutput(pQuery, pRuntimeEnv->stableQuery);
|
|
|
|
return ((char *)page->data) + pRuntimeEnv->offset[columnIndex] * numOfRows +
|
|
pQuery->pSelectExpr[columnIndex].bytes * realRowId;
|
|
}
|
|
|
|
/**
|
|
* decrease the refcount for each table involved in this query
|
|
* @param pQInfo
|
|
*/
|
|
UNUSED_FUNC void vnodeDecMeterRefcnt(SQInfo *pQInfo) {
|
|
if (pQInfo != NULL) {
|
|
// assert(taosHashGetSize(pQInfo->groupInfo) >= 1);
|
|
}
|
|
|
|
#if 0
|
|
if (pQInfo == NULL || pQInfo->groupInfo.numOfTables == 1) {
|
|
atomic_fetch_sub_32(&pQInfo->pObj->numOfQueries, 1);
|
|
qTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pQInfo->pObj->vnode,
|
|
pQInfo->pObj->sid, pQInfo->pObj->meterId, pQInfo->pObj->numOfQueries);
|
|
} else {
|
|
int32_t num = 0;
|
|
for (int32_t i = 0; i < pQInfo->groupInfo.numOfTables; ++i) {
|
|
SMeterObj *pMeter = getMeterObj(pQInfo->groupInfo, pQInfo->pSidSet->pTableIdList[i]->sid);
|
|
atomic_fetch_sub_32(&(pMeter->numOfQueries), 1);
|
|
|
|
if (pMeter->numOfQueries > 0) {
|
|
qTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pMeter->vnode, pMeter->sid,
|
|
pMeter->meterId, pMeter->numOfQueries);
|
|
num++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* in order to reduce log output, for all meters of which numOfQueries count are 0,
|
|
* we do not output corresponding information
|
|
*/
|
|
num = pQInfo->groupInfo.numOfTables - num;
|
|
qTrace("QInfo:%p metric query is over, dec query ref for %d meters, numOfQueries on %d meters are 0", pQInfo,
|
|
pQInfo->groupInfo.numOfTables, num);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
UNUSED_FUNC void setTimestampRange(SQueryRuntimeEnv *pRuntimeEnv, int64_t stime, int64_t etime) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
|
|
if (functionId == TSDB_FUNC_SPREAD) {
|
|
pRuntimeEnv->pCtx[i].param[1].dKey = stime;
|
|
pRuntimeEnv->pCtx[i].param[2].dKey = etime;
|
|
|
|
pRuntimeEnv->pCtx[i].param[1].nType = TSDB_DATA_TYPE_DOUBLE;
|
|
pRuntimeEnv->pCtx[i].param[2].nType = TSDB_DATA_TYPE_DOUBLE;
|
|
}
|
|
}
|
|
}
|
|
|
|
static bool needToLoadDataBlock(SQuery *pQuery, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
|
|
int32_t numOfTotalPoints) {
|
|
if (pDataStatis == NULL) {
|
|
return true;
|
|
}
|
|
|
|
#if 0
|
|
for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
|
|
SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
|
|
int32_t colIndex = pFilterInfo->info.colIndex;
|
|
|
|
// this column not valid in current data block
|
|
if (colIndex < 0 || pDataStatis[colIndex].colId != pFilterInfo->info.data.colId) {
|
|
continue;
|
|
}
|
|
|
|
// not support pre-filter operation on binary/nchar data type
|
|
if (!vnodeSupportPrefilter(pFilterInfo->info.data.type)) {
|
|
continue;
|
|
}
|
|
|
|
// all points in current column are NULL, no need to check its boundary value
|
|
if (pDataStatis[colIndex].numOfNull == numOfTotalPoints) {
|
|
continue;
|
|
}
|
|
|
|
if (pFilterInfo->info.info.type == TSDB_DATA_TYPE_FLOAT) {
|
|
float minval = *(double *)(&pDataStatis[colIndex].min);
|
|
float maxval = *(double *)(&pDataStatis[colIndex].max);
|
|
|
|
for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
|
|
if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
|
|
return true;
|
|
}
|
|
}
|
|
} else {
|
|
for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
|
|
if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataStatis[colIndex].min,
|
|
(char *)&pDataStatis[colIndex].max)) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// todo disable this opt code block temporarily
|
|
// for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
// int32_t functId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
// if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
|
|
// return top_bot_datablock_filter(&pCtx[i], functId, (char *)&pField[i].min, (char *)&pField[i].max);
|
|
// }
|
|
// }
|
|
|
|
#endif
|
|
return true;
|
|
}
|
|
|
|
// previous time window may not be of the same size of pQuery->intervalTime
|
|
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow) {
|
|
int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
|
|
|
|
pTimeWindow->skey += (pQuery->slidingTime * factor);
|
|
pTimeWindow->ekey = pTimeWindow->skey + (pQuery->intervalTime - 1);
|
|
}
|
|
|
|
SArray *loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo, SDataStatis **pStatis) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
uint32_t r = 0;
|
|
SArray * pDataBlock = NULL;
|
|
|
|
if (pQuery->numOfFilterCols > 0) {
|
|
r = BLK_DATA_ALL_NEEDED;
|
|
} else {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
int32_t colId = pQuery->pSelectExpr[i].pBase.colInfo.colId;
|
|
r |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pQuery->window.skey, pQuery->window.ekey, colId);
|
|
}
|
|
|
|
if (pRuntimeEnv->pTSBuf > 0 || isIntervalQuery(pQuery)) {
|
|
r |= BLK_DATA_ALL_NEEDED;
|
|
}
|
|
}
|
|
|
|
if (r == BLK_DATA_NO_NEEDED) {
|
|
qTrace("QInfo:%p slot:%d, data block ignored, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
|
|
pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
|
|
} else if (r == BLK_DATA_FILEDS_NEEDED) {
|
|
if (tsdbRetrieveDataBlockStatisInfo(pRuntimeEnv->pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
|
|
// return DISK_DATA_LOAD_FAILED;
|
|
}
|
|
|
|
if (*pStatis == NULL) {
|
|
pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
|
|
}
|
|
} else {
|
|
assert(r == BLK_DATA_ALL_NEEDED);
|
|
if (tsdbRetrieveDataBlockStatisInfo(pRuntimeEnv->pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
|
|
// return DISK_DATA_LOAD_FAILED;
|
|
}
|
|
|
|
/*
|
|
* if this block is completed included in the query range, do more filter operation
|
|
* filter the data block according to the value filter condition.
|
|
* no need to load the data block, continue for next block
|
|
*/
|
|
if (!needToLoadDataBlock(pQuery, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
|
|
#if defined(_DEBUG_VIEW)
|
|
qTrace("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
|
|
#endif
|
|
// return DISK_DATA_DISCARDED;
|
|
}
|
|
|
|
pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
|
|
}
|
|
|
|
return pDataBlock;
|
|
}
|
|
|
|
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
|
|
int32_t midPos = -1;
|
|
int32_t numOfPoints;
|
|
|
|
if (num <= 0) {
|
|
return -1;
|
|
}
|
|
|
|
assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);
|
|
|
|
TSKEY * keyList = (TSKEY *)pValue;
|
|
int32_t firstPos = 0;
|
|
int32_t lastPos = num - 1;
|
|
|
|
if (order == TSDB_ORDER_DESC) {
|
|
// find the first position which is smaller than the key
|
|
while (1) {
|
|
if (key >= keyList[lastPos]) return lastPos;
|
|
if (key == keyList[firstPos]) return firstPos;
|
|
if (key < keyList[firstPos]) return firstPos - 1;
|
|
|
|
numOfPoints = lastPos - firstPos + 1;
|
|
midPos = (numOfPoints >> 1) + firstPos;
|
|
|
|
if (key < keyList[midPos]) {
|
|
lastPos = midPos - 1;
|
|
} else if (key > keyList[midPos]) {
|
|
firstPos = midPos + 1;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
} else {
|
|
// find the first position which is bigger than the key
|
|
while (1) {
|
|
if (key <= keyList[firstPos]) return firstPos;
|
|
if (key == keyList[lastPos]) return lastPos;
|
|
|
|
if (key > keyList[lastPos]) {
|
|
lastPos = lastPos + 1;
|
|
if (lastPos >= num)
|
|
return -1;
|
|
else
|
|
return lastPos;
|
|
}
|
|
|
|
numOfPoints = lastPos - firstPos + 1;
|
|
midPos = (numOfPoints >> 1) + firstPos;
|
|
|
|
if (key < keyList[midPos]) {
|
|
lastPos = midPos - 1;
|
|
} else if (key > keyList[midPos]) {
|
|
firstPos = midPos + 1;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return midPos;
|
|
}
|
|
|
|
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
qTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
|
|
GET_QINFO_ADDR(pRuntimeEnv), pQuery->window.skey, pQuery->window.ekey, pQuery->lastKey, pQuery->order.order);
|
|
|
|
TsdbQueryHandleT pQueryHandle =
|
|
pRuntimeEnv->scanFlag == MASTER_SCAN ? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
|
|
while (tsdbNextDataBlock(pQueryHandle)) {
|
|
if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
|
|
return 0;
|
|
}
|
|
|
|
SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle);
|
|
|
|
// todo extract methods
|
|
if (isIntervalQuery(pQuery) && pRuntimeEnv->windowResInfo.prevSKey == 0) {
|
|
TSKEY skey1, ekey1;
|
|
STimeWindow w = {0};
|
|
SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
|
|
|
|
if (QUERY_IS_ASC_QUERY(pQuery)) {
|
|
getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &skey1,
|
|
&ekey1, &w);
|
|
pWindowResInfo->startTime = w.skey;
|
|
pWindowResInfo->prevSKey = w.skey;
|
|
} else {
|
|
// the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
|
|
getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &skey1,
|
|
&ekey1, &w);
|
|
|
|
pWindowResInfo->startTime = pQuery->window.skey;
|
|
pWindowResInfo->prevSKey = w.skey;
|
|
}
|
|
}
|
|
|
|
// in case of prj/diff query, ensure the output buffer is sufficient to accomodate the results of current block
|
|
if (!isIntervalQuery(pQuery) && !isGroupbyNormalCol(pQuery->pGroupbyExpr) && !isFixedOutputQuery(pQuery)) {
|
|
SResultRec *pRec = &pQuery->rec;
|
|
|
|
if (pQuery->rec.capacity - pQuery->rec.rows < blockInfo.rows) {
|
|
int32_t remain = pRec->capacity - pRec->rows;
|
|
int32_t newSize = pRec->capacity + (blockInfo.rows - remain);
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t bytes = pQuery->pSelectExpr[i].bytes;
|
|
|
|
char *tmp = realloc(pQuery->sdata[i], bytes * newSize + sizeof(SData));
|
|
if (tmp == NULL) { // todo handle the oom
|
|
} else {
|
|
pQuery->sdata[i] = (SData *)tmp;
|
|
}
|
|
|
|
// set the pCtx output buffer position
|
|
pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;
|
|
}
|
|
|
|
pRec->capacity = newSize;
|
|
}
|
|
}
|
|
|
|
SDataStatis *pStatis = NULL;
|
|
SArray * pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, &blockInfo, &pStatis);
|
|
|
|
pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? 0 : blockInfo.rows - 1;
|
|
int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey,
|
|
&pRuntimeEnv->windowResInfo, pDataBlock);
|
|
|
|
qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", rows:%d, res:%d", GET_QINFO_ADDR(pRuntimeEnv),
|
|
blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes);
|
|
|
|
// save last access position
|
|
if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// if the result buffer is not full, set the query completed flag
|
|
if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
|
|
setQueryStatus(pQuery, QUERY_COMPLETED);
|
|
}
|
|
|
|
if (isIntervalQuery(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
|
|
if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
|
|
int32_t step = QUERY_IS_ASC_QUERY(pQuery) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP;
|
|
|
|
closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
|
|
removeRedundantWindow(&pRuntimeEnv->windowResInfo, pQuery->lastKey - step, step);
|
|
pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;
|
|
} else {
|
|
assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void updatelastkey(SQuery *pQuery, STableQueryInfo *pTableQInfo) { pTableQInfo->lastKey = pQuery->lastKey; }
|
|
|
|
/*
|
|
* set tag value in SQLFunctionCtx
|
|
* e.g.,tag information into input buffer
|
|
*/
|
|
static void doSetTagValueInParam(void *tsdb, STableId id, int32_t tagColId, tVariant *param) {
|
|
tVariantDestroy(param);
|
|
|
|
char * val = NULL;
|
|
int16_t bytes = 0;
|
|
int16_t type = 0;
|
|
|
|
if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
|
|
tsdbTableGetName(tsdb, id, &val);
|
|
bytes = TSDB_TABLE_NAME_LEN;
|
|
type = TSDB_DATA_TYPE_BINARY;
|
|
} else {
|
|
tsdbGetTableTagVal(tsdb, id, tagColId, &type, &bytes, &val);
|
|
}
|
|
|
|
tVariantCreateFromBinary(param, val, bytes, type);
|
|
|
|
if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
|
|
tfree(val);
|
|
}
|
|
}
|
|
|
|
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, STableId id, void *tsdb) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
SSqlFuncMsg *pFuncMsg = &pQuery->pSelectExpr[0].pBase;
|
|
if (pQuery->numOfOutput == 1 && pFuncMsg->functionId == TSDB_FUNC_TS_COMP) {
|
|
assert(pFuncMsg->numOfParams == 1);
|
|
doSetTagValueInParam(tsdb, id, pFuncMsg->arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag);
|
|
} else {
|
|
// set tag value, by which the results are aggregated.
|
|
for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
|
|
SColIndex *pCol = &pQuery->pSelectExpr[idx].pBase.colInfo;
|
|
|
|
// ts_comp column required the tag value for join filter
|
|
if (!TSDB_COL_IS_TAG(pCol->flag)) {
|
|
continue;
|
|
}
|
|
|
|
// todo use tag column index to optimize performance
|
|
doSetTagValueInParam(tsdb, id, pCol->colId, &pRuntimeEnv->pCtx[idx].tag);
|
|
}
|
|
|
|
// set the join tag for first column
|
|
if (pFuncMsg->functionId == TSDB_FUNC_TS && pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
|
|
pRuntimeEnv->pTSBuf != NULL) {
|
|
assert(pFuncMsg->numOfParams == 1);
|
|
assert(0); // to do fix me
|
|
// doSetTagValueInParam(pTagSchema, pFuncMsg->arg->argValue.i64, pMeterSidInfo, &pRuntimeEnv->pCtx[0].tag);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
if (!mergeFlag) {
|
|
pCtx[i].aOutputBuf = pCtx[i].aOutputBuf + pCtx[i].outputBytes;
|
|
pCtx[i].currentStage = FIRST_STAGE_MERGE;
|
|
|
|
resetResultInfo(pCtx[i].resultInfo);
|
|
aAggs[functionId].init(&pCtx[i]);
|
|
}
|
|
|
|
pCtx[i].hasNull = true;
|
|
pCtx[i].nStartQueryTimestamp = timestamp;
|
|
pCtx[i].aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes);
|
|
// pCtx[i].aInputElemBuf = ((char *)inputSrc->data) +
|
|
// ((int32_t)pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage) +
|
|
// pCtx[i].outputBytes * inputIdx;
|
|
|
|
// in case of tag column, the tag information should be extracted from input buffer
|
|
if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TAG) {
|
|
tVariantDestroy(&pCtx[i].tag);
|
|
tVariantCreateFromBinary(&pCtx[i].tag, pCtx[i].aInputElemBuf, pCtx[i].inputBytes, pCtx[i].inputType);
|
|
}
|
|
}
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
if (functionId == TSDB_FUNC_TAG_DUMMY) {
|
|
continue;
|
|
}
|
|
|
|
aAggs[functionId].distMergeFunc(&pCtx[i]);
|
|
}
|
|
}
|
|
|
|
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
|
|
if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
|
|
switch (srcDataType) {
|
|
case TSDB_DATA_TYPE_BINARY:
|
|
printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
|
|
break;
|
|
case TSDB_DATA_TYPE_TINYINT:
|
|
case TSDB_DATA_TYPE_BOOL:
|
|
printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
|
|
break;
|
|
case TSDB_DATA_TYPE_SMALLINT:
|
|
printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
|
|
break;
|
|
case TSDB_DATA_TYPE_BIGINT:
|
|
case TSDB_DATA_TYPE_TIMESTAMP:
|
|
printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
|
|
break;
|
|
case TSDB_DATA_TYPE_INT:
|
|
printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
|
|
break;
|
|
case TSDB_DATA_TYPE_FLOAT:
|
|
printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
|
|
break;
|
|
case TSDB_DATA_TYPE_DOUBLE:
|
|
printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
|
|
break;
|
|
}
|
|
} else if (functionId == TSDB_FUNC_AVG) {
|
|
printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
|
|
} else if (functionId == TSDB_FUNC_SPREAD) {
|
|
printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
|
|
} else if (functionId == TSDB_FUNC_TWA) {
|
|
data += 1;
|
|
printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
|
|
*(int64_t *)(data + 16), *(int64_t *)(data + 24));
|
|
} else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
|
|
switch (srcDataType) {
|
|
case TSDB_DATA_TYPE_TINYINT:
|
|
case TSDB_DATA_TYPE_BOOL:
|
|
printf("%d\t", *(int8_t *)data);
|
|
break;
|
|
case TSDB_DATA_TYPE_SMALLINT:
|
|
printf("%d\t", *(int16_t *)data);
|
|
break;
|
|
case TSDB_DATA_TYPE_BIGINT:
|
|
case TSDB_DATA_TYPE_TIMESTAMP:
|
|
printf("%" PRId64 "\t", *(int64_t *)data);
|
|
break;
|
|
case TSDB_DATA_TYPE_INT:
|
|
printf("%d\t", *(int *)data);
|
|
break;
|
|
case TSDB_DATA_TYPE_FLOAT:
|
|
printf("%f\t", *(float *)data);
|
|
break;
|
|
case TSDB_DATA_TYPE_DOUBLE:
|
|
printf("%f\t", *(float *)data);
|
|
break;
|
|
}
|
|
} else if (functionId == TSDB_FUNC_SUM) {
|
|
if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
|
|
printf("%lf\t", *(float *)data);
|
|
} else {
|
|
printf("%" PRId64 "\t", *(int64_t *)data);
|
|
}
|
|
} else {
|
|
printf("%s\t", data);
|
|
}
|
|
}
|
|
|
|
void UNUSED_FUNC displayInterResult(SData **pdata, SQuery *pQuery, int32_t numOfRows) {
|
|
#if 0
|
|
int32_t numOfCols = pQuery->numOfOutput;
|
|
printf("super table query intermediate result, total:%d\n", numOfRows);
|
|
|
|
SQInfo * pQInfo = (SQInfo *)(GET_QINFO_ADDR(pQuery));
|
|
SMeterObj *pMeterObj = pQInfo->pObj;
|
|
|
|
for (int32_t j = 0; j < numOfRows; ++j) {
|
|
for (int32_t i = 0; i < numOfCols; ++i) {
|
|
switch (pQuery->pSelectExpr[i].type) {
|
|
case TSDB_DATA_TYPE_BINARY: {
|
|
int32_t colIndex = pQuery->pSelectExpr[i].pBase.colInfo.colIndex;
|
|
int32_t type = 0;
|
|
|
|
if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[i].pBase.colInfo.flag)) {
|
|
type = pQuery->pSelectExpr[i].type;
|
|
} else {
|
|
type = pMeterObj->schema[colIndex].type;
|
|
}
|
|
printBinaryData(pQuery->pSelectExpr[i].pBase.functionId, pdata[i]->data + pQuery->pSelectExpr[i].bytes * j,
|
|
type);
|
|
break;
|
|
}
|
|
case TSDB_DATA_TYPE_TIMESTAMP:
|
|
case TSDB_DATA_TYPE_BIGINT:
|
|
printf("%" PRId64 "\t", *(int64_t *)(pdata[i]->data + pQuery->pSelectExpr[i].bytes * j));
|
|
break;
|
|
case TSDB_DATA_TYPE_INT:
|
|
printf("%d\t", *(int32_t *)(pdata[i]->data + pQuery->pSelectExpr[i].bytes * j));
|
|
break;
|
|
case TSDB_DATA_TYPE_FLOAT:
|
|
printf("%f\t", *(float *)(pdata[i]->data + pQuery->pSelectExpr[i].bytes * j));
|
|
break;
|
|
case TSDB_DATA_TYPE_DOUBLE:
|
|
printf("%lf\t", *(double *)(pdata[i]->data + pQuery->pSelectExpr[i].bytes * j));
|
|
break;
|
|
}
|
|
}
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
}
|
|
|
|
typedef struct SCompSupporter {
|
|
STableDataInfo **pTableDataInfo;
|
|
int32_t * position;
|
|
SQInfo * pQInfo;
|
|
} SCompSupporter;
|
|
|
|
int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
|
|
int32_t left = *(int32_t *)pLeft;
|
|
int32_t right = *(int32_t *)pRight;
|
|
|
|
SCompSupporter * supporter = (SCompSupporter *)param;
|
|
SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
|
|
|
|
int32_t leftPos = supporter->position[left];
|
|
int32_t rightPos = supporter->position[right];
|
|
|
|
/* left source is exhausted */
|
|
if (leftPos == -1) {
|
|
return 1;
|
|
}
|
|
|
|
/* right source is exhausted*/
|
|
if (rightPos == -1) {
|
|
return -1;
|
|
}
|
|
|
|
SWindowResInfo *pWindowResInfo1 = &supporter->pTableDataInfo[left]->pTableQInfo->windowResInfo;
|
|
SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
|
|
|
|
char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
|
|
TSKEY leftTimestamp = GET_INT64_VAL(b1);
|
|
|
|
SWindowResInfo *pWindowResInfo2 = &supporter->pTableDataInfo[right]->pTableQInfo->windowResInfo;
|
|
SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
|
|
|
|
char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
|
|
TSKEY rightTimestamp = GET_INT64_VAL(b2);
|
|
|
|
if (leftTimestamp == rightTimestamp) {
|
|
return 0;
|
|
}
|
|
|
|
return leftTimestamp > rightTimestamp ? 1 : -1;
|
|
}
|
|
|
|
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
|
|
int64_t st = taosGetTimestampMs();
|
|
int32_t ret = TSDB_CODE_SUCCESS;
|
|
|
|
int32_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
|
|
|
|
while (pQInfo->groupIndex < numOfGroups) {
|
|
SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, pQInfo->groupIndex);
|
|
ret = mergeIntoGroupResultImpl(pQInfo, group);
|
|
if (ret < 0) { // not enough disk space to save the data into disk
|
|
return -1;
|
|
}
|
|
|
|
pQInfo->groupIndex += 1;
|
|
|
|
// this group generates at least one result, return results
|
|
if (ret > 0) {
|
|
break;
|
|
}
|
|
|
|
assert(pQInfo->numOfGroupResultPages == 0);
|
|
qTrace("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
|
|
}
|
|
|
|
qTrace("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%lldms", pQInfo,
|
|
pQInfo->groupIndex - 1, numOfGroups, taosGetTimestampMs() - st);
|
|
|
|
return TSDB_CODE_SUCCESS;
|
|
}
|
|
|
|
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
|
|
if (pQInfo->offset == pQInfo->numOfGroupResultPages) {
|
|
pQInfo->numOfGroupResultPages = 0;
|
|
|
|
// current results of group has been sent to client, try next group
|
|
if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
|
|
return; // failed to save data in the disk
|
|
}
|
|
|
|
// set current query completed
|
|
// if (pQInfo->numOfGroupResultPages == 0 && pQInfo->groupIndex == pQInfo->pSidSet->numOfSubSet) {
|
|
// pQInfo->tableIndex = pQInfo->pSidSet->numOfTables;
|
|
// return;
|
|
// }
|
|
}
|
|
|
|
SQueryRuntimeEnv * pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
|
|
|
|
int32_t id = getGroupResultId(pQInfo->groupIndex - 1);
|
|
SIDList list = getDataBufPagesIdList(pResultBuf, pQInfo->offset + id);
|
|
|
|
int32_t total = 0;
|
|
for (int32_t i = 0; i < list.size; ++i) {
|
|
tFilePage *pData = getResultBufferPageById(pResultBuf, list.pData[i]);
|
|
total += pData->numOfElems;
|
|
}
|
|
|
|
int32_t rows = total;
|
|
|
|
int32_t offset = 0;
|
|
for (int32_t num = 0; num < list.size; ++num) {
|
|
tFilePage *pData = getResultBufferPageById(pResultBuf, list.pData[num]);
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
|
|
char * pDest = pQuery->sdata[i]->data;
|
|
|
|
memcpy(pDest + offset * bytes, pData->data + pRuntimeEnv->offset[i] * pData->numOfElems,
|
|
bytes * pData->numOfElems);
|
|
}
|
|
|
|
offset += pData->numOfElems;
|
|
}
|
|
|
|
assert(pQuery->rec.rows == 0);
|
|
|
|
pQuery->rec.rows += rows;
|
|
pQInfo->offset += 1;
|
|
}
|
|
|
|
int64_t getNumOfResultWindowRes(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pWindowRes) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
int64_t maxOutput = 0;
|
|
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
|
|
int32_t functionId = pQuery->pSelectExpr[j].pBase.functionId;
|
|
|
|
/*
|
|
* ts, tag, tagprj function can not decide the output number of current query
|
|
* the number of output result is decided by main output
|
|
*/
|
|
if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ) {
|
|
continue;
|
|
}
|
|
|
|
SResultInfo *pResultInfo = &pWindowRes->resultInfo[j];
|
|
if (pResultInfo != NULL && maxOutput < pResultInfo->numOfRes) {
|
|
maxOutput = pResultInfo->numOfRes;
|
|
}
|
|
}
|
|
|
|
return maxOutput;
|
|
}
|
|
|
|
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
size_t size = taosArrayGetSize(pGroup);
|
|
|
|
tFilePage **buffer = (tFilePage **)pQuery->sdata;
|
|
int32_t * posList = calloc(size, sizeof(int32_t));
|
|
|
|
STableDataInfo **pTableList = malloc(POINTER_BYTES * size);
|
|
|
|
// todo opt for the case of one table per group
|
|
int32_t numOfTables = 0;
|
|
for (int32_t i = 0; i < size; ++i) {
|
|
SPair * p = taosArrayGet(pGroup, i);
|
|
STableDataInfo *pInfo = p->sec;
|
|
|
|
SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, pInfo->pTableQInfo->tid);
|
|
if (list.size > 0 && pInfo->pTableQInfo->windowResInfo.size > 0) {
|
|
pTableList[numOfTables] = pInfo;
|
|
numOfTables += 1;
|
|
}
|
|
}
|
|
|
|
if (numOfTables == 0) {
|
|
tfree(posList);
|
|
tfree(pTableList);
|
|
|
|
assert(pQInfo->numOfGroupResultPages == 0);
|
|
return 0;
|
|
}
|
|
|
|
SCompSupporter cs = {pTableList, posList, pQInfo};
|
|
|
|
SLoserTreeInfo *pTree = NULL;
|
|
tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
|
|
|
|
SResultInfo *pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
|
|
setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery);
|
|
resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
|
|
|
|
int64_t lastTimestamp = -1;
|
|
int64_t startt = taosGetTimestampMs();
|
|
|
|
while (1) {
|
|
int32_t pos = pTree->pNode[0].index;
|
|
|
|
SWindowResInfo *pWindowResInfo = &pTableList[pos]->pTableQInfo->windowResInfo;
|
|
SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
|
|
|
|
char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
|
|
TSKEY ts = GET_INT64_VAL(b);
|
|
|
|
assert(ts == pWindowRes->window.skey);
|
|
int64_t num = getNumOfResultWindowRes(pRuntimeEnv, pWindowRes);
|
|
if (num <= 0) {
|
|
cs.position[pos] += 1;
|
|
|
|
if (cs.position[pos] >= pWindowResInfo->size) {
|
|
cs.position[pos] = -1;
|
|
|
|
// all input sources are exhausted
|
|
if (--numOfTables == 0) {
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
if (ts == lastTimestamp) { // merge with the last one
|
|
doMerge(pRuntimeEnv, ts, pWindowRes, true);
|
|
} else { // copy data to disk buffer
|
|
if (buffer[0]->numOfElems == pQuery->rec.capacity) {
|
|
if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
|
|
return -1;
|
|
}
|
|
|
|
resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
|
|
}
|
|
|
|
doMerge(pRuntimeEnv, ts, pWindowRes, false);
|
|
buffer[0]->numOfElems += 1;
|
|
}
|
|
|
|
lastTimestamp = ts;
|
|
|
|
cs.position[pos] += 1;
|
|
if (cs.position[pos] >= pWindowResInfo->size) {
|
|
cs.position[pos] = -1;
|
|
|
|
// all input sources are exhausted
|
|
if (--numOfTables == 0) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
|
|
}
|
|
|
|
if (buffer[0]->numOfElems != 0) { // there are data in buffer
|
|
if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
|
|
qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
|
|
|
|
tfree(pTree);
|
|
tfree(pTableList);
|
|
tfree(posList);
|
|
tfree(pResultInfo);
|
|
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
int64_t endt = taosGetTimestampMs();
|
|
|
|
#ifdef _DEBUG_VIEW
|
|
displayInterResult(pQuery->sdata, pQuery, pQuery->sdata[0]->num);
|
|
#endif
|
|
|
|
qTrace("QInfo:%p result merge completed, elapsed time:%" PRId64 " ms", GET_QINFO_ADDR(pQuery), endt - startt);
|
|
tfree(pTree);
|
|
tfree(pTableList);
|
|
tfree(posList);
|
|
|
|
pQInfo->offset = 0;
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
tfree(pResultInfo[i].interResultBuf);
|
|
}
|
|
|
|
tfree(pResultInfo);
|
|
return pQInfo->numOfGroupResultPages;
|
|
}
|
|
|
|
int32_t flushFromResultBuf(SQInfo *pQInfo) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
|
|
int32_t capacity = (DEFAULT_INTERN_BUF_SIZE - sizeof(tFilePage)) / pQuery->rowSize;
|
|
|
|
// the base value for group result, since the maximum number of table for each vnode will not exceed 100,000.
|
|
int32_t pageId = -1;
|
|
|
|
int32_t remain = pQuery->sdata[0]->num;
|
|
int32_t offset = 0;
|
|
|
|
while (remain > 0) {
|
|
int32_t r = remain;
|
|
if (r > capacity) {
|
|
r = capacity;
|
|
}
|
|
|
|
int32_t id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;
|
|
tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId);
|
|
|
|
// pagewise copy to dest buffer
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
|
|
buf->numOfElems = r;
|
|
|
|
memcpy(buf->data + pRuntimeEnv->offset[i] * buf->numOfElems, ((char *)pQuery->sdata[i]->data) + offset * bytes,
|
|
buf->numOfElems * bytes);
|
|
}
|
|
|
|
offset += r;
|
|
remain -= r;
|
|
}
|
|
|
|
pQInfo->numOfGroupResultPages += 1;
|
|
return TSDB_CODE_SUCCESS;
|
|
}
|
|
|
|
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
|
|
for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
|
|
pCtx[k].aOutputBuf = pQuery->sdata[k]->data - pCtx[k].outputBytes;
|
|
pCtx[k].size = 1;
|
|
pCtx[k].startOffset = 0;
|
|
pCtx[k].resultInfo = &pResultInfo[k];
|
|
|
|
pQuery->sdata[k]->num = 0;
|
|
}
|
|
}
|
|
|
|
void setTableDataInfo(STableDataInfo *pTableDataInfo, int32_t tableIndex, int32_t groupId) {
|
|
pTableDataInfo->groupIdx = groupId;
|
|
pTableDataInfo->tableIndex = tableIndex;
|
|
}
|
|
|
|
static void doDisableFunctsForSupplementaryScan(SQuery *pQuery, SWindowResInfo *pWindowResInfo, int32_t order) {
|
|
for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
|
|
SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, i);
|
|
if (!pStatus->closed) {
|
|
continue;
|
|
}
|
|
|
|
SWindowResult *buf = getWindowResult(pWindowResInfo, i);
|
|
|
|
// open/close the specified query for each group result
|
|
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
|
|
int32_t functId = pQuery->pSelectExpr[j].pBase.functionId;
|
|
|
|
if (((functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC) ||
|
|
((functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC)) {
|
|
buf->resultInfo[j].complete = false;
|
|
} else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
|
|
buf->resultInfo[j].complete = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void disableFuncInReverseScan(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
int32_t order = pQuery->order.order;
|
|
|
|
// group by normal columns and interval query on normal table
|
|
SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
|
|
if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
|
|
doDisableFunctsForSupplementaryScan(pQuery, pWindowResInfo, order);
|
|
} else { // for simple result of table query,
|
|
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
|
|
int32_t functId = pQuery->pSelectExpr[j].pBase.functionId;
|
|
|
|
SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];
|
|
|
|
if (((functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC) ||
|
|
((functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC)) {
|
|
pCtx->resultInfo->complete = false;
|
|
} else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
|
|
pCtx->resultInfo->complete = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void disableFuncForReverseScan(SQInfo *pQInfo, int32_t order) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
pRuntimeEnv->pCtx[i].order = (pRuntimeEnv->pCtx[i].order) ^ 1u;
|
|
}
|
|
|
|
if (isIntervalQuery(pQuery)) {
|
|
// for (int32_t i = 0; i < pQInfo->groupInfo.numOfTables; ++i) {
|
|
// STableQueryInfo *pTableQueryInfo = pQInfo->pTableDataInfo[i].pTableQInfo;
|
|
// SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
|
|
//
|
|
// doDisableFunctsForSupplementaryScan(pQuery, pWindowResInfo, order);
|
|
// }
|
|
} else {
|
|
SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
|
|
doDisableFunctsForSupplementaryScan(pQuery, pWindowResInfo, order);
|
|
}
|
|
|
|
pQuery->order.order = (pQuery->order.order) ^ 1u;
|
|
}
|
|
|
|
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
SWITCH_ORDER(pRuntimeEnv->pCtx[i]
|
|
.order); // = (pRuntimeEnv->pCtx[i].order == TSDB_ORDER_ASC)? TSDB_ORDER_DESC:TSDB_ORDER_ASC;
|
|
}
|
|
}
|
|
|
|
void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo) {
|
|
int32_t numOfCols = pQuery->numOfOutput;
|
|
|
|
pResultRow->resultInfo = calloc((size_t)numOfCols, sizeof(SResultInfo));
|
|
pResultRow->pos = *posInfo;
|
|
|
|
// set the intermediate result output buffer
|
|
setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery);
|
|
}
|
|
|
|
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
|
|
pCtx->aOutputBuf = pQuery->sdata[i]->data;
|
|
|
|
/*
|
|
* set the output buffer information and intermediate buffer
|
|
* not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
|
|
*/
|
|
resetResultInfo(&pRuntimeEnv->resultInfo[i]);
|
|
pCtx->resultInfo = &pRuntimeEnv->resultInfo[i];
|
|
|
|
// set the timestamp output buffer for top/bottom/diff query
|
|
int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
|
|
pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
|
|
}
|
|
|
|
memset(pQuery->sdata[i]->data, 0, (size_t)pQuery->pSelectExpr[i].bytes * pQuery->rec.capacity);
|
|
}
|
|
|
|
initCtxOutputBuf(pRuntimeEnv);
|
|
}
|
|
|
|
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
// reset the execution contexts
|
|
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
|
|
int32_t functionId = pQuery->pSelectExpr[j].pBase.functionId;
|
|
assert(functionId != TSDB_FUNC_DIFF);
|
|
|
|
// set next output position
|
|
if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
|
|
pRuntimeEnv->pCtx[j].aOutputBuf += pRuntimeEnv->pCtx[j].outputBytes * output;
|
|
}
|
|
|
|
if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
|
|
/*
|
|
* NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
|
|
* in the procedure of top/bottom routine
|
|
* the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
|
|
*
|
|
* diff function is handled in multi-output function
|
|
*/
|
|
pRuntimeEnv->pCtx[j].ptsOutputBuf += TSDB_KEYSIZE * output;
|
|
}
|
|
|
|
resetResultInfo(pRuntimeEnv->pCtx[j].resultInfo);
|
|
}
|
|
}
|
|
|
|
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
|
|
int32_t functionId = pQuery->pSelectExpr[j].pBase.functionId;
|
|
|
|
pRuntimeEnv->pCtx[j].currentStage = 0;
|
|
aAggs[functionId].init(&pRuntimeEnv->pCtx[j]);
|
|
}
|
|
}
|
|
|
|
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
|
|
return;
|
|
}
|
|
|
|
if (pQuery->rec.rows <= pQuery->limit.offset) {
|
|
pQuery->limit.offset -= pQuery->rec.rows;
|
|
pQuery->rec.rows = 0;
|
|
|
|
resetCtxOutputBuf(pRuntimeEnv);
|
|
|
|
// clear the buffer is full flag if exists
|
|
pQuery->status &= (~QUERY_RESBUF_FULL);
|
|
} else {
|
|
int32_t numOfSkip = (int32_t) pQuery->limit.offset;
|
|
pQuery->rec.rows -= numOfSkip;
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
|
|
|
|
memmove(pQuery->sdata[i]->data, pQuery->sdata[i]->data + bytes * numOfSkip, pQuery->rec.rows * bytes);
|
|
pRuntimeEnv->pCtx[i].aOutputBuf += bytes * numOfSkip;
|
|
|
|
if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
|
|
pRuntimeEnv->pCtx[i].ptsOutputBuf += TSDB_KEYSIZE * numOfSkip;
|
|
}
|
|
}
|
|
|
|
pQuery->limit.offset = 0;
|
|
}
|
|
}
|
|
|
|
void setQueryStatus(SQuery *pQuery, int8_t status) {
|
|
if (status == QUERY_NOT_COMPLETED) {
|
|
pQuery->status = status;
|
|
} else {
|
|
// QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
|
|
pQuery->status &= (~QUERY_NOT_COMPLETED);
|
|
pQuery->status |= status;
|
|
}
|
|
}
|
|
|
|
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
bool toContinue = false;
|
|
if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
|
|
// for each group result, call the finalize function for each column
|
|
SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
|
|
|
|
for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
|
|
SWindowResult *pResult = getWindowResult(pWindowResInfo, i);
|
|
if (!pResult->status.closed) {
|
|
continue;
|
|
}
|
|
|
|
setWindowResOutputBuf(pRuntimeEnv, pResult);
|
|
|
|
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
|
|
int16_t functId = pQuery->pSelectExpr[j].pBase.functionId;
|
|
if (functId == TSDB_FUNC_TS) {
|
|
continue;
|
|
}
|
|
|
|
aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[j]);
|
|
SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
|
|
|
|
toContinue |= (!pResInfo->complete);
|
|
}
|
|
}
|
|
} else {
|
|
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
|
|
int16_t functId = pQuery->pSelectExpr[j].pBase.functionId;
|
|
if (functId == TSDB_FUNC_TS) {
|
|
continue;
|
|
}
|
|
|
|
aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[j]);
|
|
SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
|
|
|
|
toContinue |= (!pResInfo->complete);
|
|
}
|
|
}
|
|
|
|
return toContinue;
|
|
}
|
|
|
|
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
SQueryStatusInfo info = {
|
|
.status = pQuery->status,
|
|
.windowIndex = pRuntimeEnv->windowResInfo.curIndex,
|
|
.lastKey = pQuery->lastKey,
|
|
.w = pQuery->window,
|
|
.curWindow = {.skey = pQuery->lastKey, .ekey = pQuery->window.ekey},
|
|
};
|
|
|
|
return info;
|
|
}
|
|
|
|
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
|
|
SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf); // save the cursor
|
|
if (pRuntimeEnv->pTSBuf) {
|
|
SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
|
|
tsBufNextPos(pRuntimeEnv->pTSBuf);
|
|
}
|
|
|
|
// reverse order time range
|
|
pQuery->window = pStatus->curWindow;
|
|
SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
|
|
|
|
SWITCH_ORDER(pQuery->order.order);
|
|
SET_SUPPLEMENT_SCAN_FLAG(pRuntimeEnv);
|
|
|
|
STsdbQueryCond cond = {
|
|
.twindow = pQuery->window,
|
|
.order = pQuery->order.order,
|
|
.colList = pQuery->colList,
|
|
.numOfCols = pQuery->numOfCols,
|
|
};
|
|
|
|
// clean unused handle
|
|
if (pRuntimeEnv->pSecQueryHandle != NULL) {
|
|
tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
|
|
}
|
|
|
|
pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->groupInfo);
|
|
|
|
setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
|
|
switchCtxOrder(pRuntimeEnv);
|
|
disableFuncInReverseScan(pRuntimeEnv);
|
|
}
|
|
|
|
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
SWITCH_ORDER(pQuery->order.order);
|
|
switchCtxOrder(pRuntimeEnv);
|
|
|
|
tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
|
|
if (pRuntimeEnv->pTSBuf) {
|
|
pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
|
|
}
|
|
|
|
SET_MASTER_SCAN_FLAG(pRuntimeEnv);
|
|
|
|
// update the pQuery->window.skey and pQuery->window.ekey to limit the scan scope of sliding query
|
|
// during reverse scan
|
|
pQuery->lastKey = pStatus->lastKey;
|
|
pQuery->status = pStatus->status;
|
|
pQuery->window = pStatus->w;
|
|
}
|
|
|
|
void scanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
|
|
|
|
// store the start query position
|
|
SQInfo * pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
|
|
SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv);
|
|
|
|
SET_MASTER_SCAN_FLAG(pRuntimeEnv);
|
|
int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
|
|
|
|
while (1) {
|
|
doScanAllDataBlocks(pRuntimeEnv);
|
|
|
|
if (pRuntimeEnv->scanFlag == MASTER_SCAN) {
|
|
qstatus.status = pQuery->status;
|
|
qstatus.curWindow.ekey = pQuery->lastKey - step;
|
|
}
|
|
|
|
if (!needScanDataBlocksAgain(pRuntimeEnv)) {
|
|
// restore the status code and jump out of loop
|
|
if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
|
|
pQuery->status = qstatus.status;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
STsdbQueryCond cond = {
|
|
.twindow = qstatus.curWindow,
|
|
.order = pQuery->order.order,
|
|
.colList = pQuery->colList,
|
|
.numOfCols = pQuery->numOfCols,
|
|
};
|
|
|
|
if (pRuntimeEnv->pSecQueryHandle != NULL) {
|
|
tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
|
|
}
|
|
|
|
pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->groupInfo);
|
|
pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;
|
|
|
|
setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
|
|
pRuntimeEnv->scanFlag = REPEAT_SCAN;
|
|
|
|
// check if query is killed or not
|
|
if (isQueryKilled(pQInfo)) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (!needReverseScan(pQuery)) {
|
|
return;
|
|
}
|
|
|
|
setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);
|
|
|
|
// reverse scan from current position
|
|
qTrace("QInfo:%p start to reverse scan", pQInfo);
|
|
doScanAllDataBlocks(pRuntimeEnv);
|
|
|
|
clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
|
|
}
|
|
|
|
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
|
|
// for each group result, call the finalize function for each column
|
|
SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
|
|
if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
|
|
closeAllTimeWindow(pWindowResInfo);
|
|
}
|
|
|
|
for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
|
|
SWindowResult *buf = &pWindowResInfo->pResult[i];
|
|
if (!isWindowResClosed(pWindowResInfo, i)) {
|
|
continue;
|
|
}
|
|
|
|
setWindowResOutputBuf(pRuntimeEnv, buf);
|
|
|
|
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
|
|
aAggs[pQuery->pSelectExpr[j].pBase.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
|
|
}
|
|
|
|
/*
|
|
* set the number of output results for group by normal columns, the number of output rows usually is 1 except
|
|
* the top and bottom query
|
|
*/
|
|
buf->numOfRows = getNumOfResult(pRuntimeEnv);
|
|
}
|
|
|
|
} else {
|
|
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
|
|
aAggs[pQuery->pSelectExpr[j].pBase.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
|
|
}
|
|
}
|
|
}
|
|
|
|
static bool hasMainOutput(SQuery *pQuery) {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
|
|
if (functionId != TSDB_FUNC_TS && functionId != TSDB_FUNC_TAG && functionId != TSDB_FUNC_TAGPRJ) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, int32_t tid, STimeWindow win) {
|
|
STableQueryInfo *pTableQueryInfo = calloc(1, sizeof(STableQueryInfo));
|
|
|
|
pTableQueryInfo->win = win;
|
|
pTableQueryInfo->lastKey = win.skey;
|
|
|
|
pTableQueryInfo->tid = tid;
|
|
pTableQueryInfo->cur.vnodeIndex = -1;
|
|
|
|
initWindowResInfo(&pTableQueryInfo->windowResInfo, pRuntimeEnv, 100, 100, TSDB_DATA_TYPE_INT);
|
|
return pTableQueryInfo;
|
|
}
|
|
|
|
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols) {
|
|
if (pTableQueryInfo == NULL) {
|
|
return;
|
|
}
|
|
|
|
cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo, numOfCols);
|
|
free(pTableQueryInfo);
|
|
}
|
|
|
|
void changeMeterQueryInfoForSuppleQuery(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
|
|
if (pTableQueryInfo == NULL) {
|
|
return;
|
|
}
|
|
|
|
// order has change already!
|
|
int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
|
|
if (!QUERY_IS_ASC_QUERY(pQuery)) {
|
|
assert(pTableQueryInfo->win.ekey >= pTableQueryInfo->lastKey + step);
|
|
} else {
|
|
assert(pTableQueryInfo->win.ekey <= pTableQueryInfo->lastKey + step);
|
|
}
|
|
|
|
pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
|
|
|
|
SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
|
|
pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
|
|
|
|
pTableQueryInfo->cur.order = pTableQueryInfo->cur.order ^ 1u;
|
|
pTableQueryInfo->cur.vnodeIndex = -1;
|
|
}
|
|
|
|
void restoreIntervalQueryRange(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
pQuery->window = pTableQueryInfo->win;
|
|
pQuery->lastKey = pTableQueryInfo->lastKey;
|
|
|
|
assert(((pQuery->lastKey >= pQuery->window.skey) && QUERY_IS_ASC_QUERY(pQuery)) ||
|
|
((pQuery->lastKey <= pQuery->window.skey) && !QUERY_IS_ASC_QUERY(pQuery)));
|
|
}
|
|
|
|
/**
|
|
* set output buffer for different group
|
|
* @param pRuntimeEnv
|
|
* @param pDataBlockInfo
|
|
*/
|
|
void setExecutionContext(SQInfo *pQInfo, STableQueryInfo *pTableQueryInfo, STable *pTable, int32_t groupIdx,
|
|
TSKEY nextKey) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SWindowResInfo * pWindowResInfo = &pRuntimeEnv->windowResInfo;
|
|
int32_t GROUPRESULTID = 1;
|
|
|
|
SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIdx, sizeof(groupIdx));
|
|
if (pWindowRes == NULL) {
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* not assign result buffer yet, add new result buffer
|
|
* all group belong to one result set, and each group result has different group id so set the id to be one
|
|
*/
|
|
if (pWindowRes->pos.pageId == -1) {
|
|
if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) !=
|
|
TSDB_CODE_SUCCESS) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
|
|
initCtxOutputBuf(pRuntimeEnv);
|
|
|
|
pTableQueryInfo->lastKey = nextKey;
|
|
setAdditionalInfo(pQInfo, pTable, pTableQueryInfo);
|
|
}
|
|
|
|
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
// Note: pResult->pos[i]->numOfElems == 0, there is only fixed number of results for each group
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
|
|
pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult);
|
|
|
|
int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
|
|
pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
|
|
}
|
|
|
|
/*
|
|
* set the output buffer information and intermediate buffer
|
|
* not all queries require the interResultBuf, such as COUNT
|
|
*/
|
|
pCtx->resultInfo = &pResult->resultInfo[i];
|
|
|
|
// set super table query flag
|
|
SResultInfo *pResInfo = GET_RES_INFO(pCtx);
|
|
pResInfo->superTableQ = pRuntimeEnv->stableQuery;
|
|
}
|
|
}
|
|
|
|
int32_t setAdditionalInfo(SQInfo *pQInfo, STable *pTable, STableQueryInfo *pTableQueryInfo) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
assert(pTableQueryInfo->lastKey >= 0);
|
|
|
|
setTagVal(pRuntimeEnv, pTable->tableId, pQInfo->tsdb);
|
|
|
|
// both the master and supplement scan needs to set the correct ts comp start position
|
|
if (pRuntimeEnv->pTSBuf != NULL) {
|
|
if (pTableQueryInfo->cur.vnodeIndex == -1) {
|
|
pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
|
|
|
|
tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);
|
|
|
|
// keep the cursor info of current meter
|
|
pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;
|
|
} else {
|
|
tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* There are two cases to handle:
|
|
*
|
|
* 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
|
|
* pQuery->window.skey, and pQuery->eKey.
|
|
* 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
|
|
* merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
|
|
* is a previous result generated or not.
|
|
*/
|
|
void setIntervalQueryRange(STableQueryInfo *pTableQueryInfo, SQInfo *pQInfo, TSKEY key) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
if (pTableQueryInfo->queryRangeSet) {
|
|
pQuery->lastKey = key;
|
|
pTableQueryInfo->lastKey = key;
|
|
} else {
|
|
pQuery->window.skey = key;
|
|
STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
|
|
|
|
// for too small query range, no data in this interval.
|
|
if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
|
|
(!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
|
|
return;
|
|
}
|
|
|
|
/**
|
|
* In handling the both ascending and descending order super table query, we need to find the first qualified
|
|
* timestamp of this table, and then set the first qualified start timestamp.
|
|
* In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
|
|
* operations involve.
|
|
*/
|
|
TSKEY skey1, ekey1;
|
|
STimeWindow w = {0};
|
|
SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
|
|
|
|
getAlignQueryTimeWindow(pQuery, win.skey, win.skey, win.ekey, &skey1, &ekey1, &w);
|
|
pWindowResInfo->startTime = pQuery->window.skey; // windowSKey may be 0 in case of 1970 timestamp
|
|
|
|
if (pWindowResInfo->prevSKey == 0) {
|
|
if (QUERY_IS_ASC_QUERY(pQuery)) {
|
|
pWindowResInfo->prevSKey = w.skey;
|
|
} else {
|
|
assert(win.ekey == pQuery->window.skey);
|
|
pWindowResInfo->prevSKey = w.skey;
|
|
}
|
|
}
|
|
|
|
pTableQueryInfo->queryRangeSet = 1;
|
|
pTableQueryInfo->lastKey = pQuery->window.skey;
|
|
pTableQueryInfo->win.skey = pQuery->window.skey;
|
|
|
|
pQuery->lastKey = pQuery->window.skey;
|
|
}
|
|
}
|
|
|
|
bool requireTimestamp(SQuery *pQuery) {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; i++) {
|
|
int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId;
|
|
if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
|
|
/*
|
|
* 1. if skey or ekey locates in this block, we need to load the timestamp column to decide the precise position
|
|
* 2. if there are top/bottom, first_dst/last_dst functions, we need to load timestamp column in any cases;
|
|
*/
|
|
STimeWindow *w = &pDataBlockInfo->window;
|
|
bool loadPrimaryTS = (pQuery->lastKey >= w->skey && pQuery->lastKey <= w->ekey) ||
|
|
(pQuery->window.ekey >= w->skey && pQuery->window.ekey <= w->ekey) || requireTimestamp(pQuery);
|
|
|
|
return loadPrimaryTS;
|
|
}
|
|
|
|
bool onDemandLoadDatablock(SQuery *pQuery, int16_t queryRangeSet) {
|
|
return (pQuery->intervalTime == 0) || ((queryRangeSet == 1) && (isIntervalQuery(pQuery)));
|
|
}
|
|
|
|
static int32_t getNumOfSubset(SQInfo *pQInfo) {
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
|
|
int32_t totalSubset = 0;
|
|
if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || (isIntervalQuery(pQuery))) {
|
|
totalSubset = numOfClosedTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
|
|
} else {
|
|
totalSubset = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
|
|
}
|
|
|
|
return totalSubset;
|
|
}
|
|
|
|
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResult *result, int32_t orderType) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
int32_t numOfResult = 0;
|
|
int32_t startIdx = 0;
|
|
int32_t step = -1;
|
|
|
|
qTrace("QInfo:%p start to copy data from windowResInfo to query buf", GET_QINFO_ADDR(pQuery));
|
|
int32_t totalSubset = getNumOfSubset(pQInfo);
|
|
|
|
if (orderType == TSDB_ORDER_ASC) {
|
|
startIdx = pQInfo->groupIndex;
|
|
step = 1;
|
|
} else { // desc order copy all data
|
|
startIdx = totalSubset - pQInfo->groupIndex - 1;
|
|
step = -1;
|
|
}
|
|
|
|
for (int32_t i = startIdx; (i < totalSubset) && (i >= 0); i += step) {
|
|
if (result[i].numOfRows == 0) {
|
|
pQInfo->offset = 0;
|
|
pQInfo->groupIndex += 1;
|
|
continue;
|
|
}
|
|
|
|
assert(result[i].numOfRows >= 0 && pQInfo->offset <= 1);
|
|
|
|
int32_t numOfRowsToCopy = result[i].numOfRows - pQInfo->offset;
|
|
int32_t oldOffset = pQInfo->offset;
|
|
|
|
/*
|
|
* current output space is not enough to keep all the result data of this group, only copy partial results
|
|
* to SQuery object's result buffer
|
|
*/
|
|
if (numOfRowsToCopy > pQuery->rec.capacity - numOfResult) {
|
|
numOfRowsToCopy = pQuery->rec.capacity - numOfResult;
|
|
pQInfo->offset += numOfRowsToCopy;
|
|
} else {
|
|
pQInfo->offset = 0;
|
|
pQInfo->groupIndex += 1;
|
|
}
|
|
|
|
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
|
|
int32_t size = pRuntimeEnv->pCtx[j].outputBytes;
|
|
|
|
char *out = pQuery->sdata[j]->data + numOfResult * size;
|
|
char *in = getPosInResultPage(pRuntimeEnv, j, &result[i]);
|
|
memcpy(out, in + oldOffset * size, size * numOfRowsToCopy);
|
|
}
|
|
|
|
numOfResult += numOfRowsToCopy;
|
|
if (numOfResult == pQuery->rec.capacity) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
qTrace("QInfo:%p copy data to query buf completed", pQInfo);
|
|
|
|
#ifdef _DEBUG_VIEW
|
|
displayInterResult(pQuery->sdata, pQuery, numOfResult);
|
|
#endif
|
|
return numOfResult;
|
|
}
|
|
|
|
/**
|
|
* copyFromWindowResToSData support copy data in ascending/descending order
|
|
* For interval query of both super table and table, copy the data in ascending order, since the output results are
|
|
* ordered in SWindowResutl already. While handling the group by query for both table and super table,
|
|
* all group result are completed already.
|
|
*
|
|
* @param pQInfo
|
|
* @param result
|
|
*/
|
|
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResult *result) {
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
|
|
int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
|
|
int32_t numOfResult = doCopyToSData(pQInfo, result, orderType);
|
|
|
|
pQuery->rec.rows += numOfResult;
|
|
|
|
assert(pQuery->rec.rows <= pQuery->rec.capacity);
|
|
}
|
|
|
|
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv, STableDataInfo *pTableDataInfo) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
// update the number of result for each, only update the number of rows for the corresponding window result.
|
|
if (pQuery->intervalTime == 0) {
|
|
int32_t g = pTableDataInfo->groupIdx;
|
|
assert(pRuntimeEnv->windowResInfo.size > 0);
|
|
|
|
SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (char *)&g, sizeof(g));
|
|
if (pWindowRes->numOfRows == 0) {
|
|
pWindowRes->numOfRows = getNumOfResult(pRuntimeEnv);
|
|
}
|
|
}
|
|
}
|
|
|
|
void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, STableDataInfo *pTableDataInfo,
|
|
SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis, SArray *pDataBlock,
|
|
__block_search_fn_t searchFn) {
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
STableQueryInfo *pTableQueryInfo = pTableDataInfo->pTableQInfo;
|
|
SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
|
|
pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
|
|
|
|
if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
|
|
rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
|
|
} else {
|
|
blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
|
|
}
|
|
|
|
updateWindowResNumOfRes(pRuntimeEnv, pTableDataInfo);
|
|
updatelastkey(pQuery, pTableQueryInfo);
|
|
}
|
|
|
|
bool vnodeHasRemainResults(void *handle) {
|
|
SQInfo *pQInfo = (SQInfo *)handle;
|
|
|
|
if (pQInfo == NULL || pQInfo->runtimeEnv.pQuery->interpoType == TSDB_INTERPO_NONE) {
|
|
return false;
|
|
}
|
|
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
SInterpolationInfo *pInterpoInfo = &pRuntimeEnv->interpoInfo;
|
|
if (pQuery->limit.limit > 0 && pQuery->rec.rows >= pQuery->limit.limit) {
|
|
return false;
|
|
}
|
|
|
|
int32_t remain = taosNumOfRemainPoints(pInterpoInfo);
|
|
if (remain > 0) {
|
|
return true;
|
|
} else {
|
|
if (pRuntimeEnv->pInterpoBuf == NULL) {
|
|
return false;
|
|
}
|
|
|
|
// query has completed
|
|
if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
|
|
/*TSKEY ekey =*/taosGetRevisedEndKey(pQuery->window.ekey, pQuery->order.order, pQuery->intervalTime,
|
|
pQuery->slidingTimeUnit, pQuery->precision);
|
|
// int32_t numOfTotal = taosGetNumOfResultWithInterpo(pInterpoInfo, (TSKEY
|
|
// *)pRuntimeEnv->pInterpoBuf[0]->data,
|
|
// remain, pQuery->intervalTime, ekey,
|
|
// pQuery->pointsToRead);
|
|
// return numOfTotal > 0;
|
|
assert(0);
|
|
return false;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static UNUSED_FUNC int32_t resultInterpolate(SQInfo *pQInfo, tFilePage **data, tFilePage **pDataSrc, int32_t numOfRows,
|
|
int32_t outputRows) {
|
|
#if 0
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery *pQuery = &pRuntimeEnv->pQuery;
|
|
|
|
assert(pRuntimeEnv->pCtx[0].outputBytes == TSDB_KEYSIZE);
|
|
|
|
// build support structure for performing interpolation
|
|
SSchema *pSchema = calloc(1, sizeof(SSchema) * pQuery->numOfOutput);
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
pSchema[i].bytes = pRuntimeEnv->pCtx[i].outputBytes;
|
|
pSchema[i].type = pQuery->pSelectExpr[i].type;
|
|
}
|
|
|
|
// SColumnModel *pModel = createColumnModel(pSchema, pQuery->numOfOutput, pQuery->pointsToRead);
|
|
|
|
char * srcData[TSDB_MAX_COLUMNS] = {0};
|
|
int32_t functions[TSDB_MAX_COLUMNS] = {0};
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
srcData[i] = pDataSrc[i]->data;
|
|
functions[i] = pQuery->pSelectExpr[i].pBase.functionId;
|
|
}
|
|
|
|
assert(0);
|
|
// int32_t numOfRes = taosDoInterpoResult(&pRuntimeEnv->interpoInfo, pQuery->interpoType, data, numOfRows, outputRows,
|
|
// pQuery->intervalTime, (int64_t *)pDataSrc[0]->data, pModel, srcData,
|
|
// pQuery->defaultVal, functions, pRuntimeEnv->pTabObj->pointsPerFileBlock);
|
|
|
|
destroyColumnModel(pModel);
|
|
free(pSchema);
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
|
|
int32_t bytes = pQuery->pSelectExpr[col].bytes;
|
|
|
|
memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
|
|
data += bytes * numOfRows;
|
|
}
|
|
|
|
// all data returned, set query over
|
|
if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
|
|
setQueryStatus(pQuery, QUERY_OVER);
|
|
}
|
|
}
|
|
|
|
int32_t vnodeQueryResultInterpolate(SQInfo *pQInfo, tFilePage **pDst, tFilePage **pDataSrc, int32_t numOfRows,
|
|
int32_t *numOfInterpo) {
|
|
// SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
// SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
#if 0
|
|
while (1) {
|
|
numOfRows = taosNumOfRemainPoints(&pRuntimeEnv->interpoInfo);
|
|
|
|
TSKEY ekey = taosGetRevisedEndKey(pQuery->window.skey, pQuery->order.order, pQuery->intervalTime,
|
|
pQuery->slidingTimeUnit, pQuery->precision);
|
|
int32_t numOfFinalRows = taosGetNumOfResultWithInterpo(&pRuntimeEnv->interpoInfo, (TSKEY *)pDataSrc[0]->data,
|
|
numOfRows, pQuery->intervalTime, ekey, pQuery->pointsToRead);
|
|
|
|
int32_t ret = resultInterpolate(pQInfo, pDst, pDataSrc, numOfRows, numOfFinalRows);
|
|
assert(ret == numOfFinalRows);
|
|
|
|
/* reached the start position of according to offset value, return immediately */
|
|
if (pQuery->limit.offset == 0) {
|
|
return ret;
|
|
}
|
|
|
|
if (pQuery->limit.offset < ret) {
|
|
ret -= pQuery->limit.offset;
|
|
// todo !!!!there exactly number of interpo is not valid.
|
|
// todo refactor move to the beginning of buffer
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
memmove(pDst[i]->data, pDst[i]->data + pQuery->pSelectExpr[i].bytes * pQuery->limit.offset,
|
|
ret * pQuery->pSelectExpr[i].bytes);
|
|
}
|
|
pQuery->limit.offset = 0;
|
|
return ret;
|
|
} else {
|
|
pQuery->limit.offset -= ret;
|
|
ret = 0;
|
|
}
|
|
|
|
if (!vnodeHasRemainResults(pQInfo)) {
|
|
return ret;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
|
|
void vnodePrintQueryStatistics(SQInfo *pQInfo) {
|
|
#if 0
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
SQueryCostSummary *pSummary = &pRuntimeEnv->summary;
|
|
if (pRuntimeEnv->pResultBuf == NULL) {
|
|
pSummary->tmpBufferInDisk = 0;
|
|
} else {
|
|
pSummary->tmpBufferInDisk = getResBufSize(pRuntimeEnv->pResultBuf);
|
|
}
|
|
|
|
qTrace("QInfo:%p statis: comp blocks:%d, size:%d Bytes, elapsed time:%.2f ms", pQInfo, pSummary->readCompInfo,
|
|
pSummary->totalCompInfoSize, pSummary->loadCompInfoUs / 1000.0);
|
|
|
|
qTrace("QInfo:%p statis: field info: %d, size:%d Bytes, avg size:%.2f Bytes, elapsed time:%.2f ms", pQInfo,
|
|
pSummary->readField, pSummary->totalFieldSize, (double)pSummary->totalFieldSize / pSummary->readField,
|
|
pSummary->loadFieldUs / 1000.0);
|
|
|
|
qTrace(
|
|
"QInfo:%p statis: file blocks:%d, size:%d Bytes, elapsed time:%.2f ms, skipped:%d, in-memory gen null:%d Bytes",
|
|
pQInfo, pSummary->readDiskBlocks, pSummary->totalBlockSize, pSummary->loadBlocksUs / 1000.0,
|
|
pSummary->skippedFileBlocks, pSummary->totalGenData);
|
|
|
|
qTrace("QInfo:%p statis: cache blocks:%d", pQInfo, pSummary->blocksInCache, 0);
|
|
qTrace("QInfo:%p statis: temp file:%d Bytes", pQInfo, pSummary->tmpBufferInDisk);
|
|
|
|
qTrace("QInfo:%p statis: file:%d, table:%d", pQInfo, pSummary->numOfFiles, pSummary->numOfTables);
|
|
qTrace("QInfo:%p statis: seek ops:%d", pQInfo, pSummary->numOfSeek);
|
|
|
|
double total = pSummary->fileTimeUs + pSummary->cacheTimeUs;
|
|
double io = pSummary->loadCompInfoUs + pSummary->loadBlocksUs + pSummary->loadFieldUs;
|
|
|
|
// todo add the intermediate result save cost!!
|
|
double computing = total - io;
|
|
|
|
qTrace(
|
|
"QInfo:%p statis: total elapsed time:%.2f ms, file:%.2f ms(%.2f%), cache:%.2f ms(%.2f%). io:%.2f ms(%.2f%),"
|
|
"comput:%.2fms(%.2f%)",
|
|
pQInfo, total / 1000.0, pSummary->fileTimeUs / 1000.0, pSummary->fileTimeUs * 100 / total,
|
|
pSummary->cacheTimeUs / 1000.0, pSummary->cacheTimeUs * 100 / total, io / 1000.0, io * 100 / total,
|
|
computing / 1000.0, computing * 100 / total);
|
|
#endif
|
|
}
|
|
|
|
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
|
|
|
|
if (pQuery->limit.offset == pBlockInfo->rows) { // current block will ignore completed
|
|
pQuery->lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey + step : pBlockInfo->window.skey + step;
|
|
pQuery->limit.offset = 0;
|
|
return;
|
|
}
|
|
|
|
if (QUERY_IS_ASC_QUERY(pQuery)) {
|
|
pQuery->pos = pQuery->limit.offset;
|
|
} else {
|
|
pQuery->pos = pBlockInfo->rows - pQuery->limit.offset - 1;
|
|
}
|
|
|
|
assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);
|
|
|
|
SArray * pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
|
|
SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);
|
|
|
|
// update the pQuery->limit.offset value, and pQuery->pos value
|
|
TSKEY *keys = (TSKEY *)pColInfoData->pData;
|
|
|
|
// update the offset value
|
|
pQuery->lastKey = keys[pQuery->pos];
|
|
pQuery->limit.offset = 0;
|
|
|
|
int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey,
|
|
&pRuntimeEnv->windowResInfo, pDataBlock);
|
|
|
|
qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", rows:%d, res:%d", GET_QINFO_ADDR(pRuntimeEnv),
|
|
pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes);
|
|
}
|
|
|
|
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
|
|
return;
|
|
}
|
|
|
|
pQuery->pos = 0;
|
|
int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
|
|
|
|
TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pQueryHandle;
|
|
|
|
while (tsdbNextDataBlock(pQueryHandle)) {
|
|
if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
|
|
return;
|
|
}
|
|
|
|
SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle);
|
|
|
|
if (pQuery->limit.offset > blockInfo.rows) {
|
|
pQuery->limit.offset -= blockInfo.rows;
|
|
pQuery->lastKey = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
|
|
pQuery->lastKey += step;
|
|
|
|
qTrace("QInfo:%p skip rows:%d, offset:%" PRId64 "", GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
|
|
pQuery->limit.offset);
|
|
} else { // find the appropriated start position in current block
|
|
updateOffsetVal(pRuntimeEnv, &blockInfo);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
// if queried with value filter, do NOT forward query start position
|
|
if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* 1. for interval without interpolation query we forward pQuery->intervalTime at a time for
|
|
* pQuery->limit.offset times. Since hole exists, pQuery->intervalTime*pQuery->limit.offset value is
|
|
* not valid. otherwise, we only forward pQuery->limit.offset number of points
|
|
*/
|
|
assert(pRuntimeEnv->windowResInfo.prevSKey == 0);
|
|
|
|
TSKEY skey1, ekey1;
|
|
STimeWindow w = {0};
|
|
SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
|
|
|
|
while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
|
|
SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle);
|
|
|
|
if (QUERY_IS_ASC_QUERY(pQuery) && pWindowResInfo->prevSKey == 0) {
|
|
getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &skey1, &ekey1,
|
|
&w);
|
|
pWindowResInfo->startTime = w.skey;
|
|
pWindowResInfo->prevSKey = w.skey;
|
|
} else {
|
|
// the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
|
|
getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &skey1, &ekey1,
|
|
&w);
|
|
|
|
pWindowResInfo->startTime = pQuery->window.skey;
|
|
pWindowResInfo->prevSKey = w.skey;
|
|
}
|
|
|
|
// the first time window
|
|
STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQuery);
|
|
|
|
while (pQuery->limit.offset > 0) {
|
|
if ((win.ekey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
|
|
(win.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
|
|
pQuery->limit.offset -= 1;
|
|
pWindowResInfo->prevSKey = win.skey;
|
|
}
|
|
|
|
STimeWindow tw = win;
|
|
getNextTimeWindow(pQuery, &tw);
|
|
|
|
if (pQuery->limit.offset == 0) {
|
|
if ((tw.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
|
|
(tw.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
|
|
// load the data block
|
|
SArray * pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
|
|
SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);
|
|
|
|
tw = win;
|
|
int32_t startPos =
|
|
getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey);
|
|
assert(startPos >= 0);
|
|
|
|
// set the abort info
|
|
pQuery->pos = startPos;
|
|
pQuery->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
|
|
pWindowResInfo->prevSKey = tw.skey;
|
|
|
|
int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey,
|
|
&pRuntimeEnv->windowResInfo, pDataBlock);
|
|
|
|
qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", rows:%d, res:%d",
|
|
GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes);
|
|
return true;
|
|
} else {
|
|
// do nothing,
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// next time window starts from current data block
|
|
if ((tw.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
|
|
(tw.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
|
|
// load the data block, note that only the primary timestamp column is required
|
|
SArray * pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
|
|
SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);
|
|
|
|
tw = win;
|
|
int32_t startPos =
|
|
getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey);
|
|
assert(startPos >= 0);
|
|
|
|
// set the abort info
|
|
pQuery->pos = startPos;
|
|
pQuery->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
|
|
pWindowResInfo->prevSKey = tw.skey;
|
|
win = tw;
|
|
} else {
|
|
break; // offset is not 0, and next time window locates in the next block.
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
int32_t doInitQInfo(SQInfo *pQInfo, void *param, void *tsdb, bool isSTableQuery) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
int32_t code = TSDB_CODE_SUCCESS;
|
|
|
|
setScanLimitationByResultBuffer(pQuery);
|
|
changeExecuteScanOrder(pQuery, false);
|
|
|
|
// dataInCache requires lastKey value
|
|
pQuery->lastKey = pQuery->window.skey;
|
|
|
|
STsdbQueryCond cond = {
|
|
.twindow = pQuery->window,
|
|
.order = pQuery->order.order,
|
|
.colList = pQuery->colList,
|
|
.numOfCols = pQuery->numOfCols,
|
|
};
|
|
|
|
if (!isSTableQuery || isIntervalQuery(pQuery) || isFixedOutputQuery(pQuery)) {
|
|
pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->groupInfo);
|
|
}
|
|
|
|
pQInfo->tsdb = tsdb;
|
|
|
|
pRuntimeEnv->pQuery = pQuery;
|
|
pRuntimeEnv->pTSBuf = param;
|
|
pRuntimeEnv->cur.vnodeIndex = -1;
|
|
pRuntimeEnv->stableQuery = isSTableQuery;
|
|
|
|
if (param != NULL) {
|
|
int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
|
|
tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
|
|
}
|
|
|
|
// create runtime environment
|
|
code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
|
|
if (code != TSDB_CODE_SUCCESS) {
|
|
return code;
|
|
}
|
|
|
|
pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, isSTableQuery);
|
|
|
|
if (isSTableQuery) {
|
|
int32_t rows = getInitialPageNum(pQInfo);
|
|
code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize);
|
|
if (code != TSDB_CODE_SUCCESS) {
|
|
return code;
|
|
}
|
|
|
|
if (pQuery->intervalTime == 0) {
|
|
int16_t type = TSDB_DATA_TYPE_NULL;
|
|
|
|
if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { // group by columns not tags;
|
|
type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
|
|
} else {
|
|
type = TSDB_DATA_TYPE_INT; // group id
|
|
}
|
|
|
|
initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 512, 4096, type);
|
|
}
|
|
|
|
} else if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
|
|
int32_t rows = getInitialPageNum(pQInfo);
|
|
code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize);
|
|
if (code != TSDB_CODE_SUCCESS) {
|
|
return code;
|
|
}
|
|
|
|
int16_t type = TSDB_DATA_TYPE_NULL;
|
|
if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
|
|
type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
|
|
} else {
|
|
type = TSDB_DATA_TYPE_TIMESTAMP;
|
|
}
|
|
|
|
initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, rows, 4096, type);
|
|
}
|
|
|
|
setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
|
|
|
|
SPointInterpoSupporter interpInfo = {0};
|
|
pointInterpSupporterInit(pQuery, &interpInfo);
|
|
|
|
/*
|
|
* in case of last_row query without query range, we set the query timestamp to
|
|
* pMeterObj->lastKey. Otherwise, keep the initial query time range unchanged.
|
|
*/
|
|
if (isFirstLastRowQuery(pQuery) && notHasQueryTimeRange(pQuery)) {
|
|
if (!normalizeUnBoundLastRowQuery(pQInfo, &interpInfo)) {
|
|
sem_post(&pQInfo->dataReady);
|
|
pointInterpSupporterDestroy(&interpInfo);
|
|
return TSDB_CODE_SUCCESS;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* here we set the value for before and after the specified time into the
|
|
* parameter for interpolation query
|
|
*/
|
|
pointInterpSupporterSetData(pQInfo, &interpInfo);
|
|
pointInterpSupporterDestroy(&interpInfo);
|
|
|
|
int64_t rs = taosGetIntervalStartTimestamp(pQuery->window.skey, pQuery->intervalTime, pQuery->slidingTimeUnit,
|
|
pQuery->precision);
|
|
taosInitInterpoInfo(&pRuntimeEnv->interpoInfo, pQuery->order.order, rs, 0, 0);
|
|
// allocMemForInterpo(pQInfo, pQuery, pMeterObj);
|
|
|
|
if (!isPointInterpoQuery(pQuery)) {
|
|
// assert(pQuery->pos >= 0 && pQuery->slot >= 0);
|
|
}
|
|
|
|
// the pQuery->window.skey is changed during normalizedFirstQueryRange, so set the newest lastkey value
|
|
pQuery->lastKey = pQuery->window.skey;
|
|
return TSDB_CODE_SUCCESS;
|
|
}
|
|
|
|
static UNUSED_FUNC bool isGroupbyEachTable(SSqlGroupbyExpr *pGroupbyExpr, STableGroupInfo *pSidset) {
|
|
if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
|
|
return false;
|
|
}
|
|
|
|
for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
|
|
SColIndex* pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
|
|
if (pColIndex->flag == TSDB_COL_TAG) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static UNUSED_FUNC bool doCheckWithPrevQueryRange(SQuery *pQuery, TSKEY nextKey) {
|
|
if ((nextKey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
|
|
(nextKey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
|
|
if (pResInfo != NULL) {
|
|
pResInfo->complete = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int64_t queryOnDataBlocks(SQInfo *pQInfo) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
int64_t st = taosGetTimestampMs();
|
|
|
|
TsdbQueryHandleT *pQueryHandle = pRuntimeEnv->pQueryHandle;
|
|
while (tsdbNextDataBlock(pQueryHandle)) {
|
|
if (isQueryKilled(pQInfo)) {
|
|
break;
|
|
}
|
|
|
|
SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle);
|
|
STableDataInfo *pTableDataInfo = NULL;
|
|
STable * pTable = NULL;
|
|
|
|
// todo opt performance using hash table
|
|
size_t numOfGroup = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
|
|
for (int32_t i = 0; i < numOfGroup; ++i) {
|
|
SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, i);
|
|
|
|
size_t num = taosArrayGetSize(group);
|
|
for (int32_t j = 0; j < num; ++j) {
|
|
SPair * p = taosArrayGet(group, j);
|
|
STableDataInfo *pInfo = p->sec;
|
|
|
|
if (pInfo->pTableQInfo->tid == blockInfo.sid) {
|
|
pTableDataInfo = p->sec;
|
|
pTable = p->first;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
assert(pTableDataInfo != NULL && pTableDataInfo->pTableQInfo != NULL);
|
|
STableQueryInfo *pTableQueryInfo = pTableDataInfo->pTableQInfo;
|
|
|
|
restoreIntervalQueryRange(pRuntimeEnv, pTableQueryInfo);
|
|
|
|
SDataStatis *pStatis = NULL;
|
|
SArray * pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, &blockInfo, &pStatis);
|
|
|
|
TSKEY nextKey = blockInfo.window.skey;
|
|
if (!isIntervalQuery(pQuery)) {
|
|
setExecutionContext(pQInfo, pTableQueryInfo, pTable, pTableDataInfo->groupIdx, nextKey);
|
|
} else { // interval query
|
|
setIntervalQueryRange(pTableQueryInfo, pQInfo, nextKey);
|
|
int32_t ret = setAdditionalInfo(pQInfo, pTable, pTableQueryInfo);
|
|
|
|
if (ret != TSDB_CODE_SUCCESS) {
|
|
pQInfo->code = ret;
|
|
return taosGetTimestampMs() - st;
|
|
}
|
|
}
|
|
|
|
stableApplyFunctionsOnBlock(pRuntimeEnv, pTableDataInfo, &blockInfo, pStatis, pDataBlock, binarySearchForKey);
|
|
}
|
|
|
|
int64_t et = taosGetTimestampMs();
|
|
return et - st;
|
|
}
|
|
|
|
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
|
|
SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
|
|
SPair * p = taosArrayGet(group, index);
|
|
|
|
STable * pTable = p->first;
|
|
STableDataInfo *pInfo = p->sec;
|
|
|
|
setTagVal(pRuntimeEnv, pTable->tableId, pQInfo->tsdb);
|
|
|
|
qTrace("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
|
|
pTable->tableId.uid, pInfo->pTableQInfo->lastKey, pInfo->pTableQInfo->win.ekey);
|
|
|
|
STsdbQueryCond cond = {
|
|
.twindow = {pInfo->pTableQInfo->lastKey, pInfo->pTableQInfo->win.ekey},
|
|
.order = pQuery->order.order,
|
|
.colList = pQuery->colList,
|
|
.numOfCols = pQuery->numOfCols,
|
|
};
|
|
|
|
SArray *g1 = taosArrayInit(1, POINTER_BYTES);
|
|
SArray *tx = taosArrayInit(1, sizeof(SPair));
|
|
|
|
taosArrayPush(tx, p);
|
|
taosArrayPush(g1, &tx);
|
|
STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};
|
|
|
|
// include only current table
|
|
if (pRuntimeEnv->pQueryHandle != NULL) {
|
|
tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
|
|
pRuntimeEnv->pQueryHandle = NULL;
|
|
}
|
|
|
|
pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp);
|
|
taosArrayDestroy(tx);
|
|
taosArrayDestroy(g1);
|
|
|
|
if (pRuntimeEnv->pTSBuf != NULL) {
|
|
if (pRuntimeEnv->cur.vnodeIndex == -1) {
|
|
int64_t tag = pRuntimeEnv->pCtx[0].tag.i64Key;
|
|
STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, tag);
|
|
|
|
// failed to find data with the specified tag value
|
|
if (elem.vnode < 0) {
|
|
return false;
|
|
}
|
|
} else {
|
|
tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
|
|
}
|
|
}
|
|
|
|
initCtxOutputBuf(pRuntimeEnv);
|
|
return true;
|
|
}
|
|
|
|
static UNUSED_FUNC int64_t doCheckMetersInGroup(SQInfo *pQInfo, int32_t index, int32_t start) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
if (!multiTableMultioutputHelper(pQInfo, index)) {
|
|
return 0;
|
|
}
|
|
|
|
SPointInterpoSupporter pointInterpSupporter = {0};
|
|
pointInterpSupporterInit(pQuery, &pointInterpSupporter);
|
|
assert(0);
|
|
|
|
// if (!normalizedFirstQueryRange(dataInDisk, dataInCache, pSupporter, &pointInterpSupporter, NULL)) {
|
|
// pointInterpSupporterDestroy(&pointInterpSupporter);
|
|
// return 0;
|
|
// }
|
|
|
|
/*
|
|
* here we set the value for before and after the specified time into the
|
|
* parameter for interpolation query
|
|
*/
|
|
pointInterpSupporterSetData(pQInfo, &pointInterpSupporter);
|
|
pointInterpSupporterDestroy(&pointInterpSupporter);
|
|
|
|
scanAllDataBlocks(pRuntimeEnv);
|
|
|
|
// first/last_row query, do not invoke the finalize for super table query
|
|
finalizeQueryResult(pRuntimeEnv);
|
|
|
|
int64_t numOfRes = getNumOfResult(pRuntimeEnv);
|
|
assert(numOfRes == 1 || numOfRes == 0);
|
|
|
|
// accumulate the point interpolation result
|
|
if (numOfRes > 0) {
|
|
pQuery->rec.rows += numOfRes;
|
|
forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
|
|
}
|
|
|
|
return numOfRes;
|
|
}
|
|
|
|
/**
|
|
* super table query handler
|
|
* 1. super table projection query, group-by on normal columns query, ts-comp query
|
|
* 2. point interpolation query, last row query
|
|
*
|
|
* @param pQInfo
|
|
*/
|
|
static void sequentialTableProcess(SQInfo *pQInfo) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
setQueryStatus(pQuery, QUERY_COMPLETED);
|
|
|
|
size_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
|
|
|
|
if (isPointInterpoQuery(pQuery)) {
|
|
resetCtxOutputBuf(pRuntimeEnv);
|
|
assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
|
|
|
|
#if 0
|
|
while (pQInfo->groupIndex < numOfGroups) {
|
|
|
|
SArray* group = taosArrayGetP(pQInfo->groupInfo.pGroupList, pQInfo->groupIndex);
|
|
size_t numOfTable = taosArrayGetSize(group);
|
|
|
|
if (isFirstLastRowQuery(pQuery)) {
|
|
qTrace("QInfo:%p last_row query on vid:%d, numOfGroups:%d, current group:%d", pQInfo, vid, pTableIdList->numOfSubSet,
|
|
pQInfo->groupIndex);
|
|
|
|
TSKEY key = -1;
|
|
int32_t index = -1;
|
|
|
|
// choose the last key for one group
|
|
pQInfo->tableIndex = 0;
|
|
|
|
for (int32_t k = 0; k < numOfTable; ++k, pQInfo->tableIndex++) {
|
|
if (isQueryKilled(pQInfo)) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
pQuery->window.skey = key;
|
|
pQuery->window.ekey = key;
|
|
|
|
// int64_t num = doCheckMetersInGroup(pQInfo, index, start);
|
|
// assert(num >= 0);
|
|
} else {
|
|
qTrace("QInfo:%p interp query on vid:%d, numOfGroups:%d, current group:%d", pQInfo, vid, pTableIdList->numOfSubSet,
|
|
pQInfo->groupIndex);
|
|
|
|
for (int32_t k = start; k <= end; ++k) {
|
|
if (isQueryKilled(pQInfo)) {
|
|
setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK);
|
|
return;
|
|
}
|
|
|
|
pQuery->skey = pSupporter->rawSKey;
|
|
pQuery->ekey = pSupporter->rawEKey;
|
|
|
|
int64_t num = doCheckMetersInGroup(pQInfo, k, start);
|
|
if (num == 1) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
pSupporter->groupIndex++;
|
|
|
|
// output buffer is full, return to client
|
|
if (pQuery->size >= pQuery->pointsToRead) {
|
|
break;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
} else {
|
|
createTableDataInfo(pQInfo);
|
|
|
|
/*
|
|
* 1. super table projection query, 2. group-by on normal columns query, 3. ts-comp query
|
|
* if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
|
|
* we need to return it to client in the first place.
|
|
*/
|
|
if (pQInfo->groupIndex > 0) {
|
|
copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
|
|
pQuery->rec.total += pQuery->rec.rows;
|
|
|
|
if (pQuery->rec.rows > 0) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
// all data have returned already
|
|
if (pQInfo->tableIndex >= pQInfo->groupInfo.numOfTables) {
|
|
return;
|
|
}
|
|
|
|
resetCtxOutputBuf(pRuntimeEnv);
|
|
resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
|
|
|
|
SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
|
|
assert(taosArrayGetSize(group) == pQInfo->groupInfo.numOfTables &&
|
|
1 == taosArrayGetSize(pQInfo->groupInfo.pGroupList));
|
|
|
|
while (pQInfo->tableIndex < pQInfo->groupInfo.numOfTables) {
|
|
if (isQueryKilled(pQInfo)) {
|
|
return;
|
|
}
|
|
|
|
SPair * p = taosArrayGet(group, pQInfo->tableIndex);
|
|
STableDataInfo *pInfo = p->sec;
|
|
|
|
TSKEY skey = pInfo->pTableQInfo->lastKey;
|
|
if (skey > 0) {
|
|
pQuery->window.skey = skey;
|
|
}
|
|
|
|
if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
|
|
pQInfo->tableIndex++;
|
|
continue;
|
|
}
|
|
|
|
// SPointInterpoSupporter pointInterpSupporter = {0};
|
|
|
|
// TODO handle the limit problem
|
|
if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
|
|
// skipBlocks(pRuntimeEnv);
|
|
|
|
if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
|
|
pQInfo->tableIndex++;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
scanAllDataBlocks(pRuntimeEnv);
|
|
|
|
pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
|
|
skipResults(pRuntimeEnv);
|
|
|
|
// the limitation of output result is reached, set the query completed
|
|
if (limitResults(pQInfo)) {
|
|
pQInfo->tableIndex = pQInfo->groupInfo.numOfTables;
|
|
break;
|
|
}
|
|
|
|
// enable execution for next table, when handling the projection query
|
|
enableExecutionForNextTable(pRuntimeEnv);
|
|
|
|
if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
|
|
/*
|
|
* query range is identical in terms of all meters involved in query,
|
|
* so we need to restore them at the *beginning* of query on each meter,
|
|
* not the consecutive query on meter on which is aborted due to buffer limitation
|
|
* to ensure that, we can reset the query range once query on a meter is completed.
|
|
*/
|
|
pQInfo->tableIndex++;
|
|
pInfo->pTableQInfo->lastKey = pQuery->lastKey;
|
|
|
|
// if the buffer is full or group by each table, we need to jump out of the loop
|
|
if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) /*||
|
|
isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)*/) {
|
|
break;
|
|
}
|
|
|
|
} else { // forward query range
|
|
pQuery->window.skey = pQuery->lastKey;
|
|
|
|
// all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
|
|
if (pQuery->rec.rows == 0) {
|
|
assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
|
|
continue;
|
|
} else {
|
|
// pQInfo->pTableQuerySupporter->pMeterSidExtInfo[k]->key = pQuery->lastKey;
|
|
// // buffer is full, wait for the next round to retrieve data from current meter
|
|
// assert(Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL));
|
|
// break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* 1. super table projection query, group-by on normal columns query, ts-comp query
|
|
* 2. point interpolation query, last row query
|
|
*
|
|
* group-by on normal columns query and last_row query do NOT invoke the finalizer here,
|
|
* since the finalize stage will be done at the client side.
|
|
*
|
|
* projection query, point interpolation query do not need the finalizer.
|
|
*
|
|
* Only the ts-comp query requires the finalizer function to be executed here.
|
|
*/
|
|
if (isTSCompQuery(pQuery)) {
|
|
finalizeQueryResult(pRuntimeEnv);
|
|
}
|
|
|
|
if (pRuntimeEnv->pTSBuf != NULL) {
|
|
pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
|
|
}
|
|
|
|
// todo refactor
|
|
if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
|
|
SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
|
|
|
|
for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
|
|
SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
|
|
pStatus->closed = true; // enable return all results for group by normal columns
|
|
|
|
SWindowResult *pResult = &pWindowResInfo->pResult[i];
|
|
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
|
|
pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
|
|
}
|
|
}
|
|
|
|
pQInfo->groupIndex = 0;
|
|
pQuery->rec.rows = 0;
|
|
copyFromWindowResToSData(pQInfo, pWindowResInfo->pResult);
|
|
}
|
|
|
|
pQuery->rec.total += pQuery->rec.rows;
|
|
|
|
qTrace(
|
|
"QInfo %p, numOfTables:%d, index:%d, numOfGroups:%d, %d points returned, total:%d totalReturn:%d,"
|
|
" offset:%" PRId64,
|
|
pQInfo, pQInfo->groupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
|
|
pQuery->limit.offset);
|
|
}
|
|
|
|
static void createTableDataInfo(SQInfo *pQInfo) {
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
|
|
// todo make sure the table are added the reference count to gauranteed that all involved tables are valid
|
|
size_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
|
|
|
|
int32_t index = 0;
|
|
for (int32_t i = 0; i < numOfGroups; ++i) { // load all meter meta info
|
|
SArray *group = *(SArray **)taosArrayGet(pQInfo->groupInfo.pGroupList, i);
|
|
|
|
size_t s = taosArrayGetSize(group);
|
|
for (int32_t j = 0; j < s; ++j) {
|
|
SPair *p = (SPair *)taosArrayGet(group, j);
|
|
|
|
// STableDataInfo has been created for each table
|
|
if (p->sec != NULL) { // todo refactor
|
|
return;
|
|
}
|
|
|
|
STableDataInfo *pInfo = calloc(1, sizeof(STableDataInfo));
|
|
|
|
setTableDataInfo(pInfo, index, i);
|
|
pInfo->pTableQInfo =
|
|
createTableQueryInfo(&pQInfo->runtimeEnv, ((STable *)(p->first))->tableId.tid, pQuery->window);
|
|
|
|
p->sec = pInfo;
|
|
|
|
index += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void prepareQueryInfoForReverseScan(SQInfo *pQInfo) {
|
|
// SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
|
|
// for (int32_t i = 0; i < pQInfo->groupInfo.numOfTables; ++i) {
|
|
// STableQueryInfo *pTableQueryInfo = pQInfo->pTableDataInfo[i].pTableQInfo;
|
|
// changeMeterQueryInfoForSuppleQuery(pQuery, pTableQueryInfo);
|
|
// }
|
|
}
|
|
|
|
static void doSaveContext(SQInfo *pQInfo) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
SET_SUPPLEMENT_SCAN_FLAG(pRuntimeEnv);
|
|
disableFuncForReverseScan(pQInfo, pQuery->order.order);
|
|
|
|
if (pRuntimeEnv->pTSBuf != NULL) {
|
|
pRuntimeEnv->pTSBuf->cur.order = pRuntimeEnv->pTSBuf->cur.order ^ 1u;
|
|
}
|
|
|
|
SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
|
|
prepareQueryInfoForReverseScan(pQInfo);
|
|
}
|
|
|
|
static void doRestoreContext(SQInfo *pQInfo) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
|
|
|
|
if (pRuntimeEnv->pTSBuf != NULL) {
|
|
pRuntimeEnv->pTSBuf->cur.order = pRuntimeEnv->pTSBuf->cur.order ^ 1;
|
|
}
|
|
|
|
switchCtxOrder(pRuntimeEnv);
|
|
SET_MASTER_SCAN_FLAG(pRuntimeEnv);
|
|
}
|
|
|
|
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
|
|
if (isIntervalQuery(pQuery)) {
|
|
// for (int32_t i = 0; i < pQInfo->groupInfo.numOfTables; ++i) {
|
|
// STableQueryInfo *pTableQueryInfo = pQInfo->pTableDataInfo[i].pTableQInfo;
|
|
// closeAllTimeWindow(&pTableQueryInfo->windowResInfo);
|
|
// }
|
|
size_t numOfGroup = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
|
|
for (int32_t i = 0; i < numOfGroup; ++i) {
|
|
SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, i);
|
|
|
|
size_t num = taosArrayGetSize(group);
|
|
for (int32_t j = 0; j < num; ++j) {
|
|
SPair * p = taosArrayGet(group, j);
|
|
STableDataInfo *pInfo = p->sec;
|
|
|
|
closeAllTimeWindow(&pInfo->pTableQInfo->windowResInfo);
|
|
}
|
|
}
|
|
} else { // close results for group result
|
|
closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
|
|
}
|
|
}
|
|
|
|
static void multiTableQueryProcess(SQInfo *pQInfo) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
if (pQInfo->groupIndex > 0) {
|
|
/*
|
|
* if the groupIndex > 0, the query process must be completed yet, we only need to
|
|
* copy the data into output buffer
|
|
*/
|
|
if (isIntervalQuery(pQuery)) {
|
|
copyResToQueryResultBuf(pQInfo, pQuery);
|
|
|
|
#ifdef _DEBUG_VIEW
|
|
displayInterResult(pQuery->sdata, pQuery, pQuery->sdata[0]->num);
|
|
#endif
|
|
} else {
|
|
copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
|
|
}
|
|
|
|
pQuery->rec.rows += pQuery->rec.rows;
|
|
|
|
if (pQuery->rec.rows == 0) {
|
|
// vnodePrintQueryStatistics(pSupporter);
|
|
}
|
|
|
|
qTrace("QInfo:%p current:%lld, total:%lld", pQInfo, pQuery->rec.rows, pQuery->rec.total);
|
|
return;
|
|
}
|
|
|
|
qTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
|
|
pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);
|
|
|
|
// create the query support structures
|
|
createTableDataInfo(pQInfo);
|
|
|
|
// do check all qualified data blocks
|
|
int64_t el = queryOnDataBlocks(pQInfo);
|
|
qTrace("QInfo:%p forward scan completed, elapsed time: %lldms, reversed scan start, order:%d", pQInfo, el,
|
|
pQuery->order.order ^ 1u);
|
|
|
|
// query error occurred or query is killed, abort current execution
|
|
if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
|
|
qTrace("QInfo:%p query killed or error occurred, code:%d, abort", pQInfo, pQInfo->code);
|
|
return;
|
|
}
|
|
|
|
// close all time window results
|
|
doCloseAllTimeWindowAfterScan(pQInfo);
|
|
|
|
if (needReverseScan(pQuery)) {
|
|
doSaveContext(pQInfo);
|
|
|
|
el = queryOnDataBlocks(pQInfo);
|
|
qTrace("QInfo:%p reversed scan completed, elapsed time: %lldms", pQInfo, el);
|
|
|
|
doRestoreContext(pQInfo);
|
|
} else {
|
|
qTrace("QInfo:%p no need to do reversed scan, query completed", pQInfo);
|
|
}
|
|
|
|
setQueryStatus(pQuery, QUERY_COMPLETED);
|
|
|
|
if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
|
|
qTrace("QInfo:%p query killed or error occurred, code:%d, abort", pQInfo, pQInfo->code);
|
|
return;
|
|
}
|
|
|
|
if (isIntervalQuery(pQuery) || isSumAvgRateQuery(pQuery)) {
|
|
// assert(pSupporter->groupIndex == 0 && pSupporter->numOfGroupResultPages == 0);
|
|
|
|
if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
|
|
copyResToQueryResultBuf(pQInfo, pQuery);
|
|
|
|
#ifdef _DEBUG_VIEW
|
|
displayInterResult(pQuery->sdata, pQuery, pQuery->sdata[0]->num);
|
|
#endif
|
|
}
|
|
} else { // not a interval query
|
|
copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
|
|
}
|
|
|
|
// handle the limitation of output buffer
|
|
qTrace("QInfo:%p points returned:%d, total:%d", pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
|
|
}
|
|
|
|
/*
|
|
* in each query, this function will be called only once, no retry for further result.
|
|
*
|
|
* select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
|
|
* select count(*) from table_name group by status_column;
|
|
*/
|
|
static void tableFixedOutputProcess(SQInfo *pQInfo) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
scanAllDataBlocks(pRuntimeEnv);
|
|
finalizeQueryResult(pRuntimeEnv);
|
|
|
|
if (isQueryKilled(pQInfo)) {
|
|
return;
|
|
}
|
|
|
|
// since the numOfOutputElems must be identical for all sql functions that are allowed to be executed simutanelously.
|
|
pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
|
|
|
|
// must be top/bottom query if offset > 0
|
|
if (pQuery->limit.offset > 0) {
|
|
assert(isTopBottomQuery(pQuery));
|
|
}
|
|
|
|
skipResults(pRuntimeEnv);
|
|
limitResults(pQInfo);
|
|
}
|
|
|
|
static void tableMultiOutputProcess(SQInfo *pQInfo) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
// for ts_comp query, re-initialized is not allowed
|
|
if (!isTSCompQuery(pQuery)) {
|
|
resetCtxOutputBuf(pRuntimeEnv);
|
|
}
|
|
|
|
// skip blocks without load the actual data block from file if no filter condition present
|
|
skipBlocks(&pQInfo->runtimeEnv);
|
|
if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
|
|
setQueryStatus(pQuery, QUERY_COMPLETED);
|
|
return;
|
|
}
|
|
|
|
while (1) {
|
|
scanAllDataBlocks(pRuntimeEnv);
|
|
finalizeQueryResult(pRuntimeEnv);
|
|
|
|
if (isQueryKilled(pQInfo)) {
|
|
return;
|
|
}
|
|
|
|
pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
|
|
if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
|
|
skipResults(pRuntimeEnv);
|
|
}
|
|
|
|
/*
|
|
* 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
|
|
* 2. if pQuery->size > 0, pQuery->limit.offset must be 0
|
|
*/
|
|
if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
|
|
break;
|
|
}
|
|
|
|
qTrace("QInfo:%p vid:%d sid:%d id:%s, skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
|
|
pQInfo, pQuery->limit.offset, pQuery->lastKey);
|
|
|
|
resetCtxOutputBuf(pRuntimeEnv);
|
|
}
|
|
|
|
limitResults(pQInfo);
|
|
if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
|
|
qTrace("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo, pQuery->lastKey,
|
|
pQuery->window.ekey);
|
|
}
|
|
|
|
if (!isTSCompQuery(pQuery)) {
|
|
assert(pQuery->rec.rows <= pQuery->rec.capacity);
|
|
}
|
|
}
|
|
|
|
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv) {
|
|
SQuery *pQuery = pRuntimeEnv->pQuery;
|
|
|
|
while (1) {
|
|
scanAllDataBlocks(pRuntimeEnv);
|
|
|
|
if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
|
|
return;
|
|
}
|
|
|
|
assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
|
|
finalizeQueryResult(pRuntimeEnv);
|
|
|
|
// here we can ignore the records in case of no interpolation
|
|
// todo handle offset, in case of top/bottom interval query
|
|
if ((pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) && pQuery->limit.offset > 0 &&
|
|
pQuery->interpoType == TSDB_INTERPO_NONE) {
|
|
// maxOutput <= 0, means current query does not generate any results
|
|
int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
|
|
|
|
int32_t c = MIN(numOfClosed, pQuery->limit.offset);
|
|
clearFirstNTimeWindow(pRuntimeEnv, c);
|
|
pQuery->limit.offset -= c;
|
|
}
|
|
|
|
if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL)) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// handle time interval query on table
|
|
static void tableIntervalProcess(SQInfo *pQInfo) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
int32_t numOfInterpo = 0;
|
|
|
|
// skip blocks without load the actual data block from file if no filter condition present
|
|
skipTimeInterval(pRuntimeEnv);
|
|
if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
|
|
setQueryStatus(pQuery, QUERY_COMPLETED);
|
|
return;
|
|
}
|
|
|
|
while (1) {
|
|
tableIntervalProcessImpl(pRuntimeEnv);
|
|
|
|
if (isIntervalQuery(pQuery)) {
|
|
pQInfo->groupIndex = 0; // always start from 0
|
|
pQuery->rec.rows = 0;
|
|
copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
|
|
|
|
clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
|
|
}
|
|
|
|
// the offset is handled at prepare stage if no interpolation involved
|
|
if (pQuery->interpoType == TSDB_INTERPO_NONE) {
|
|
limitResults(pQInfo);
|
|
break;
|
|
} else {
|
|
taosInterpoSetStartInfo(&pRuntimeEnv->interpoInfo, pQuery->rec.rows, pQuery->interpoType);
|
|
SData **pInterpoBuf = pRuntimeEnv->pInterpoBuf;
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
memcpy(pInterpoBuf[i]->data, pQuery->sdata[i]->data, pQuery->rec.rows * pQuery->pSelectExpr[i].bytes);
|
|
}
|
|
|
|
numOfInterpo = 0;
|
|
pQuery->rec.rows = vnodeQueryResultInterpolate(pQInfo, (tFilePage **)pQuery->sdata, (tFilePage **)pInterpoBuf,
|
|
pQuery->rec.rows, &numOfInterpo);
|
|
|
|
qTrace("QInfo: %p interpo completed, final:%d", pQInfo, pQuery->rec.rows);
|
|
if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
|
|
limitResults(pQInfo);
|
|
break;
|
|
}
|
|
|
|
// no result generated yet, continue retrieve data
|
|
pQuery->rec.rows = 0;
|
|
}
|
|
}
|
|
|
|
// all data scanned, the group by normal column can return
|
|
if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { // todo refactor with merge interval time result
|
|
pQInfo->groupIndex = 0;
|
|
pQuery->rec.rows = 0;
|
|
copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
|
|
clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
|
|
}
|
|
|
|
pQInfo->pointsInterpo += numOfInterpo;
|
|
}
|
|
|
|
static void tableQueryImpl(SQInfo *pQInfo) {
|
|
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
|
|
SQuery * pQuery = pRuntimeEnv->pQuery;
|
|
|
|
if (vnodeHasRemainResults(pQInfo)) {
|
|
/*
|
|
* There are remain results that are not returned due to result interpolation
|
|
* So, we do keep in this procedure instead of launching retrieve procedure for next results.
|
|
*/
|
|
int32_t numOfInterpo = 0;
|
|
int32_t remain = taosNumOfRemainPoints(&pRuntimeEnv->interpoInfo);
|
|
pQuery->rec.rows = vnodeQueryResultInterpolate(pQInfo, (tFilePage **)pQuery->sdata,
|
|
(tFilePage **)pRuntimeEnv->pInterpoBuf, remain, &numOfInterpo);
|
|
|
|
limitResults(pQInfo);
|
|
|
|
pQInfo->pointsInterpo += numOfInterpo;
|
|
qTrace("QInfo:%p current:%d returned, total:%d", pQInfo, pQuery->rec.rows, pQuery->rec.total);
|
|
sem_post(&pQInfo->dataReady);
|
|
return;
|
|
}
|
|
|
|
// here we have scan all qualified data in both data file and cache
|
|
if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
|
|
// continue to get push data from the group result
|
|
if (isGroupbyNormalCol(pQuery->pGroupbyExpr) ||
|
|
((isIntervalQuery(pQuery) && pQuery->rec.total < pQuery->limit.limit))) {
|
|
// todo limit the output for interval query?
|
|
pQuery->rec.rows = 0;
|
|
pQInfo->groupIndex = 0; // always start from 0
|
|
|
|
if (pRuntimeEnv->windowResInfo.size > 0) {
|
|
copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
|
|
pQuery->rec.rows += pQuery->rec.rows;
|
|
|
|
clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
|
|
|
|
if (pQuery->rec.rows > 0) {
|
|
qTrace("QInfo:%p %d rows returned from group results, total:%d", pQInfo, pQuery->rec.rows, pQuery->rec.total);
|
|
sem_post(&pQInfo->dataReady);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
qTrace("QInfo:%p query over, %d rows are returned", pQInfo, pQuery->rec.total);
|
|
// vnodePrintQueryStatistics(pSupporter);
|
|
sem_post(&pQInfo->dataReady);
|
|
return;
|
|
}
|
|
|
|
// number of points returned during this query
|
|
pQuery->rec.rows = 0;
|
|
int64_t st = taosGetTimestampUs();
|
|
|
|
// group by normal column, sliding window query, interval query are handled by interval query processor
|
|
if (isIntervalQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) { // interval (down sampling operation)
|
|
tableIntervalProcess(pQInfo);
|
|
} else if (isFixedOutputQuery(pQuery)) {
|
|
tableFixedOutputProcess(pQInfo);
|
|
} else { // diff/add/multiply/subtract/division
|
|
assert(pQuery->checkBuffer == 1);
|
|
tableMultiOutputProcess(pQInfo);
|
|
}
|
|
|
|
// record the total elapsed time
|
|
pQInfo->elapsedTime += (taosGetTimestampUs() - st);
|
|
assert(pQInfo->groupInfo.numOfTables == 1);
|
|
|
|
/* check if query is killed or not */
|
|
if (isQueryKilled(pQInfo)) {
|
|
qTrace("QInfo:%p query is killed", pQInfo);
|
|
} else {
|
|
// STableId* pTableId = taosArrayGet(pQInfo->groupInfo, 0);
|
|
// qTrace("QInfo:%p uid:%" PRIu64 " tid:%d, query completed, %" PRId64 " rows returned, numOfTotal:%" PRId64 "
|
|
// rows",
|
|
// pQInfo, pTableId->uid, pTableId->tid, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
|
|
}
|
|
|
|
sem_post(&pQInfo->dataReady);
|
|
}
|
|
|
|
static void stableQueryImpl(SQInfo *pQInfo) {
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
pQuery->rec.rows = 0;
|
|
|
|
int64_t st = taosGetTimestampUs();
|
|
|
|
if (isIntervalQuery(pQuery) ||
|
|
(isFixedOutputQuery(pQuery) && (!isPointInterpoQuery(pQuery)) && !isGroupbyNormalCol(pQuery->pGroupbyExpr))) {
|
|
multiTableQueryProcess(pQInfo);
|
|
} else {
|
|
assert((pQuery->checkBuffer == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) ||
|
|
isGroupbyNormalCol(pQuery->pGroupbyExpr));
|
|
|
|
sequentialTableProcess(pQInfo);
|
|
}
|
|
|
|
// record the total elapsed time
|
|
pQInfo->elapsedTime += (taosGetTimestampUs() - st);
|
|
// taosInterpoSetStartInfo(&pQInfo->runtimeEnv.interpoInfo, pQuery->size, pQInfo->query.interpoType);
|
|
|
|
if (pQuery->rec.rows == 0) {
|
|
qTrace("QInfo:%p over, %d tables queried, %d points are returned", pQInfo, pQInfo->groupInfo.numOfTables,
|
|
pQuery->rec.total);
|
|
// vnodePrintQueryStatistics(pSupporter);
|
|
}
|
|
|
|
sem_post(&pQInfo->dataReady);
|
|
}
|
|
|
|
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
|
|
int32_t j = 0;
|
|
|
|
if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
|
|
while(j < pQueryMsg->numOfTags) {
|
|
if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
|
|
return j;
|
|
}
|
|
|
|
j += 1;
|
|
}
|
|
|
|
} else {
|
|
while (j < pQueryMsg->numOfCols) {
|
|
if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
|
|
return j;
|
|
}
|
|
|
|
j += 1;
|
|
}
|
|
}
|
|
|
|
assert(0);
|
|
}
|
|
|
|
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
|
|
int32_t j = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);
|
|
return j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags;
|
|
}
|
|
|
|
static int32_t validateQueryMsg(SQueryTableMsg *pQueryMsg) {
|
|
if (pQueryMsg->intervalTime < 0) {
|
|
qError("qmsg:%p illegal value of aggTimeInterval %" PRId64 "", pQueryMsg, pQueryMsg->intervalTime);
|
|
return -1;
|
|
}
|
|
|
|
if (pQueryMsg->numOfCols <= 0 || pQueryMsg->numOfCols > TSDB_MAX_COLUMNS) {
|
|
qError("qmsg:%p illegal value of numOfCols %d", pQueryMsg, pQueryMsg->numOfCols);
|
|
return -1;
|
|
}
|
|
|
|
if (pQueryMsg->numOfTables <= 0) {
|
|
qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
|
|
return -1;
|
|
}
|
|
|
|
if (pQueryMsg->numOfGroupCols < 0) {
|
|
qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
|
|
return -1;
|
|
}
|
|
|
|
if (pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS || pQueryMsg->numOfOutput <= 0) {
|
|
qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
|
|
assert(pQueryMsg->numOfTables > 0);
|
|
|
|
*pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableId));
|
|
|
|
STableIdInfo *pTableIdInfo = (STableIdInfo *)pMsg;
|
|
pTableIdInfo->sid = htonl(pTableIdInfo->sid);
|
|
pTableIdInfo->uid = htobe64(pTableIdInfo->uid);
|
|
pTableIdInfo->key = htobe64(pTableIdInfo->key);
|
|
|
|
STableId id = {.uid = pTableIdInfo->uid, .tid = pTableIdInfo->sid};
|
|
taosArrayPush(*pTableIdList, &id);
|
|
|
|
pMsg += sizeof(STableIdInfo);
|
|
|
|
for (int32_t j = 1; j < pQueryMsg->numOfTables; ++j) {
|
|
pTableIdInfo = (STableIdInfo *)pMsg;
|
|
|
|
pTableIdInfo->sid = htonl(pTableIdInfo->sid);
|
|
pTableIdInfo->uid = htobe64(pTableIdInfo->uid);
|
|
pTableIdInfo->key = htobe64(pTableIdInfo->key);
|
|
|
|
taosArrayPush(*pTableIdList, pTableIdInfo);
|
|
pMsg += sizeof(STableIdInfo);
|
|
}
|
|
|
|
return pMsg;
|
|
}
|
|
|
|
/**
|
|
* pQueryMsg->head has been converted before this function is called.
|
|
*
|
|
* @param pQueryMsg
|
|
* @param pTableIdList
|
|
* @param pExpr
|
|
* @return
|
|
*/
|
|
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
|
|
char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
|
|
pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);
|
|
|
|
pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
|
|
pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
|
|
pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
|
|
pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
|
|
pQueryMsg->limit = htobe64(pQueryMsg->limit);
|
|
pQueryMsg->offset = htobe64(pQueryMsg->offset);
|
|
|
|
pQueryMsg->order = htons(pQueryMsg->order);
|
|
pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
|
|
pQueryMsg->queryType = htons(pQueryMsg->queryType);
|
|
pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
|
|
|
|
pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
|
|
pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
|
|
pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
|
|
pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
|
|
pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
|
|
pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
|
|
pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
|
|
pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
|
|
pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
|
|
|
|
// query msg safety check
|
|
if (validateQueryMsg(pQueryMsg) != 0) {
|
|
return TSDB_CODE_INVALID_QUERY_MSG;
|
|
}
|
|
|
|
char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
|
|
|
|
for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
|
|
SColumnInfo *pColInfo = &pQueryMsg->colList[col];
|
|
|
|
pColInfo->colId = htons(pColInfo->colId);
|
|
pColInfo->type = htons(pColInfo->type);
|
|
pColInfo->bytes = htons(pColInfo->bytes);
|
|
pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
|
|
|
|
assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
|
|
|
|
int32_t numOfFilters = pColInfo->numOfFilters;
|
|
if (numOfFilters > 0) {
|
|
pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
|
|
}
|
|
|
|
for (int32_t f = 0; f < numOfFilters; ++f) {
|
|
SColumnFilterInfo *pFilterInfo = (SColumnFilterInfo *)pMsg;
|
|
SColumnFilterInfo *pDestFilterInfo = &pColInfo->filters[f];
|
|
|
|
pDestFilterInfo->filterstr = htons(pFilterInfo->filterstr);
|
|
|
|
pMsg += sizeof(SColumnFilterInfo);
|
|
|
|
if (pDestFilterInfo->filterstr) {
|
|
pDestFilterInfo->len = htobe64(pFilterInfo->len);
|
|
|
|
pDestFilterInfo->pz = (int64_t)calloc(1, pDestFilterInfo->len + 1);
|
|
memcpy((void *)pDestFilterInfo->pz, pMsg, pDestFilterInfo->len + 1);
|
|
pMsg += (pDestFilterInfo->len + 1);
|
|
} else {
|
|
pDestFilterInfo->lowerBndi = htobe64(pFilterInfo->lowerBndi);
|
|
pDestFilterInfo->upperBndi = htobe64(pFilterInfo->upperBndi);
|
|
}
|
|
|
|
pDestFilterInfo->lowerRelOptr = htons(pFilterInfo->lowerRelOptr);
|
|
pDestFilterInfo->upperRelOptr = htons(pFilterInfo->upperRelOptr);
|
|
}
|
|
}
|
|
|
|
bool hasArithmeticFunction = false;
|
|
|
|
*pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
|
|
SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
|
|
|
|
for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
|
|
(*pExpr)[i] = pExprMsg;
|
|
|
|
pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
|
|
pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
|
|
pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
|
|
pExprMsg->functionId = htons(pExprMsg->functionId);
|
|
pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
|
|
|
|
pMsg += sizeof(SSqlFuncMsg);
|
|
|
|
for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
|
|
pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
|
|
pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);
|
|
|
|
if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
|
|
pExprMsg->arg[j].argValue.pz = pMsg;
|
|
pMsg += pExprMsg->arg[j].argBytes + 1; // one more for the string terminated char.
|
|
} else {
|
|
pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
|
|
}
|
|
}
|
|
|
|
if (pExprMsg->functionId == TSDB_FUNC_ARITHM) {
|
|
hasArithmeticFunction = true;
|
|
} else if (pExprMsg->functionId == TSDB_FUNC_TAG || pExprMsg->functionId == TSDB_FUNC_TAGPRJ ||
|
|
pExprMsg->functionId == TSDB_FUNC_TAG_DUMMY) {
|
|
if (pExprMsg->colInfo.flag != TSDB_COL_TAG) { // ignore the column index check for arithmetic expression.
|
|
return TSDB_CODE_INVALID_QUERY_MSG;
|
|
}
|
|
} else {
|
|
// if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
|
|
// return TSDB_CODE_INVALID_QUERY_MSG;
|
|
// }
|
|
}
|
|
|
|
pExprMsg = (SSqlFuncMsg *)pMsg;
|
|
}
|
|
|
|
pQueryMsg->colNameLen = htonl(pQueryMsg->colNameLen);
|
|
if (hasArithmeticFunction) { // column name array
|
|
assert(pQueryMsg->colNameLen > 0);
|
|
pQueryMsg->colNameList = (int64_t)pMsg;
|
|
pMsg += pQueryMsg->colNameLen;
|
|
}
|
|
|
|
pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
|
|
|
|
if (pQueryMsg->numOfGroupCols > 0) { // group by tag columns
|
|
*groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
|
|
|
|
for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
|
|
(*groupbyCols)[i].colId = *(int16_t *)pMsg;
|
|
pMsg += sizeof((*groupbyCols)[i].colId);
|
|
|
|
(*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
|
|
pMsg += sizeof((*groupbyCols)[i].colIndex);
|
|
|
|
(*groupbyCols)[i].flag = *(int16_t *)pMsg;
|
|
pMsg += sizeof((*groupbyCols)[i].flag);
|
|
|
|
memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
|
|
pMsg += tListLen((*groupbyCols)[i].name);
|
|
}
|
|
|
|
pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
|
|
pQueryMsg->orderType = htons(pQueryMsg->orderType);
|
|
}
|
|
|
|
pQueryMsg->interpoType = htons(pQueryMsg->interpoType);
|
|
if (pQueryMsg->interpoType != TSDB_INTERPO_NONE) {
|
|
pQueryMsg->defaultVal = (uint64_t)(pMsg);
|
|
|
|
int64_t *v = (int64_t *)pMsg;
|
|
for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
|
|
v[i] = htobe64(v[i]);
|
|
}
|
|
|
|
pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
|
|
}
|
|
|
|
if (pQueryMsg->numOfTags > 0) {
|
|
(*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
|
|
for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
|
|
SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
|
|
|
|
pTagCol->colId = htons(pTagCol->colId);
|
|
pTagCol->bytes = htons(pTagCol->bytes);
|
|
pTagCol->type = htons(pTagCol->type);
|
|
pTagCol->numOfFilters = 0;
|
|
|
|
(*tagCols)[i] = *pTagCol;
|
|
pMsg += sizeof(SColumnInfo);
|
|
}
|
|
}
|
|
|
|
// the tag query condition expression string is located at the end of query msg
|
|
if (pQueryMsg->tagCondLen > 0) {
|
|
*tagCond = calloc(1, pQueryMsg->tagCondLen);
|
|
memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
|
|
pMsg += pQueryMsg->tagCondLen;
|
|
}
|
|
|
|
if (*pMsg != 0) {
|
|
size_t len = strlen(pMsg) + 1;
|
|
*tbnameCond = malloc(len);
|
|
strcpy(*tbnameCond, pMsg);
|
|
pMsg += len;
|
|
}
|
|
|
|
qTrace("qmsg:%p query on %d table(s), qrange:%" PRId64 "-%" PRId64
|
|
", numOfGroupbyTagCols:%d, ts order:%d, "
|
|
"outputCols:%d, numOfCols:%d, interval:%d" PRId64 ", fillType:%d, comptsLen:%d, limit:%" PRId64
|
|
", offset:%" PRId64,
|
|
pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
|
|
pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
|
|
pQueryMsg->interpoType, pQueryMsg->tsLen, pQueryMsg->limit, pQueryMsg->offset);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int32_t buildAirthmeticExprFromMsg(SArithExprInfo *pExpr, SQueryTableMsg *pQueryMsg) {
|
|
// SExprInfo *pBinaryExprInfo = &pExpr->binExprInfo;
|
|
// SColumnInfo * pColMsg = pQueryMsg->colList;
|
|
#if 0
|
|
tExprNode* pBinExpr = NULL;
|
|
SSchema* pSchema = toSchema(pQueryMsg, pColMsg, pQueryMsg->numOfCols);
|
|
|
|
qTrace("qmsg:%p create binary expr from string:%s", pQueryMsg, pExpr->pBase.arg[0].argValue.pz);
|
|
tSQLBinaryExprFromString(&pBinExpr, pSchema, pQueryMsg->numOfCols, pExpr->pBase.arg[0].argValue.pz,
|
|
pExpr->pBase.arg[0].argBytes);
|
|
|
|
if (pBinExpr == NULL) {
|
|
qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pExpr->pBase.arg[0].argValue.pz);
|
|
return TSDB_CODE_APP_ERROR;
|
|
}
|
|
|
|
pBinaryExprInfo->pBinExpr = pBinExpr;
|
|
|
|
int32_t num = 0;
|
|
int16_t ids[TSDB_MAX_COLUMNS] = {0};
|
|
|
|
tSQLBinaryExprTrv(pBinExpr, &num, ids);
|
|
qsort(ids, num, sizeof(int16_t), id_compar);
|
|
|
|
int32_t i = 0, j = 0;
|
|
|
|
while (i < num && j < num) {
|
|
if (ids[i] == ids[j]) {
|
|
j++;
|
|
} else {
|
|
ids[++i] = ids[j++];
|
|
}
|
|
}
|
|
assert(i <= num);
|
|
|
|
// there may be duplicated referenced columns.
|
|
num = i + 1;
|
|
pBinaryExprInfo->pReqColumns = malloc(sizeof(SColIndex) * num);
|
|
|
|
for (int32_t k = 0; k < num; ++k) {
|
|
SColIndex* pColIndex = &pBinaryExprInfo->pReqColumns[k];
|
|
pColIndex->colId = ids[k];
|
|
}
|
|
|
|
pBinaryExprInfo->numOfCols = num;
|
|
free(pSchema);
|
|
#endif
|
|
|
|
return TSDB_CODE_SUCCESS;
|
|
}
|
|
|
|
static int32_t createSqlFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SArithExprInfo **pSqlFuncExpr,
|
|
SSqlFuncMsg **pExprMsg, SColumnInfo* pTagCols) {
|
|
*pSqlFuncExpr = NULL;
|
|
int32_t code = TSDB_CODE_SUCCESS;
|
|
|
|
SArithExprInfo *pExprs = (SArithExprInfo *)calloc(1, sizeof(SArithExprInfo) * pQueryMsg->numOfOutput);
|
|
if (pExprs == NULL) {
|
|
return TSDB_CODE_SERV_OUT_OF_MEMORY;
|
|
}
|
|
|
|
bool isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
|
|
int16_t tagLen = 0;
|
|
|
|
for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
|
|
pExprs[i].pBase = *pExprMsg[i];
|
|
pExprs[i].bytes = 0;
|
|
|
|
int16_t type = 0;
|
|
int16_t bytes = 0;
|
|
|
|
// parse the arithmetic expression
|
|
if (pExprs[i].pBase.functionId == TSDB_FUNC_ARITHM) {
|
|
code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
|
|
|
|
if (code != TSDB_CODE_SUCCESS) {
|
|
tfree(pExprs);
|
|
return code;
|
|
}
|
|
|
|
type = TSDB_DATA_TYPE_DOUBLE;
|
|
bytes = tDataTypeDesc[type].nSize;
|
|
} else if (pExprs[i].pBase.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) { // parse the normal column
|
|
type = TSDB_DATA_TYPE_BINARY;
|
|
bytes = TSDB_TABLE_NAME_LEN;
|
|
} else{
|
|
int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].pBase, pTagCols);
|
|
assert(j < pQueryMsg->numOfCols);
|
|
|
|
SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].pBase.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
|
|
type = pCol->type;
|
|
bytes = pCol->bytes;
|
|
}
|
|
|
|
int32_t param = pExprs[i].pBase.arg[0].argValue.i64;
|
|
if (getResultDataInfo(type, bytes, pExprs[i].pBase.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
|
|
&pExprs[i].interResBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
|
|
tfree(pExprs);
|
|
return TSDB_CODE_INVALID_QUERY_MSG;
|
|
}
|
|
|
|
if (pExprs[i].pBase.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].pBase.functionId == TSDB_FUNC_TS_DUMMY) {
|
|
tagLen += pExprs[i].bytes;
|
|
}
|
|
assert(isValidDataType(pExprs[i].type, pExprs[i].bytes));
|
|
}
|
|
|
|
// get the correct result size for top/bottom query, according to the number of tags columns in selection clause
|
|
|
|
// TODO refactor
|
|
for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
|
|
pExprs[i].pBase = *pExprMsg[i];
|
|
int16_t functId = pExprs[i].pBase.functionId;
|
|
|
|
if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
|
|
int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].pBase, pTagCols);
|
|
assert(j < pQueryMsg->numOfCols);
|
|
|
|
SColumnInfo *pCol = &pQueryMsg->colList[j];
|
|
|
|
int32_t ret =
|
|
getResultDataInfo(pCol->type, pCol->bytes, functId, pExprs[i].pBase.arg[0].argValue.i64,
|
|
&pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interResBytes, tagLen, isSuperTable);
|
|
assert(ret == TSDB_CODE_SUCCESS);
|
|
}
|
|
}
|
|
|
|
tfree(pExprMsg);
|
|
*pSqlFuncExpr = pExprs;
|
|
|
|
return TSDB_CODE_SUCCESS;
|
|
}
|
|
|
|
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
|
|
if (pQueryMsg->numOfGroupCols == 0) {
|
|
return NULL;
|
|
}
|
|
|
|
// using group by tag columns
|
|
SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
|
|
if (pGroupbyExpr == NULL) {
|
|
*code = TSDB_CODE_SERV_OUT_OF_MEMORY;
|
|
return NULL;
|
|
}
|
|
|
|
pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
|
|
pGroupbyExpr->orderType = pQueryMsg->orderType;
|
|
pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;
|
|
|
|
pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
|
|
for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
|
|
taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
|
|
}
|
|
|
|
return pGroupbyExpr;
|
|
}
|
|
|
|
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
|
|
for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
|
|
if (pQuery->colList[i].numOfFilters > 0) {
|
|
pQuery->numOfFilterCols++;
|
|
}
|
|
}
|
|
|
|
if (pQuery->numOfFilterCols == 0) {
|
|
return TSDB_CODE_SUCCESS;
|
|
}
|
|
|
|
pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
|
|
|
|
for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
|
|
if (pQuery->colList[i].numOfFilters > 0) {
|
|
SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];
|
|
|
|
memcpy(&pFilterInfo->info, &pQuery->colList[i], sizeof(SColumnInfoData));
|
|
pFilterInfo->info = pQuery->colList[i];
|
|
|
|
pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
|
|
pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
|
|
|
|
for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
|
|
SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
|
|
pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];
|
|
|
|
int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
|
|
int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;
|
|
|
|
if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
|
|
qError("QInfo:%p invalid filter info", pQInfo);
|
|
return TSDB_CODE_INVALID_QUERY_MSG;
|
|
}
|
|
|
|
int16_t type = pQuery->colList[i].type;
|
|
int16_t bytes = pQuery->colList[i].bytes;
|
|
|
|
// todo refactor
|
|
__filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
|
|
__filter_func_t *filterArray = getValueFilterFuncArray(type);
|
|
|
|
if (rangeFilterArray == NULL && filterArray == NULL) {
|
|
qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
|
|
return TSDB_CODE_INVALID_QUERY_MSG;
|
|
}
|
|
|
|
if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
|
|
(upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
|
|
if (lower == TSDB_RELATION_GREATER_EQUAL) {
|
|
if (upper == TSDB_RELATION_LESS_EQUAL) {
|
|
pSingleColFilter->fp = rangeFilterArray[4];
|
|
} else {
|
|
pSingleColFilter->fp = rangeFilterArray[2];
|
|
}
|
|
} else {
|
|
if (upper == TSDB_RELATION_LESS_EQUAL) {
|
|
pSingleColFilter->fp = rangeFilterArray[3];
|
|
} else {
|
|
pSingleColFilter->fp = rangeFilterArray[1];
|
|
}
|
|
}
|
|
} else { // set callback filter function
|
|
if (lower != TSDB_RELATION_INVALID) {
|
|
pSingleColFilter->fp = filterArray[lower];
|
|
|
|
if (upper != TSDB_RELATION_INVALID) {
|
|
qError("pQInfo:%p failed to get filter function, invalid filter condition", pQInfo, type);
|
|
return TSDB_CODE_INVALID_QUERY_MSG;
|
|
}
|
|
} else {
|
|
pSingleColFilter->fp = filterArray[upper];
|
|
}
|
|
}
|
|
assert(pSingleColFilter->fp != NULL);
|
|
pSingleColFilter->bytes = bytes;
|
|
}
|
|
|
|
j++;
|
|
}
|
|
}
|
|
|
|
return TSDB_CODE_SUCCESS;
|
|
}
|
|
|
|
static void doUpdateExprColumnIndex(SQuery *pQuery) {
|
|
assert(pQuery->pSelectExpr != NULL && pQuery != NULL);
|
|
|
|
for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
|
|
SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].pBase;
|
|
if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM || pSqlExprMsg->colInfo.flag == TSDB_COL_TAG) {
|
|
continue;
|
|
}
|
|
|
|
SColIndex *pColIndexEx = &pSqlExprMsg->colInfo;
|
|
if (!TSDB_COL_IS_TAG(pColIndexEx->flag)) {
|
|
for (int32_t f = 0; f < pQuery->numOfCols; ++f) {
|
|
if (pColIndexEx->colId == pQuery->colList[f].colId) {
|
|
pColIndexEx->colIndex = f;
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
for (int32_t f = 0; f < pQuery->numOfTags; ++f) {
|
|
if (pColIndexEx->colId == pQuery->tagColList[f].colId) {
|
|
pColIndexEx->colIndex = f;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SArithExprInfo *pExprs,
|
|
STableGroupInfo *groupInfo, SColumnInfo* pTagCols) {
|
|
SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
|
|
if (pQInfo == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
SQuery *pQuery = calloc(1, sizeof(SQuery));
|
|
pQInfo->runtimeEnv.pQuery = pQuery;
|
|
|
|
int16_t numOfCols = pQueryMsg->numOfCols;
|
|
int16_t numOfOutput = pQueryMsg->numOfOutput;
|
|
|
|
pQuery->numOfCols = numOfCols;
|
|
pQuery->numOfOutput = numOfOutput;
|
|
pQuery->limit.limit = pQueryMsg->limit;
|
|
pQuery->limit.offset = pQueryMsg->offset;
|
|
pQuery->order.order = pQueryMsg->order;
|
|
pQuery->order.orderColId = pQueryMsg->orderColId;
|
|
pQuery->pSelectExpr = pExprs;
|
|
pQuery->pGroupbyExpr = pGroupbyExpr;
|
|
pQuery->intervalTime = pQueryMsg->intervalTime;
|
|
pQuery->slidingTime = pQueryMsg->slidingTime;
|
|
pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
|
|
pQuery->interpoType = pQueryMsg->interpoType;
|
|
pQuery->numOfTags = pQueryMsg->numOfTags;
|
|
|
|
pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
|
|
if (pQuery->colList == NULL) {
|
|
goto _cleanup;
|
|
}
|
|
|
|
for (int16_t i = 0; i < numOfCols; ++i) {
|
|
pQuery->colList[i] = pQueryMsg->colList[i];
|
|
|
|
SColumnInfo *pColInfo = &pQuery->colList[i];
|
|
pColInfo->filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pColInfo->numOfFilters);
|
|
}
|
|
|
|
pQuery->tagColList = pTagCols;
|
|
|
|
// calculate the result row size
|
|
for (int16_t col = 0; col < numOfOutput; ++col) {
|
|
assert(pExprs[col].bytes > 0);
|
|
pQuery->rowSize += pExprs[col].bytes;
|
|
}
|
|
|
|
doUpdateExprColumnIndex(pQuery);
|
|
|
|
int32_t ret = createFilterInfo(pQInfo, pQuery);
|
|
if (ret != TSDB_CODE_SUCCESS) {
|
|
goto _cleanup;
|
|
}
|
|
|
|
// prepare the result buffer
|
|
pQuery->sdata = (SData **)calloc(pQuery->numOfOutput, POINTER_BYTES);
|
|
if (pQuery->sdata == NULL) {
|
|
goto _cleanup;
|
|
}
|
|
|
|
// set the output buffer capacity
|
|
pQuery->rec.capacity = 4096;
|
|
pQuery->rec.threshold = 4000;
|
|
|
|
for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
|
|
assert(pExprs[col].interResBytes >= pExprs[col].bytes);
|
|
|
|
// allocate additional memory for interResults that are usually larger then final results
|
|
size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interResBytes + sizeof(SData);
|
|
pQuery->sdata[col] = (SData *)calloc(1, size);
|
|
if (pQuery->sdata[col] == NULL) {
|
|
goto _cleanup;
|
|
}
|
|
}
|
|
|
|
if (pQuery->interpoType != TSDB_INTERPO_NONE) {
|
|
pQuery->defaultVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
|
|
if (pQuery->defaultVal == NULL) {
|
|
goto _cleanup;
|
|
}
|
|
|
|
// the first column is the timestamp
|
|
memcpy(pQuery->defaultVal, (char *)pQueryMsg->defaultVal, pQuery->numOfOutput * sizeof(int64_t));
|
|
}
|
|
|
|
// to make sure third party won't overwrite this structure
|
|
pQInfo->signature = pQInfo;
|
|
pQInfo->groupInfo = *groupInfo;
|
|
|
|
pQuery->pos = -1;
|
|
|
|
pQuery->window = pQueryMsg->window;
|
|
pQuery->lastKey = pQuery->window.skey;
|
|
|
|
if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
|
|
qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, strerror(errno));
|
|
goto _cleanup;
|
|
}
|
|
|
|
vnodeParametersSafetyCheck(pQuery);
|
|
|
|
qTrace("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
|
|
return pQInfo;
|
|
|
|
_cleanup:
|
|
tfree(pQuery->defaultVal);
|
|
|
|
if (pQuery->sdata != NULL) {
|
|
for (int16_t col = 0; col < pQuery->numOfOutput; ++col) {
|
|
tfree(pQuery->sdata[col]);
|
|
}
|
|
}
|
|
|
|
tfree(pQuery->sdata);
|
|
tfree(pQuery->pFilterInfo);
|
|
tfree(pQuery->colList);
|
|
|
|
tfree(pExprs);
|
|
tfree(pGroupbyExpr);
|
|
|
|
tfree(pQInfo);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static bool isValidQInfo(void *param) {
|
|
SQInfo *pQInfo = (SQInfo *)param;
|
|
if (pQInfo == NULL) {
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* pQInfo->signature may be changed by another thread, so we assign value of signature
|
|
* into local variable, then compare by using local variable
|
|
*/
|
|
uint64_t sig = (uint64_t)pQInfo->signature;
|
|
return (sig == (uint64_t)pQInfo);
|
|
}
|
|
|
|
static void freeQInfo(SQInfo *pQInfo);
|
|
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, SQInfo *pQInfo, bool isSTable) {
|
|
int32_t code = TSDB_CODE_SUCCESS;
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
|
|
STSBuf *pTSBuf = NULL;
|
|
if (pQueryMsg->tsLen > 0) { // open new file to save the result
|
|
char *tsBlock = (char *)pQueryMsg + pQueryMsg->tsOffset;
|
|
pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);
|
|
|
|
tsBufResetPos(pTSBuf);
|
|
tsBufNextPos(pTSBuf);
|
|
}
|
|
|
|
// only the successful complete requries the sem_post/over = 1 operations.
|
|
if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
|
|
(!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
|
|
qTrace("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
|
|
pQuery->window.ekey, pQuery->order.order);
|
|
|
|
sem_post(&pQInfo->dataReady);
|
|
setQueryStatus(pQuery, QUERY_COMPLETED);
|
|
return TSDB_CODE_SUCCESS;
|
|
}
|
|
|
|
// filter the qualified
|
|
if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, isSTable)) != TSDB_CODE_SUCCESS) {
|
|
goto _error;
|
|
}
|
|
|
|
// qTrace("QInfo:%p set query flag and prepare runtime environment completed, ref:%d, wait for schedule", pQInfo,
|
|
// pQInfo->refCount);
|
|
return code;
|
|
|
|
_error:
|
|
// table query ref will be decrease during error handling
|
|
freeQInfo(pQInfo);
|
|
return code;
|
|
}
|
|
|
|
static void freeQInfo(SQInfo *pQInfo) {
|
|
if (!isValidQInfo(pQInfo)) {
|
|
return;
|
|
}
|
|
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
setQueryKilled(pQInfo);
|
|
|
|
qTrace("QInfo:%p start to free QInfo", pQInfo);
|
|
for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
|
|
tfree(pQuery->sdata[col]);
|
|
}
|
|
|
|
sem_destroy(&(pQInfo->dataReady));
|
|
teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);
|
|
|
|
for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
|
|
SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
|
|
if (pColFilter->numOfFilters > 0) {
|
|
tfree(pColFilter->pFilters);
|
|
}
|
|
}
|
|
|
|
if (pQuery->pSelectExpr != NULL) {
|
|
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
|
|
SExprInfo *pBinExprInfo = &pQuery->pSelectExpr[i].binExprInfo;
|
|
|
|
if (pBinExprInfo->numOfCols > 0) {
|
|
tfree(pBinExprInfo->pReqColumns);
|
|
tExprTreeDestroy(&pBinExprInfo->pBinExpr, NULL);
|
|
}
|
|
}
|
|
|
|
tfree(pQuery->pSelectExpr);
|
|
}
|
|
|
|
if (pQuery->defaultVal != NULL) {
|
|
tfree(pQuery->defaultVal);
|
|
}
|
|
|
|
// todo refactor, extract method to destroytableDataInfo
|
|
int32_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
|
|
for (int32_t i = 0; i < numOfGroups; ++i) {
|
|
SArray *p = taosArrayGetP(pQInfo->groupInfo.pGroupList, i);
|
|
|
|
size_t num = taosArrayGetSize(p);
|
|
for(int32_t j = 0; j < num; ++j) {
|
|
SPair* pair = taosArrayGet(p, j);
|
|
if (pair->sec != NULL) {
|
|
destroyTableQueryInfo(((STableDataInfo*)pair->sec)->pTableQInfo, pQuery->numOfOutput);
|
|
tfree(pair->sec);
|
|
}
|
|
}
|
|
taosArrayDestroy(p);
|
|
}
|
|
|
|
if (pQuery->pGroupbyExpr != NULL) {
|
|
taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
|
|
tfree(pQuery->pGroupbyExpr);
|
|
}
|
|
|
|
tfree(pQuery->tagColList);
|
|
tfree(pQuery->pFilterInfo);
|
|
tfree(pQuery->colList);
|
|
tfree(pQuery->sdata);
|
|
|
|
taosArrayDestroy(pQInfo->groupInfo.pGroupList);
|
|
tfree(pQuery);
|
|
|
|
qTrace("QInfo:%p QInfo is freed", pQInfo);
|
|
|
|
// destroy signature, in order to avoid the query process pass the object safety check
|
|
memset(pQInfo, 0, sizeof(SQInfo));
|
|
tfree(pQInfo);
|
|
}
|
|
|
|
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
|
|
/*
|
|
* get the file size and set the numOfRows to be the file size, since for tsComp query,
|
|
* the returned row size is equalled to 1
|
|
* TODO handle the case that the file is too large to send back one time
|
|
*/
|
|
if (isTSCompQuery(pQuery) && (*numOfRows) > 0) {
|
|
struct stat fstat;
|
|
if (stat(pQuery->sdata[0]->data, &fstat) == 0) {
|
|
*numOfRows = fstat.st_size;
|
|
return fstat.st_size;
|
|
} else {
|
|
qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
|
|
return 0;
|
|
}
|
|
} else {
|
|
return pQuery->rowSize * (*numOfRows);
|
|
}
|
|
}
|
|
|
|
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
|
|
// the remained number of retrieved rows, not the interpolated result
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
|
|
// load data from file to msg buffer
|
|
if (isTSCompQuery(pQuery)) {
|
|
int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);
|
|
|
|
// make sure file exist
|
|
if (FD_VALID(fd)) {
|
|
size_t s = lseek(fd, 0, SEEK_END);
|
|
qTrace("QInfo:%p ts comp data return, file:%s, size:%zu", pQInfo, pQuery->sdata[0]->data, s);
|
|
|
|
lseek(fd, 0, SEEK_SET);
|
|
read(fd, data, s);
|
|
close(fd);
|
|
|
|
unlink(pQuery->sdata[0]->data);
|
|
} else {
|
|
qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
|
|
pQuery->sdata[0]->data, strerror(errno));
|
|
}
|
|
} else {
|
|
doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
|
|
}
|
|
|
|
pQuery->rec.total += pQuery->rec.rows;
|
|
qTrace("QInfo:%p current:%d, total:%d", pQInfo, pQuery->rec.rows, pQuery->rec.total);
|
|
|
|
return TSDB_CODE_SUCCESS;
|
|
|
|
// todo if interpolation exists, the result may be dump to client by several rounds
|
|
}
|
|
|
|
int32_t qCreateQueryInfo(void *tsdb, SQueryTableMsg *pQueryMsg, qinfo_t *pQInfo) {
|
|
assert(pQueryMsg != NULL);
|
|
|
|
int32_t code = TSDB_CODE_SUCCESS;
|
|
|
|
char * tagCond = NULL, *tbnameCond = NULL;
|
|
SArray * pTableIdList = NULL;
|
|
SSqlFuncMsg **pExprMsg = NULL;
|
|
SColIndex * pGroupColIndex = NULL;
|
|
SColumnInfo* pTagColumnInfo = NULL;
|
|
|
|
if ((code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo)) !=
|
|
TSDB_CODE_SUCCESS) {
|
|
return code;
|
|
}
|
|
|
|
if (pQueryMsg->numOfTables <= 0) {
|
|
qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
|
|
code = TSDB_CODE_INVALID_QUERY_MSG;
|
|
goto _query_over;
|
|
}
|
|
|
|
if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
|
|
qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
|
|
code = TSDB_CODE_INVALID_QUERY_MSG;
|
|
goto _query_over;
|
|
}
|
|
|
|
SArithExprInfo *pExprs = NULL;
|
|
if ((code = createSqlFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
|
|
goto _query_over;
|
|
}
|
|
|
|
SSqlGroupbyExpr *pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
|
|
if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
|
|
goto _query_over;
|
|
}
|
|
|
|
bool isSTableQuery = false;
|
|
STableGroupInfo groupInfo = {0};
|
|
|
|
if ((pQueryMsg->queryType & TSDB_QUERY_TYPE_STABLE_QUERY) != 0) {
|
|
isSTableQuery = true;
|
|
|
|
STableId *id = taosArrayGet(pTableIdList, 0);
|
|
id->uid = -1; // todo fix me
|
|
|
|
// group by normal column, do not pass the group by condition to tsdb to group table into different group
|
|
int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
|
|
if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
|
|
numOfGroupByCols = 0;
|
|
}
|
|
|
|
// todo handle the error
|
|
/*int32_t ret =*/tsdbQueryByTagsCond(tsdb, id->uid, tagCond, pQueryMsg->tagCondLen, pQueryMsg->tagNameRelType, tbnameCond, &groupInfo, pGroupColIndex,
|
|
numOfGroupByCols);
|
|
if (groupInfo.numOfTables == 0) { // no qualified tables no need to do query
|
|
code = TSDB_CODE_SUCCESS;
|
|
goto _query_over;
|
|
}
|
|
} else {
|
|
assert(taosArrayGetSize(pTableIdList) == 1);
|
|
|
|
STableId *id = taosArrayGet(pTableIdList, 0);
|
|
if ((code = tsdbGetOneTableGroup(tsdb, id->uid, &groupInfo)) != TSDB_CODE_SUCCESS) {
|
|
goto _query_over;
|
|
}
|
|
}
|
|
|
|
(*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, &groupInfo, pTagColumnInfo);
|
|
if ((*pQInfo) == NULL) {
|
|
code = TSDB_CODE_SERV_OUT_OF_MEMORY;
|
|
}
|
|
|
|
code = initQInfo(pQueryMsg, tsdb, *pQInfo, isSTableQuery);
|
|
|
|
_query_over:
|
|
tfree(tagCond);
|
|
tfree(tbnameCond);
|
|
taosArrayDestroy(pTableIdList);
|
|
|
|
// if failed to add ref for all meters in this query, abort current query
|
|
// atomic_fetch_add_32(&vnodeSelectReqNum, 1);
|
|
return TSDB_CODE_SUCCESS;
|
|
}
|
|
|
|
void qDestroyQueryInfo(qinfo_t pQInfo) {
|
|
qTrace("QInfo:%p query completed", pQInfo);
|
|
freeQInfo(pQInfo);
|
|
}
|
|
|
|
void qTableQuery(qinfo_t qinfo) {
|
|
SQInfo *pQInfo = (SQInfo *)qinfo;
|
|
|
|
if (pQInfo == NULL || pQInfo->signature != pQInfo) {
|
|
qTrace("%p freed abort query", pQInfo);
|
|
return;
|
|
}
|
|
|
|
if (isQueryKilled(pQInfo)) {
|
|
qTrace("QInfo:%p it is already killed, abort", pQInfo);
|
|
return;
|
|
}
|
|
|
|
qTrace("QInfo:%p query task is launched", pQInfo);
|
|
|
|
if (pQInfo->runtimeEnv.stableQuery) {
|
|
stableQueryImpl(pQInfo);
|
|
} else {
|
|
tableQueryImpl(pQInfo);
|
|
}
|
|
|
|
// vnodeDecRefCount(pQInfo);
|
|
}
|
|
|
|
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
|
|
SQInfo *pQInfo = (SQInfo *)qinfo;
|
|
|
|
if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
|
|
return TSDB_CODE_INVALID_QHANDLE;
|
|
}
|
|
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
if (isQueryKilled(pQInfo)) {
|
|
qTrace("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
|
|
return pQInfo->code;
|
|
}
|
|
|
|
sem_wait(&pQInfo->dataReady);
|
|
qTrace("QInfo:%p retrieve result info, rowsize:%d, rows:%d, code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
|
|
pQInfo->code);
|
|
|
|
return pQInfo->code;
|
|
}
|
|
|
|
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
|
|
SQInfo *pQInfo = (SQInfo *)qinfo;
|
|
|
|
if (pQInfo == NULL || pQInfo->signature != pQInfo || pQInfo->code != TSDB_CODE_SUCCESS) {
|
|
return false;
|
|
}
|
|
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
|
|
return false;
|
|
} else if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
|
|
return true;
|
|
} else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
|
|
return true;
|
|
} else {
|
|
assert(0);
|
|
}
|
|
}
|
|
|
|
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
|
|
SQInfo *pQInfo = (SQInfo *)qinfo;
|
|
|
|
if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
|
|
return TSDB_CODE_INVALID_QHANDLE;
|
|
}
|
|
|
|
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
|
|
size_t size = getResultSize(pQInfo, &pQuery->rec.rows);
|
|
*contLen = size + sizeof(SRetrieveTableRsp);
|
|
|
|
// todo handle failed to allocate memory
|
|
*pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
|
|
(*pRsp)->numOfRows = htonl(pQuery->rec.rows);
|
|
|
|
int32_t code = pQInfo->code;
|
|
if (code == TSDB_CODE_SUCCESS) {
|
|
(*pRsp)->offset = htobe64(pQuery->limit.offset);
|
|
(*pRsp)->useconds = htobe64(pQInfo->elapsedTime);
|
|
} else {
|
|
(*pRsp)->offset = 0;
|
|
(*pRsp)->useconds = 0;
|
|
}
|
|
|
|
if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
|
|
code = doDumpQueryResult(pQInfo, (*pRsp)->data);
|
|
} else {
|
|
setQueryStatus(pQuery, QUERY_OVER);
|
|
code = pQInfo->code;
|
|
}
|
|
|
|
if (isQueryKilled(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
|
|
(*pRsp)->completed = 1; // notify no more result to client
|
|
}
|
|
|
|
return code;
|
|
|
|
// if (numOfRows == 0 && (pRetrieve->qhandle == (uint64_t)pObj->qhandle) && (code != TSDB_CODE_ACTION_IN_PROGRESS)) {
|
|
// qTrace("QInfo:%p %s free qhandle code:%d", pObj->qhandle, __FUNCTION__, code);
|
|
// vnodeDecRefCount(pObj->qhandle);
|
|
// pObj->qhandle = NULL;
|
|
// }
|
|
}
|