[td-10564] Add codes for query parsing.

This commit is contained in:
Haojun Liao 2021-10-21 11:06:52 +08:00
parent 132e3b458e
commit b7917aa43b
32 changed files with 11893 additions and 115 deletions

View File

@ -36,4 +36,32 @@
// int16_t bytes;
//} SSchema;
/*
 * Aggregate values (sum/max/min plus index and NULL-count fields) kept for a
 * single column, identified by colId.
 */
typedef struct SColumnDataAgg {
  int16_t colId;
  int64_t sum;
  int64_t max;
  int64_t min;
  int16_t maxIndex;   // presumably the row position of `max` -- TODO confirm
  int16_t minIndex;   // presumably the row position of `min` -- TODO confirm
  int16_t numOfNull;  // presumably the count of NULL values -- TODO confirm
} SColumnDataAgg;
/* Descriptive information attached to a data block. */
typedef struct SDataBlockInfo {
  STimeWindow window;
  int32_t     rows;       // presumably the number of rows in the block -- TODO confirm
  int32_t     numOfCols;  // presumably the number of columns in the block -- TODO confirm
  int64_t     uid;
} SDataBlockInfo;
/* A data block: its column data, optional aggregate values and block info. */
typedef struct SSDataBlock {
  SColumnDataAgg *pBlockAgg;
  SArray         *pDataBlock;  // SArray<SColumnInfoData>
  SDataBlockInfo  info;
} SSDataBlock;
/* Column description together with a pointer to that column's in-memory data. */
typedef struct SColumnInfoData {
  SColumnInfo info;   // TODO filter info needs to be removed
  char       *pData;  // the corresponding block data in memory
} SColumnInfoData;
#endif // TDENGINE_COMMON_H

View File

@ -184,16 +184,6 @@ void** qReleaseQInfo(void* pMgmt, void* pQInfo);
*/
void** qDeregisterQInfo(void* pMgmt, void* pQInfo);
//======================================================================================================================
// built-in sql functions
/**
* Check whether the given name is a valid built-in sql function.
* @param name  function name to check
* @param len   length of name (presumably in bytes, so name need not be NUL-terminated -- TODO confirm)
* @return      true if name is a valid built-in sql function
*/
bool isBuiltinFunction(const char* name, int32_t len);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,164 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TFUNCTION_H
#define TDENGINE_TFUNCTION_H
#ifdef __cplusplus
extern "C" {
#endif
#include "common.h"
// Function categories (scalar function or aggregation function).
#define FUNCTION_SCALAR 1
#define FUNCTION_AGG 2
// Size of the fixed `name` buffers used by the function descriptor structs.
#define FUNCTIONS_NAME_MAX_LENGTH 16
// Parenthesized so the negative constant always expands as one operand.
#define FUNCTION_INVALID_ID (-1)
/* Identifiers of the built-in functions; the values are consecutive from 0. */
#define FUNCTION_COUNT        0
#define FUNCTION_SUM          1
#define FUNCTION_AVG          2
#define FUNCTION_MIN          3
#define FUNCTION_MAX          4
#define FUNCTION_STDDEV       5
#define FUNCTION_PERCT        6
#define FUNCTION_APERCT       7
#define FUNCTION_FIRST        8
#define FUNCTION_LAST         9
#define FUNCTION_LAST_ROW     10
#define FUNCTION_TOP          11
#define FUNCTION_BOTTOM       12
#define FUNCTION_SPREAD       13
#define FUNCTION_TWA          14
#define FUNCTION_LEASTSQR     15

#define FUNCTION_TS           16
#define FUNCTION_TS_DUMMY     17
#define FUNCTION_TAG_DUMMY    18
#define FUNCTION_TS_COMP      19

#define FUNCTION_TAG          20
#define FUNCTION_PRJ          21
#define FUNCTION_TAGPRJ       22
#define FUNCTION_ARITHM       23
#define FUNCTION_DIFF         24

#define FUNCTION_FIRST_DST    25
#define FUNCTION_LAST_DST     26
#define FUNCTION_STDDEV_DST   27
#define FUNCTION_INTERP       28

#define FUNCTION_RATE         29
#define FUNCTION_IRATE        30
#define FUNCTION_TID_TAG      31
#define FUNCTION_DERIVATIVE   32
#define FUNCTION_BLKINFO      33

#define FUNCTION_HISTOGRAM    34
#define FUNCTION_HLL          35
#define FUNCTION_MODE         36
#define FUNCTION_SAMPLE       37
/* A key paired with a value stored either as a double or as a char pointer. */
typedef struct SPoint1 {
  int64_t key;
  union {
    double val;
    char*  ptr;
  };
} SPoint1;
struct SQLFunctionCtx;
struct SResultRowCellInfo;

/*
 * For selectivity queries: the corresponding tag value is assigned when the
 * data is qualified.
 */
typedef struct SExtTagsInfo {
  int16_t                 tagsLen;       // keep the tags data for top/bottom query result
  int16_t                 numOfTagCols;
  struct SQLFunctionCtx **pTagCtxList;
} SExtTagsInfo;
/* Runtime context handed to a sql function while it executes. */
typedef struct SQLFunctionCtx {
  int32_t   size;           // number of rows
  void     *pInput;         // input data buffer
  uint32_t  order;          // asc|desc
  int16_t   inputType;
  int16_t   inputBytes;

  int16_t   outputType;
  int16_t   outputBytes;    // size of results, determined by function and input column data type
  int32_t   interBufBytes;  // internal buffer size
  bool      hasNull;        // null value exist in current block
  bool      requireNull;    // require null in some function
  bool      stableQuery;
  int16_t   functionId;     // function id
  char     *pOutput;        // final result output buffer, point to sdata->data
  uint8_t   currentStage;   // record current running step, default: 0
  int64_t   startTs;        // timestamp range of current query when function is executed on a specific data block
  int32_t   numOfParams;
  SVariant  param[4];       // input parameter, e.g., top(k, 20), the number of results for top query is kept in param
  int64_t  *ptsList;        // corresponding timestamp array list
  void     *ptsOutputBuf;   // corresponding output buffer for timestamp of each result, e.g., top/bottom
  SVariant  tag;

  bool            isSmaSet;
  SColumnDataAgg  sma;
  struct SResultRowCellInfo *resultInfo;
  SExtTagsInfo    tagInfo;
  SPoint1         start;
  SPoint1         end;
} SQLFunctionCtx;
/* Descriptor of one aggregate function: identity, flags and callbacks. */
typedef struct SAggFunctionInfo {
  char     name[FUNCTIONS_NAME_MAX_LENGTH];
  int8_t   type;         // Scalar function or aggregation function
  uint8_t  functionId;   // Function Id
  int8_t   sFunctionId;  // Transfer function for super table query
  uint16_t status;

  // setup the execute environment
  bool (*init)(SQLFunctionCtx *pCtx, struct SResultRowCellInfo* pResultCellInfo);
  void (*exec)(SQLFunctionCtx *pCtx);

  // finalizer must be called after all exec has been executed to generate the final result.
  void (*xFinalize)(SQLFunctionCtx *pCtx);
  void (*mergeFunc)(SQLFunctionCtx *pCtx);

  int32_t (*dataReqFunc)(SQLFunctionCtx *pCtx, STimeWindow* w, int32_t colId);
} SAggFunctionInfo;
/* Descriptor of one scalar function: identity and callbacks. */
typedef struct SScalarFunctionInfo {
  char    name[FUNCTIONS_NAME_MAX_LENGTH];
  int8_t  type;        // scalar function or aggregation function
  uint8_t functionId;  // index of scalar function

  // setup the execute environment
  bool (*init)(SQLFunctionCtx *pCtx, struct SResultRowCellInfo* pResultCellInfo);
  void (*exec)(SQLFunctionCtx *pCtx);
} SScalarFunctionInfo;
/*
 * Reports result information for a function through the output pointers
 * type, len and interBytes.
 * NOTE(review): presumably these are the result data type, the result length in
 * bytes and the intermediate buffer size for functionId applied to an input of
 * dataType/dataBytes; the meaning of param, extLength and of the int32_t return
 * value is not visible here -- confirm against the implementation.
 */
int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionId, int32_t param, int16_t *type,
int16_t *len, int32_t *interBytes, int16_t extLength, bool isSuperTable/*, SUdfInfo* pUdfInfo*/);
/**
* Check whether the given name is a valid built-in sql function.
* @param name  function name to check
* @param len   length of name (presumably in bytes -- TODO confirm)
* @return      int32_t result of the check.
*              NOTE(review): the return type is int32_t, not bool; whether this is a
*              boolean flag or a function id is not visible here -- confirm against
*              the implementation.
*/
int32_t qIsBuiltinFunction(const char* name, int32_t len);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TFUNCTION_H

View File

@ -95,10 +95,7 @@ typedef struct STagCond {
typedef struct STableMetaInfo {
STableMeta *pTableMeta; // table meta, cached in client side and acquired by name
uint32_t tableMetaSize;
// size_t tableMetaCapacity;
SVgroupsInfo *vgroupList;
SArray *pVgroupTables; // SArray<SVgroupTableInfo>
/*
* 1. keep the vgroup index during the multi-vnode super table projection query
@ -110,6 +107,20 @@ typedef struct STableMetaInfo {
SArray *tagColList; // SArray<SColumn*>, involved tag columns
} STableMetaInfo;
/* Boolean flags describing the kind of query being handled. */
typedef struct SQueryType {
  bool stableQuery;
  bool groupbyColumn;
  bool simpleAgg;
  bool arithmeticOnAgg;
  bool projectionQuery;
  bool hasFilter;
  bool onlyTagQuery;
  bool orderProjectQuery;
  bool stateWindow;
  bool globalMerge;
  bool multigroupResult;
} SQueryType;
typedef struct SQueryStmtInfo {
int16_t command; // the command may be different for each subclause, so keep it seperately.
uint32_t type; // query/insert type
@ -152,17 +163,6 @@ typedef struct SQueryStmtInfo {
SArray *pUpstream; // SArray<struct SQueryStmtInfo>
struct SQueryStmtInfo *pDownstream;
int32_t havingFieldNum;
bool stableQuery;
bool groupbyColumn;
bool simpleAgg;
bool arithmeticOnAgg;
bool projectionQuery;
bool hasFilter;
bool onlyTagQuery;
bool orderProjectQuery;
bool stateWindow;
bool globalMerge;
bool multigroupResult;
} SQueryStmtInfo;
struct SInsertStmtInfo;

View File

@ -44,6 +44,7 @@ extern "C" {
#include <errno.h>
#include <float.h>
#include <math.h>
#include <sys/stat.h>
#include "osAtomic.h"
#include "osDef.h"

169
include/util/tpagedfile.h Normal file
View File

@ -0,0 +1,169 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TPAGEDFILE_H
#define TDENGINE_TPAGEDFILE_H
#ifdef __cplusplus
extern "C" {
#endif
#include "tlist.h"
#include "thash.h"
#include "os.h"
#include "tlockfree.h"
/* An id list is a plain SArray handle. */
typedef struct SArray* SIDList;

/* Offset/length pair (presumably the on-disk location of a page -- TODO confirm). */
typedef struct SPageDiskInfo {
  int32_t offset;
  int32_t length;
} SPageDiskInfo;
/* Bookkeeping record for one buffer page. */
typedef struct SPageInfo {
  SListNode*    pn;      // point to list node
  int32_t       pageId;
  SPageDiskInfo info;
  void*         pData;
  bool          used;    // set when the current page is in use
} SPageInfo;
/* Offset/length pair describing one free-list entry. */
typedef struct SFreeListItem {
  int32_t offset;
  int32_t len;
} SFreeListItem;
/* Counters collected for a result buffer. */
typedef struct SResultBufStatis {
  int32_t flushBytes;
  int32_t loadBytes;
  int32_t getPages;
  int32_t releasePages;
  int32_t flushPages;
} SResultBufStatis;
/* State of a disk-based result buffer. */
typedef struct SDiskbasedResultBuf {
  int32_t   numOfPages;
  int64_t   totalBufSize;
  int64_t   fileSize;          // disk file size
  FILE*     file;
  int32_t   allocateId;        // allocated page id
  char*     path;              // file path
  int32_t   pageSize;          // current used page size
  int32_t   inMemPages;        // numOfPages that are allocated in memory
  SHashObj* groupSet;          // id hash table
  SHashObj* all;
  SList*    lruList;
  void*     emptyDummyIdList;  // dummy id list
  void*     assistBuf;         // assistant buffer for compress/decompress data
  SArray*   pFree;             // free area in file
  bool      comp;              // compressed before flushed to disk
  int32_t   nextPos;           // next page flush position

  uint64_t         qId;        // for debug purpose
  SResultBufStatis statis;
} SDiskbasedResultBuf;
/* Page sizes, in bytes, and the initial value for an SPageDiskInfo. */
#define DEFAULT_INTERN_BUF_PAGE_SIZE  (1024L)
#define PAGE_INFO_INITIALIZER         (SPageDiskInfo){-1, -1}
#define DEFAULT_PAGE_SIZE             (16384L)
/* A page: a 64-bit `num` header followed by a flexible array of data bytes. */
typedef struct SFilePage {
  int64_t num;
  char    data[];
} SFilePage;
/**
* create disk-based result buffer
* @param pResultBuf    output: receives the created result buffer
* @param pagesize      page size (presumably in bytes -- TODO confirm)
* @param inMemBufSize  size of the in-memory part of the buffer (unit not visible here -- TODO confirm)
* @param qId           query id; the qId field of SDiskbasedResultBuf is marked "for debug purpose"
* @return              int32_t status (presumably 0 on success -- TODO confirm)
*/
int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t pagesize, int32_t inMemBufSize, uint64_t qId);
/**
* NOTE(review): presumably obtains a new page for the given group and reports
* its id through pageId -- confirm against the implementation.
* @param pResultBuf  result buffer to operate on
* @param groupId     group the page belongs to
* @param pageId      output: id of the page
* @return            the page
*/
SFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId);
/**
* NOTE(review): presumably returns the list of page ids that belong to the
* given group -- confirm against the implementation.
* @param pResultBuf  result buffer to operate on
* @param groupId     group to look up
* @return            id list (SIDList is a struct SArray*)
*/
SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId);
/**
* get the specified buffer page by id
* NOTE(review): the return type is tFilePage, while this header only defines
* SFilePage (used by getNewDataBuf) -- confirm whether SFilePage* is intended.
* @param pResultBuf  result buffer to operate on
* @param id          id of the page to get
* @return            the buffer page
*/
tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id);
/**
* release the referenced buf pages
* @param pResultBuf  result buffer that owns the page
* @param page        page to release
*/
void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page);
/**
* NOTE(review): presumably the same as releaseResBufPage, but addressed by the
* page's SPageInfo -- confirm against the implementation.
* @param pResultBuf  result buffer that owns the page
* @param pi          page info of the page
*/
void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi);
/**
* get the total buffer size in the format of disk file
* @param pResultBuf  result buffer to query
* @return            total buffer size
*/
size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf);
/**
* get the number of groups in the result buffer
* @param pResultBuf  result buffer to query
* @return            number of groups
*/
size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf);
/**
* destroy result buffer
* @param pResultBuf  result buffer to destroy
*/
void destroyResultBuf(SDiskbasedResultBuf* pResultBuf);
/**
* NOTE(review): presumably returns the page info of the last entry in pList --
* confirm against the implementation.
* @param pList  id list
* @return       page info
*/
SPageInfo* getLastPageInfo(SIDList pList);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TPAGEDFILE_H

View File

@ -8,4 +8,5 @@ add_subdirectory(scheduler)
add_subdirectory(lru)
add_subdirectory(catalog)
add_subdirectory(executor)
add_subdirectory(planner)
add_subdirectory(planner)
add_subdirectory(function)

View File

@ -0,0 +1,12 @@
# Collect every source file under src/ and build the `function` library from it.
aux_source_directory(src FUNCTION_SRC)
add_library(function ${FUNCTION_SRC})

# Public interface headers come from include/libs/function; inc/ is private.
target_include_directories(
    function
    PUBLIC  "${CMAKE_SOURCE_DIR}/include/libs/function"
    PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc"
)

target_link_libraries(
    function
    PRIVATE os util common
)

View File

@ -0,0 +1,139 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TAGGFUNCTION_H
#define TDENGINE_TAGGFUNCTION_H
#ifdef __cplusplus
extern "C" {
#endif
#include "os.h"
#include "tname.h"
#include "taosdef.h"
#include "tvariant.h"
#include "function.h"
#include "tudf.h"
// Table of aggregate function descriptors, defined elsewhere.
// NOTE(review): declared with 34 entries although function ids are defined up to
// FUNCTION_SAMPLE (37); if this table is indexed by function id, confirm the size.
extern SAggFunctionInfo aggFunc[34];
/* Per-cell state of a result row. */
typedef struct SResultRowCellInfo {
  int8_t   hasResult;    // result generated, not NULL value
  bool     initialized;  // output buffer has been initialized
  bool     complete;     // query has completed
  uint32_t numOfRes;     // num of output result in current buffer
} SResultRowCellInfo;
/* Status flags describing the properties of a function. */
#define FUNCSTATE_SO 0x0u
#define FUNCSTATE_MO 0x1u // dynamic number of output, not multinumber of output e.g., TOP/BOTTOM
#define FUNCSTATE_STREAM 0x2u // function avail for stream
#define FUNCSTATE_STABLE 0x4u // function avail for super table
#define FUNCSTATE_NEED_TS 0x8u // timestamp is required during query processing
#define FUNCSTATE_SELECTIVITY 0x10u // selectivity functions, can exists along with tag columns
/*
 * Common flag combinations. The replacement lists are parenthesized so that
 * the combined mask stays one operand inside larger expressions, e.g.
 * `status & BASIC_FUNC_MO` or `~BASIC_FUNC_SO`.
 */
#define BASIC_FUNC_SO (FUNCSTATE_SO | FUNCSTATE_STREAM | FUNCSTATE_STABLE)
#define BASIC_FUNC_MO (FUNCSTATE_MO | FUNCSTATE_STREAM | FUNCSTATE_STABLE)
#define AVG_FUNCTION_INTER_BUFFER_SIZE 50
#define DATA_SET_FLAG ',' // to denote the output area has data, not null value
// NOTE(review): in C a character constant has type int, so this is sizeof(int); in C++ it is 1.
#define DATA_SET_FLAG_SIZE sizeof(DATA_SET_FLAG)
// Step applied when moving forward in ascending / descending order.
#define QUERY_ASC_FORWARD_STEP 1
// Parenthesized so the negative constant always expands as one operand.
#define QUERY_DESC_FORWARD_STEP (-1)
// Yields QUERY_ASC_FORWARD_STEP when ord is TSDB_ORDER_ASC, otherwise QUERY_DESC_FORWARD_STEP.
#define GET_FORWARD_DIRECTION_FACTOR(ord) (((ord) == TSDB_ORDER_ASC) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP)
#define MAX_INTERVAL_TIME_WINDOW 1000000 // maximum allowed time windows in final results
#define TOP_BOTTOM_QUERY_LIMIT 100
/* Scan / processing stage identifiers. */
enum {
  MASTER_SCAN  = 0x0u,
  REVERSE_SCAN = 0x1u,
  REPEAT_SCAN  = 0x2u,   // repeat scan belongs to the master scan
  MERGE_STAGE  = 0x20u,
};
/* Predicates that test individual TSDB_QUERY_TYPE_* bits of a query type value. */
#define QUERY_IS_STABLE_QUERY(_qt)     (((_qt) & TSDB_QUERY_TYPE_STABLE_QUERY) != 0)
#define QUERY_IS_JOIN_QUERY(_qt)       (TSDB_QUERY_HAS_TYPE(_qt, TSDB_QUERY_TYPE_JOIN_QUERY))
#define QUERY_IS_PROJECTION_QUERY(_qt) (((_qt) & TSDB_QUERY_TYPE_PROJECTION_QUERY) != 0)
#define QUERY_IS_FREE_RESOURCE(_qt)    (((_qt) & TSDB_QUERY_TYPE_FREE_RESOURCE) != 0)
/* Supporting data for evaluating an arithmetic expression. */
typedef struct SArithmeticSupport {
  SExprInfo   *pExprInfo;
  int32_t      numOfCols;
  SColumnInfo *colList;
  void        *exprList;  // client side used
  int32_t      offset;
  char**       data;
} SArithmeticSupport;
/* Details kept for an interp operation. */
typedef struct SInterpInfoDetail {
  TSKEY  ts;          // interp specified timestamp
  int8_t type;
  int8_t primaryCol;
} SInterpInfoDetail;
// Address of the bytes that immediately follow the SResultRowCellInfo at _c.
#define GET_ROWCELL_INTERBUF(_c) ((void*) ((char*)(_c) + sizeof(SResultRowCellInfo)))
// Result cell info attached to a function context.
#define GET_RES_INFO(ctx) ((ctx)->resultInfo)
// Flag tests on a function status value. They use the FUNCSTATE_* flags defined in
// this header (the TSDB_FUNCSTATE_* spellings are not defined here).
#define IS_STREAM_QUERY_VALID(x) (((x)&FUNCSTATE_STREAM) != 0)
#define IS_MULTIOUTPUT(x) (((x)&FUNCSTATE_MO) != 0)
/* Determines the real data needed to calculate the result. */
enum {
  BLK_DATA_NO_NEEDED     = 0x0,
  BLK_DATA_STATIS_NEEDED = 0x1,
  BLK_DATA_ALL_NEEDED    = 0x3,
  BLK_DATA_DISCARD       = 0x4,  // discard current data block since it is not qualified for filter
};
/* State kept for the TWA function. */
typedef struct STwaInfo {
  int8_t      hasResult;  // flag to denote has value
  double      dOutput;
  SPoint1     p;
  STimeWindow win;
} STwaInfo;
extern int32_t functionCompatList[]; // compatible check array list
// NOTE(review): presumably decides whether a data block whose values lie in
// [minval, maxval] is still relevant for a top/bottom query -- confirm against
// the implementation; the meaning of the bool result is not visible here.
bool topbot_datablock_filter(SQLFunctionCtx *pCtx, const char *minval, const char *maxval);
/**
 * Mark a result info as not initialized so that it can be initialized again.
 * Only the `initialized` flag is cleared; numOfRes is kept because it may be
 * used later.
 */
#define RESET_RESULT_INFO(_info)   \
  do {                             \
    (_info)->initialized = false;  \
  } while (0)
/*
 * Put a result cell into its initial state: flag it as initialized, clear the
 * complete/hasResult flags and the result count, and zero the bufLen bytes
 * that directly follow the SResultRowCellInfo header.
 */
static FORCE_INLINE void initResultInfo(SResultRowCellInfo *pResInfo, int32_t bufLen) {
  void *interBuf = GET_ROWCELL_INTERBUF(pResInfo);

  pResInfo->numOfRes    = 0;
  pResInfo->hasResult   = false;
  pResInfo->complete    = false;
  pResInfo->initialized = true;  // marks this struct as initialized

  memset(interBuf, 0, bufLen);
}
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TAGGFUNCTION_H

View File

@ -0,0 +1,96 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TFILL_H
#define TDENGINE_TFILL_H
#ifdef __cplusplus
extern "C" {
#endif
#include "os.h"
#include "taosdef.h"
struct SSDataBlock;

/* Fill settings for a single column. */
typedef struct {
  STColumn col;         // column info
  int16_t  functionId;  // sql function id
  int16_t  flag;        // column flag: TAG COLUMN|NORMAL COLUMN
  int16_t  tagIndex;    // index of current tag in SFillTagColInfo array list
  union {
    int64_t i;
    double  d;
  } fillVal;
} SFillColInfo;
/* Schema of a tag column together with a pointer to its value. */
typedef struct {
  SSchema col;
  char*   tagVal;
} SFillTagColInfo;
/* Working state of a fill operation. */
typedef struct SFillInfo {
  TSKEY     start;         // start timestamp
  TSKEY     end;           // endKey for fill
  TSKEY     currentKey;    // current active timestamp, the value may be changed during the fill procedure.
  int32_t   order;         // order [TSDB_ORDER_ASC|TSDB_ORDER_DESC]
  int32_t   type;          // fill type
  int32_t   numOfRows;     // number of rows in the input data block
  int32_t   index;         // active row index
  int32_t   numOfTotal;    // number of filled rows in one round
  int32_t   numOfCurrent;  // number of filled rows in current results
  int32_t   numOfTags;     // number of tags
  int32_t   numOfCols;     // number of columns, including the tags columns
  int32_t   rowSize;       // size of each row
  SInterval interval;
  char     *prevValues;    // previous row of data, to generate the interpolation results
  char     *nextValues;    // next row of data
  char**    pData;         // original result data block involved in filling data
  int32_t   alloc;         // data buffer size in rows
  int8_t    precision;     // time resolution

  SFillColInfo*    pFillCol;  // column info for fill operations
  SFillTagColInfo* pTags;     // tags value for filling gap
  void*            handle;    // for debug purpose
} SFillInfo;
/* A key paired with an untyped pointer to its value. */
typedef struct SPoint {
  int64_t key;
  void   *val;
} SPoint;
/*
 * Fill API operating on SFillInfo.
 * NOTE(review): the summaries below are inferred from the names and signatures
 * only -- confirm against the implementation:
 *  - taosCreateFillInfo / taosDestroyFillInfo: create and destroy an SFillInfo
 *    (what the void* returned by taosDestroyFillInfo means is not visible here);
 *  - taosResetFillInfo, taosFillSetStartInfo, taosFillSetInputDataBlock:
 *    update the state of an existing SFillInfo;
 *  - taosFillHasMoreResults, getNumOfResultsAfterFillGap, taosFillResultDataBlock:
 *    query for / produce filled rows;
 *  - taosGetLinearInterpolationVal: computes `point` from point1 and point2.
 */
SFillInfo* taosCreateFillInfo(int32_t order, TSKEY skey, int32_t numOfTags, int32_t capacity, int32_t numOfCols,
int64_t slidingTime, int8_t slidingUnit, int8_t precision, int32_t fillType,
SFillColInfo* pFillCol, void* handle);
void taosResetFillInfo(SFillInfo* pFillInfo, TSKEY startTimestamp);
void* taosDestroyFillInfo(SFillInfo *pFillInfo);
void taosFillSetStartInfo(SFillInfo* pFillInfo, int32_t numOfRows, TSKEY endKey);
void taosFillSetInputDataBlock(SFillInfo* pFillInfo, const struct SSDataBlock* pInput);
bool taosFillHasMoreResults(SFillInfo* pFillInfo);
int64_t getNumOfResultsAfterFillGap(SFillInfo* pFillInfo, int64_t ekey, int32_t maxNumOfRows);
int32_t taosGetLinearInterpolationVal(SPoint* point, int32_t outputType, SPoint* point1, SPoint* point2, int32_t inputType);
int64_t taosFillResultDataBlock(SFillInfo* pFillInfo, void** output, int32_t capacity);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TFILL_H

View File

@ -0,0 +1,79 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_HISTOGRAM_H
#define TDENGINE_HISTOGRAM_H
#ifdef __cplusplus
extern "C" {
#endif
/* Selects the array-based layout of the histogram structures. */
#define USE_ARRAYLIST

#define MAX_HISTOGRAM_BIN 500

/* One histogram bin: a value and a count. */
typedef struct SHistBin {
  double  val;
  int64_t num;

#if !defined(USE_ARRAYLIST)
  double  delta;
  int32_t index;  // index in min-heap list
#endif
} SHistBin;
/* Heap entry: an untyped data pointer and a double value. */
typedef struct SHeapEntry {
  void*  pData;
  double val;
} SHeapEntry;
/* Histogram state; the storage members depend on USE_ARRAYLIST. */
typedef struct SHistogramInfo {
  int64_t numOfElems;
  int32_t numOfEntries;
  int32_t maxEntries;
  double  min;
  double  max;

#if defined(USE_ARRAYLIST)
  SHistBin* elems;
#else
  tSkipList*      pList;
  SLoserTreeInfo* pLoserTree;
  int32_t         maxIndex;
  bool            ordered;
#endif
} SHistogramInfo;
/*
 * Histogram and heap API.
 * NOTE(review): the summaries below are inferred from the names and signatures
 * only -- confirm against the implementation:
 *  - tHistogramCreate / tHistogramCreateFrom: obtain an SHistogramInfo, the
 *    latter presumably placed in the caller-provided buffer pBuf;
 *  - tHistogramAdd: adds val to *pHisto; takes SHistogramInfo**, so it may
 *    replace the histogram pointer;
 *  - tHistogramSum, tHistogramUniform, tHistogramMerge: queries over one or two
 *    histograms; ownership of the returned pointers is not visible here;
 *  - tHistogramDestroy, tHistogramPrint: release / print a histogram;
 *  - histoBinarySearch: search for val in an array of len SHistBin entries;
 *  - tHeapCreate, tHeapSort: create / sort an array of SHeapEntry.
 */
SHistogramInfo* tHistogramCreate(int32_t numOfBins);
SHistogramInfo* tHistogramCreateFrom(void* pBuf, int32_t numOfBins);
int32_t tHistogramAdd(SHistogramInfo** pHisto, double val);
int64_t tHistogramSum(SHistogramInfo* pHisto, double v);
double* tHistogramUniform(SHistogramInfo* pHisto, double* ratio, int32_t num);
SHistogramInfo* tHistogramMerge(SHistogramInfo* pHisto1, SHistogramInfo* pHisto2, int32_t numOfEntries);
void tHistogramDestroy(SHistogramInfo** pHisto);
void tHistogramPrint(SHistogramInfo* pHisto);
int32_t histoBinarySearch(SHistBin* pEntry, int32_t len, double val);
SHeapEntry* tHeapCreate(int32_t numOfEntries);
void tHeapSort(SHeapEntry* pEntry, int32_t len);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_HISTOGRAM_H

View File

@ -0,0 +1,83 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TPERCENTILE_H
#define TDENGINE_TPERCENTILE_H
#ifdef __cplusplus
extern "C" {
#endif
#include "qExtbuffer.h"
#include "qResultbuf.h"
#include "ttszip.h"
/*
 * A minimum and a maximum value. Each one can be read as a double, a signed
 * 64-bit integer or an unsigned 64-bit integer through an anonymous union.
 */
typedef struct MinMaxEntry {
  union {
    double   dMinVal;
    int64_t  i64MinVal;
    uint64_t u64MinVal;
  };
  union {
    double   dMaxVal;
    int64_t  i64MaxVal;
    uint64_t u64MaxVal;  // unsigned, matching u64MinVal
  };
} MinMaxEntry;
/* Size, page id and page pointer of a slot. */
typedef struct {
  int32_t    size;
  int32_t    pageId;
  tFilePage *data;
} SSlotInfo;
/* One bucket slot: its slot info and its min/max range. */
typedef struct tMemBucketSlot {
  SSlotInfo   info;
  MinMaxEntry range;
} tMemBucketSlot;
struct tMemBucket;

/* Callback that takes a bucket and a value and returns an int32_t (stored in tMemBucket as hashFunc). */
typedef int32_t (*__perc_hash_func_t)(struct tMemBucket *pBucket, const void *value);
/* State of the memory bucket declared for the percentile API. */
typedef struct tMemBucket {
  int16_t     numOfSlots;
  int16_t     type;
  int16_t     bytes;
  int32_t     total;
  int32_t     elemPerPage;  // number of elements for each object
  int32_t     maxCapacity;  // maximum allowed number of elements that can be sort directly to get the result
  int32_t     bufPageSize;  // disk page size
  MinMaxEntry range;        // value range
  int32_t     times;        // count that has been checked for deciding the correct data value buckets.

  __compar_fn_t        comparFn;
  tMemBucketSlot      *pSlots;
  SDiskbasedResultBuf *pBuffer;
  __perc_hash_func_t   hashFunc;
} tMemBucket;
/*
 * Memory bucket API.
 * NOTE(review): the summaries below are inferred from the names and signatures
 * only -- confirm against the implementation:
 *  - tMemBucketCreate / tMemBucketDestroy: create and destroy a bucket for
 *    elements of nElemSize bytes and type dataType within [minval, maxval];
 *  - tMemBucketPut: adds `size` elements from data (whether size counts
 *    elements or bytes is not visible here);
 *  - getPercentile: returns the value at the given percent.
 */
tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType, double minval, double maxval);
void tMemBucketDestroy(tMemBucket *pBucket);
int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size);
double getPercentile(tMemBucket *pMemBucket, double percent);
// Close the extern "C" block before the include guard: the guard was opened
// first, so it must be closed last. Otherwise a repeated inclusion from C++
// skips the guarded part but still emits the closing brace.
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TPERCENTILE_H

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TSCALARFUNCTION_H
#define TDENGINE_TSCALARFUNCTION_H
#ifdef __cplusplus
extern "C" {
#endif
#include "function.h"
// Table of scalar function descriptors, defined elsewhere.
// NOTE(review): declared with a single entry while five scalar function ids
// (FUNCTION_CEIL .. FUNCTION_CSUM) are defined below -- confirm the intended size.
extern struct SScalarFunctionInfo scalarFunc[1];
/* Identifiers of the scalar functions. */
#define FUNCTION_CEIL   38
#define FUNCTION_FLOOR  39
#define FUNCTION_ROUND  40
#define FUNCTION_MAVG   41
#define FUNCTION_CSUM   42
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TSCALARFUNCTION_H

View File

@ -0,0 +1,147 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TTSZIP_H
#define TDENGINE_TTSZIP_H
#ifdef __cplusplus
extern "C" {
#endif
#include "os.h"
#include "taosdef.h"
#include "tvariant.h"
/* Constants of the ts-comp buffer: buffer size, file magic number and group limit. */
#define MEM_BUF_SIZE            (1 << 20)
#define TS_COMP_FILE_MAGIC      0x87F5EC4C
#define TS_COMP_FILE_GROUP_MAX  512
/* A raw buffer with its allocation size, threshold and current length. */
typedef struct STSList {
  char*   rawBuf;
  int32_t allocSize;
  int32_t threshold;
  int32_t len;
} STSList;
/* One element: a timestamp, a tag pointer and an id. */
typedef struct STSElem {
  TSKEY     ts;
  tVariant* tag;
  int32_t   id;
} STSElem;
/* Cursor made of vgroup, block and ts indexes plus an order value. */
typedef struct STSCursor {
  int32_t  vgroupIndex;
  int32_t  blockIndex;
  int32_t  tsIndex;
  uint32_t order;
} STSCursor;
/* One block of compressed data together with its tag. */
typedef struct STSBlock {
  tVariant tag;        // tag value
  int32_t  numOfElem;  // number of elements
  int32_t  compLen;    // size after compressed
  int32_t  padding;    // 0xFFFFFFFF by default, after the payload
  char*    payload;    // actual data that is compressed
} STSBlock;
/*
 * Per-group block information.
 * The size of the buffer file should not be greater than 2G, so an int32_t
 * offset is enough.
 */
typedef struct STSGroupBlockInfo {
  int32_t id;           // group id
  int32_t offset;       // offset set value in file
  int32_t numOfBlocks;  // number of total blocks
  int32_t compLen;      // compressed size
} STSGroupBlockInfo;
/* Group block info extended with the uncompressed length. */
typedef struct STSGroupBlockInfoEx {
  STSGroupBlockInfo info;
  int32_t           len;  // length before compress
} STSGroupBlockInfoEx;
/* State of a ts buffer: its file, group table, working buffers and cursor. */
typedef struct STSBuf {
  FILE*    f;
  char     path[PATH_MAX];
  uint32_t fileSize;

  // todo use array
  STSGroupBlockInfoEx* pData;
  uint32_t             numOfAlloc;
  uint32_t             numOfGroups;

  char*     assistBuf;
  int32_t   bufSize;
  STSBlock  block;
  STSList   tsData;      // uncompressed raw ts data
  uint64_t  numOfTotal;
  bool      autoDelete;
  bool      remainOpen;
  int32_t   tsOrder;     // order of timestamp in ts comp buffer
  STSCursor cur;
} STSBuf;
/* Header of a ts buffer file. */
typedef struct STSBufFileHeader {
  uint32_t magic;       // file magic number
  uint32_t numOfGroup;  // number of group stored in current file
  int32_t  tsOrder;     // timestamp order in current file
} STSBufFileHeader;
/*
 * ts buffer (STSBuf) API.
 * NOTE(review): the grouping below is inferred from the names and signatures
 * only -- confirm against the implementation:
 *  - creation / destruction: tsBufCreate, tsBufCreateFromFile,
 *    tsBufCreateFromCompBlocks, tsBufClone, tsBufDestroy (what its void*
 *    return value means is not visible here);
 *  - writing: tsBufAppend, tsBufMerge, tsBufFlush;
 *  - traversal: tsBufResetPos, tsBufNextPos, tsBufGetElem,
 *    tsBufGetElemStartPos, tsBufFindElemStartPosByTag, tsBufGetCursor,
 *    tsBufSetCursor, tsBufSetTraverseOrder, tsBufIsValidElem;
 *  - group queries: tsBufGetGroupBlockInfo, tsBufGetNumOfGroup,
 *    tsBufGetGroupIdList, dumpFileBlockByGroupId.
 */
STSBuf* tsBufCreate(bool autoDelete, int32_t order);
STSBuf* tsBufCreateFromFile(const char* path, bool autoDelete);
STSBuf* tsBufCreateFromCompBlocks(const char* pData, int32_t numOfBlocks, int32_t len, int32_t tsOrder, int32_t id);
void* tsBufDestroy(STSBuf* pTSBuf);
void tsBufAppend(STSBuf* pTSBuf, int32_t id, tVariant* tag, const char* pData, int32_t len);
int32_t tsBufMerge(STSBuf* pDestBuf, const STSBuf* pSrcBuf);
STSBuf* tsBufClone(STSBuf* pTSBuf);
STSGroupBlockInfo* tsBufGetGroupBlockInfo(STSBuf* pTSBuf, int32_t id);
void tsBufFlush(STSBuf* pTSBuf);
void tsBufResetPos(STSBuf* pTSBuf);
bool tsBufNextPos(STSBuf* pTSBuf);
STSElem tsBufGetElem(STSBuf* pTSBuf);
STSElem tsBufGetElemStartPos(STSBuf* pTSBuf, int32_t id, tVariant* tag);
STSCursor tsBufGetCursor(STSBuf* pTSBuf);
void tsBufSetTraverseOrder(STSBuf* pTSBuf, int32_t order);
void tsBufSetCursor(STSBuf* pTSBuf, STSCursor* pCur);
/**
* display all data in comp block file, for debug purpose only
* @param pTSBuf  ts buffer whose data is displayed
*/
void tsBufDisplay(STSBuf* pTSBuf);
int32_t tsBufGetNumOfGroup(STSBuf* pTSBuf);
void tsBufGetGroupIdList(STSBuf* pTSBuf, int32_t* num, int32_t** id);
int32_t dumpFileBlockByGroupId(STSBuf* pTSBuf, int32_t id, void* buf, int32_t* len, int32_t* numOfBlocks);
STSElem tsBufFindElemStartPosByTag(STSBuf* pTSBuf, tVariant* pTag);
bool tsBufIsValidElem(STSElem* pElem);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TTSZIP_H

View File

@ -0,0 +1,79 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TUDF_H
#define TDENGINE_TUDF_H
/*
 * Slots of the per-UDF function pointer table; TSDB_UDF_FUNC_MAX_NUM is the
 * number of slots.
 */
enum {
  TSDB_UDF_FUNC_NORMAL = 0,
  TSDB_UDF_FUNC_INIT,
  TSDB_UDF_FUNC_FINALIZE,
  TSDB_UDF_FUNC_MERGE,
  TSDB_UDF_FUNC_DESTROY,
  TSDB_UDF_FUNC_MAX_NUM
};
/* Initialization data passed to user-defined function callbacks. */
typedef struct SUdfInit {
  int32_t  maybe_null;  /* 1 if function can return NULL */
  uint32_t decimals;    /* for real functions */
  uint64_t length;      /* For string functions */
  char*    ptr;         /* free pointer for function data */
  int32_t  const_item;  /* 0 if result is independent of arguments */

  // script like lua/javascript
  void* script_ctx;
  void (*destroyCtxFunc)(void* script_ctx);
} SUdfInit;
/* Description of one user-defined function. */
typedef struct SUdfInfo {
  int32_t functionId;  // system assigned function id
  int32_t funcType;    // scalar function or aggregate function
  int8_t  resType;     // result type
  int16_t resBytes;    // result byte
  int32_t contLen;     // content length
  int32_t bufSize;     // interbuf size
  char*   name;        // function name
  void*   handle;      // handle loaded in mem
  void*   funcs[TSDB_UDF_FUNC_MAX_NUM];  // function ptr

  // for script like lua/javascript only
  int   isScript;
  void* pScriptCtx;

  SUdfInit init;
  char*    content;
  char*    path;
} SUdfInfo;
// Callback signatures for script-based functions; each takes the script context pCtx.
typedef int32_t (*scriptInitFunc)(void* pCtx);

typedef void (*scriptNormalFunc)(void* pCtx, char* data, int16_t iType, int16_t iBytes,
                                 int32_t numOfRows, int64_t* ptList, int64_t key,
                                 char* dataOutput, char* tsOutput, int32_t* numOfOutput,
                                 int16_t oType, int16_t oBytes);

typedef void (*scriptFinalizeFunc)(void* pCtx, int64_t key, char* dataOutput,
                                   int32_t* numOfOutput);

typedef void (*scriptMergeFunc)(void* pCtx, char* data, int32_t numOfRows,
                                char* dataOutput, int32_t* numOfOutput);

typedef void (*scriptDestroyFunc)(void* pCtx);
// Callback signatures for functions loaded from a dynamic library; each receives an SUdfInit.
typedef void (*udfNormalFunc)(char* data, int16_t itype, int16_t iBytes, int32_t numOfRows,
                              int64_t* ts, char* dataOutput, char* interBuf, char* tsOutput,
                              int32_t* numOfOutput, int16_t oType, int16_t oBytes,
                              SUdfInit* buf);

typedef int32_t (*udfInitFunc)(SUdfInit* data);

typedef void (*udfFinalizeFunc)(char* dataOutput, char* interBuf, int32_t* numOfOutput,
                                SUdfInit* buf);

typedef void (*udfMergeFunc)(char* data, int32_t numOfRows, char* dataOutput,
                             int32_t* numOfOutput, SUdfInit* buf);

typedef void (*udfDestroyFunc)(SUdfInit* buf);
#endif // TDENGINE_TUDF_H

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,524 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "os.h"
#include "taosdef.h"
#include "taosmsg.h"
#include "ttypes.h"
#include "tfill.h"
#include "thash.h"
#include "function.h"
#include "common.h"
#include "ttime.h"
// true when the fill descriptor scans in ascending timestamp order
#define FILL_IS_ASC_FILL(_f) ((_f)->order == TSDB_ORDER_ASC)
// linear interpolation of the value at key _k on the line through (_k1, _v1) and (_k2, _v2);
// keys are converted to double; the result is not finite when _k1 == _k2
#define DO_INTERPOLATION(_v1, _v2, _k1, _k2, _k) ((_v1) + ((_v2) - (_v1)) * (((double)(_k)) - ((double)(_k1))) / (((double)(_k2)) - ((double)(_k1))))
// +1 for ascending order, -1 for any other order; used as the sign of the time step
#define GET_FORWARD_DIRECTION_FACTOR(_ord) (((_ord) == TSDB_ORDER_ASC)? 1:-1)
/*
 * Copy the cached tag values into row `genRows` of every output column that is
 * neither a normal column nor a user-defined column.
 *
 * pFillInfo  fill descriptor holding the column list and the tag value cache
 * data       per-column output buffers
 * genRows    index of the output row to write
 */
static void setTagsValue(SFillInfo* pFillInfo, void** data, int32_t genRows) {
  for (int32_t colIdx = 0; colIdx < pFillInfo->numOfCols; ++colIdx) {
    SFillColInfo* pFillCol = &pFillInfo->pFillCol[colIdx];

    const bool skip = TSDB_COL_IS_NORMAL_COL(pFillCol->flag) || TSDB_COL_IS_UD_COL(pFillCol->flag);
    if (!skip) {
      char* dst = elePtrAt(data[colIdx], pFillCol->col.bytes, genRows);

      // the column must reference a valid slot in the tag cache, and that slot must belong to it
      assert(pFillCol->tagIndex >= 0 && pFillCol->tagIndex < pFillInfo->numOfTags);
      SFillTagColInfo* pTagSlot = &pFillInfo->pTags[pFillCol->tagIndex];
      assert (pTagSlot->col.colId == pFillCol->col.colId);

      assignVal(dst, pTagSlot->tagVal, pFillCol->col.bytes, pFillCol->col.type);
    }
  }
}
/*
 * Set every column except the first one to NULL in output row `rowIndex`.
 * Column 0 is the timestamp column and is left untouched.
 */
static void setNullValueForRow(SFillInfo* pFillInfo, void** data, int32_t numOfCol, int32_t rowIndex) {
  int32_t colIdx = 1;  // column 0 is always the timestamp column

  while (colIdx < numOfCol) {
    SFillColInfo* pFillCol = &pFillInfo->pFillCol[colIdx];
    char*         cell = elePtrAt(data[colIdx], pFillCol->col.bytes, rowIndex);

    setNull(cell, pFillCol->col.type, pFillCol->col.bytes);
    ++colIdx;
  }
}
/*
 * Generate one synthetic (filled) row at output position pFillInfo->numOfCurrent.
 *
 * The timestamp column receives pFillInfo->currentKey; the other non-tag columns are produced
 * according to pFillInfo->type (PREV, NEXT, LINEAR, or otherwise the per-column fill value).
 * Tag columns are written by setTagsValue(). On return currentKey has been advanced by one
 * sliding step in scan direction and numOfCurrent has been incremented.
 *
 * data        per-column output buffers
 * srcData     per-column input buffers (read only by the LINEAR branch)
 * ts          timestamp of the input row used as the second interpolation point
 * outOfBound  when true, the LINEAR branch emits NULLs instead of interpolating
 */
static void doFillOneRowResult(SFillInfo* pFillInfo, void** data, char** srcData, int64_t ts, bool outOfBound) {
char* prev = pFillInfo->prevValues;
char* next = pFillInfo->nextValues;
SPoint point1, point2, point;
int32_t step = GET_FORWARD_DIRECTION_FACTOR(pFillInfo->order);
// set the primary timestamp column value
int32_t index = pFillInfo->numOfCurrent;
char* val = elePtrAt(data[0], TSDB_KEYSIZE, index);
*(TSKEY*) val = pFillInfo->currentKey;
// set the other values
if (pFillInfo->type == TSDB_FILL_PREV) {
// the "previous" row in scan order is prevValues for ascending scans and nextValues otherwise
char* p = FILL_IS_ASC_FILL(pFillInfo) ? prev : next;
if (p != NULL) {
for (int32_t i = 1; i < pFillInfo->numOfCols; ++i) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
if (TSDB_COL_IS_TAG(pCol->flag)) {
continue;
}
char* output = elePtrAt(data[i], pCol->col.bytes, index);
assignVal(output, p + pCol->col.offset, pCol->col.bytes, pCol->col.type);
}
} else { // no prev value yet, set the value for NULL
setNullValueForRow(pFillInfo, data, pFillInfo->numOfCols, index);
}
} else if (pFillInfo->type == TSDB_FILL_NEXT) {
// mirror image of the PREV case: nextValues for ascending scans, prevValues otherwise
char* p = FILL_IS_ASC_FILL(pFillInfo)? next : prev;
if (p != NULL) {
for (int32_t i = 1; i < pFillInfo->numOfCols; ++i) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
if (TSDB_COL_IS_TAG(pCol->flag)) {
continue;
}
char* output = elePtrAt(data[i], pCol->col.bytes, index);
assignVal(output, p + pCol->col.offset, pCol->col.bytes, pCol->col.type);
}
} else { // no next value yet, set the value for NULL
setNullValueForRow(pFillInfo, data, pFillInfo->numOfCols, index);
}
} else if (pFillInfo->type == TSDB_FILL_LINEAR) {
// TODO : linear interpolation supports NULL value
if (prev != NULL && !outOfBound) {
for (int32_t i = 1; i < pFillInfo->numOfCols; ++i) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
if (TSDB_COL_IS_TAG(pCol->flag)) {
continue;
}
int16_t type = pCol->col.type;
int16_t bytes = pCol->col.bytes;
char *val1 = elePtrAt(data[i], pCol->col.bytes, index);
// binary, nchar and bool columns cannot be interpolated: emit NULL
if (type == TSDB_DATA_TYPE_BINARY|| type == TSDB_DATA_TYPE_NCHAR || type == TSDB_DATA_TYPE_BOOL) {
setNull(val1, pCol->col.type, bytes);
continue;
}
// point1: cached previous row (its key is read from the start of the prev buffer)
// point2: current input row at pFillInfo->index with timestamp ts
// point : the row being generated; its value is written directly into the output cell
point1 = (SPoint){.key = *(TSKEY*)(prev), .val = prev + pCol->col.offset};
point2 = (SPoint){.key = ts, .val = srcData[i] + pFillInfo->index * bytes};
point = (SPoint){.key = pFillInfo->currentKey, .val = val1};
taosGetLinearInterpolationVal(&point, type, &point1, &point2, type);
}
} else {
setNullValueForRow(pFillInfo, data, pFillInfo->numOfCols, index);
}
} else { // fill the default value
for (int32_t i = 1; i < pFillInfo->numOfCols; ++i) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
if (TSDB_COL_IS_TAG(pCol->flag)/* || IS_VAR_DATA_TYPE(pCol->col.type)*/) {
continue;
}
char* val1 = elePtrAt(data[i], pCol->col.bytes, index);
assignVal(val1, (char*)&pCol->fillVal.i, pCol->col.bytes, pCol->col.type);
}
}
setTagsValue(pFillInfo, data, index);
// advance to the next window start in scan direction
pFillInfo->currentKey = taosTimeAdd(pFillInfo->currentKey, pFillInfo->interval.sliding * step, pFillInfo->interval.slidingUnit, pFillInfo->precision);
pFillInfo->numOfCurrent++;
}
/*
 * Lazily allocate a one-row cache buffer (rowSize bytes) and store it in *next.
 * Every column except column 0 is initialised to NULL. Does nothing when *next
 * is already set.
 */
static void initBeforeAfterDataBuf(SFillInfo* pFillInfo, char** next) {
  if (*next == NULL) {
    *next = calloc(1, pFillInfo->rowSize);

    int colIdx = 1;
    while (colIdx < pFillInfo->numOfCols) {
      SFillColInfo* pFillCol = &pFillInfo->pFillCol[colIdx];
      setNull(*next + pFillCol->col.offset, pFillCol->col.type, pFillCol->col.bytes);
      colIdx++;
    }
  }
}
/*
 * Copy the input row at pFillInfo->index, column by column, into the row cache
 * `buf`, placing each value at its column offset.
 */
static void copyCurrentRowIntoBuf(SFillInfo* pFillInfo, char** srcData, char* buf) {
  const int32_t row = pFillInfo->index;

  int32_t colIdx = 0;
  while (colIdx < pFillInfo->numOfCols) {
    SFillColInfo* pFillCol = &pFillInfo->pFillCol[colIdx];

    char* dst = buf + pFillCol->col.offset;
    char* src = srcData[colIdx] + row * pFillCol->col.bytes;
    memcpy(dst, src, pFillCol->col.bytes);

    ++colIdx;
  }
}
/*
 * Produce up to `outputRows` result rows into `data` while input rows remain.
 *
 * On each iteration the timestamp of the current input row (pFillInfo->index) is compared with
 * pFillInfo->currentKey. While currentKey lies before that timestamp in scan direction, synthetic
 * rows are generated by doFillOneRowResult(); when they are equal, the input row itself is copied
 * to the output (NULL cells are replaced according to the fill type) and the input index advances.
 *
 * Returns the number of rows written (pFillInfo->numOfCurrent). Paths that wrote rows also add
 * that count to pFillInfo->numOfTotal.
 */
static int32_t fillResultImpl(SFillInfo* pFillInfo, void** data, int32_t outputRows) {
pFillInfo->numOfCurrent = 0;
char** srcData = pFillInfo->pData;
char** prev = &pFillInfo->prevValues;
char** next = &pFillInfo->nextValues;
int32_t step = GET_FORWARD_DIRECTION_FACTOR(pFillInfo->order);
// the current key can never be behind the start key in scan direction
if (FILL_IS_ASC_FILL(pFillInfo)) {
assert(pFillInfo->currentKey >= pFillInfo->start);
} else {
assert(pFillInfo->currentKey <= pFillInfo->start);
}
while (pFillInfo->numOfCurrent < outputRows) {
// timestamp of the input row that has not been consumed yet
int64_t ts = ((int64_t*)pFillInfo->pData[0])[pFillInfo->index];
// set the next value for interpolation
if ((pFillInfo->currentKey < ts && FILL_IS_ASC_FILL(pFillInfo)) ||
(pFillInfo->currentKey > ts && !FILL_IS_ASC_FILL(pFillInfo))) {
initBeforeAfterDataBuf(pFillInfo, next);
copyCurrentRowIntoBuf(pFillInfo, srcData, *next);
}
if (((pFillInfo->currentKey < ts && FILL_IS_ASC_FILL(pFillInfo)) || (pFillInfo->currentKey > ts && !FILL_IS_ASC_FILL(pFillInfo))) &&
pFillInfo->numOfCurrent < outputRows) {
// fill the gap between two actual input rows
while (((pFillInfo->currentKey < ts && FILL_IS_ASC_FILL(pFillInfo)) ||
(pFillInfo->currentKey > ts && !FILL_IS_ASC_FILL(pFillInfo))) &&
pFillInfo->numOfCurrent < outputRows) {
doFillOneRowResult(pFillInfo, data, srcData, ts, false);
}
// output buffer is full, abort
if (pFillInfo->numOfCurrent == outputRows) {
pFillInfo->numOfTotal += pFillInfo->numOfCurrent;
return outputRows;
}
} else {
// the current key matches an actual input row: emit that row
assert(pFillInfo->currentKey == ts);
initBeforeAfterDataBuf(pFillInfo, prev);
// for NEXT fill, peek at the following input row (if any) and cache it in *next;
// the index is bumped only temporarily so that copyCurrentRowIntoBuf reads that row
if (pFillInfo->type == TSDB_FILL_NEXT && (pFillInfo->index + 1) < pFillInfo->numOfRows) {
initBeforeAfterDataBuf(pFillInfo, next);
++pFillInfo->index;
copyCurrentRowIntoBuf(pFillInfo, srcData, *next);
--pFillInfo->index;
}
// assign rows to dst buffer
for (int32_t i = 0; i < pFillInfo->numOfCols; ++i) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
if (TSDB_COL_IS_TAG(pCol->flag)/* || IS_VAR_DATA_TYPE(pCol->col.type)*/) {
continue;
}
char* output = elePtrAt(data[i], pCol->col.bytes, pFillInfo->numOfCurrent);
char* src = elePtrAt(srcData[i], pCol->col.bytes, pFillInfo->index);
// a cell is taken as-is when it is the timestamp column, a non-NULL value of a non-count
// function, or a non-zero count; it is also remembered in the prev-row cache
if (i == 0 || (pCol->functionId != FUNCTION_COUNT && !isNull(src, pCol->col.type)) ||
(pCol->functionId == FUNCTION_COUNT && GET_INT64_VAL(src) != 0)) {
assignVal(output, src, pCol->col.bytes, pCol->col.type);
memcpy(*prev + pCol->col.offset, src, pCol->col.bytes);
} else { // i > 0 and data is null , do interpolation
if (pFillInfo->type == TSDB_FILL_PREV) {
assignVal(output, *prev + pCol->col.offset, pCol->col.bytes, pCol->col.type);
} else if (pFillInfo->type == TSDB_FILL_LINEAR) {
// LINEAR keeps the NULL cell unchanged and records it as the previous value
assignVal(output, src, pCol->col.bytes, pCol->col.type);
memcpy(*prev + pCol->col.offset, src, pCol->col.bytes);
} else if (pFillInfo->type == TSDB_FILL_NEXT) {
if (*next) {
assignVal(output, *next + pCol->col.offset, pCol->col.bytes, pCol->col.type);
} else {
setNull(output, pCol->col.type, pCol->col.bytes);
}
} else {
assignVal(output, (char*)&pCol->fillVal.i, pCol->col.bytes, pCol->col.type);
}
}
}
// set the tag value for final result
setTagsValue(pFillInfo, data, pFillInfo->numOfCurrent);
pFillInfo->currentKey = taosTimeAdd(pFillInfo->currentKey, pFillInfo->interval.sliding * step,
pFillInfo->interval.slidingUnit, pFillInfo->precision);
pFillInfo->index += 1;
pFillInfo->numOfCurrent += 1;
}
// stop as soon as either the input block or the output capacity is exhausted
if (pFillInfo->index >= pFillInfo->numOfRows || pFillInfo->numOfCurrent >= outputRows) {
/* the raw data block is exhausted, next value does not exist */
if (pFillInfo->index >= pFillInfo->numOfRows) {
tfree(*next);
}
pFillInfo->numOfTotal += pFillInfo->numOfCurrent;
return pFillInfo->numOfCurrent;
}
}
// reached when the loop body never ran (outputRows <= 0); numOfTotal is not updated on this path
return pFillInfo->numOfCurrent;
}
/*
 * Generate exactly `resultCapacity` synthetic rows into `output`.
 *
 * Used when the current timestamp lies outside the time window of the real
 * result set, so every row is produced purely from the fill strategy (the
 * rows are generated with the out-of-bound flag set). The count of generated
 * rows is added to pFillInfo->numOfTotal.
 *
 * Returns resultCapacity.
 */
static int64_t appendFilledResult(SFillInfo* pFillInfo, void** output, int64_t resultCapacity) {
  // each call to doFillOneRowResult() bumps numOfCurrent by one
  for (pFillInfo->numOfCurrent = 0; pFillInfo->numOfCurrent < resultCapacity; /* advanced by callee */) {
    doFillOneRowResult(pFillInfo, output, pFillInfo->pData, pFillInfo->start, true);
  }

  pFillInfo->numOfTotal += pFillInfo->numOfCurrent;
  assert(pFillInfo->numOfCurrent == resultCapacity);

  return resultCapacity;
}
/*
 * Build the de-duplicated tag cache (pFillInfo->pTags) from the fill column list
 * and compute the total row width.
 *
 * A column takes part in the tag cache when it is flagged as a tag or when its
 * type is TSDB_DATA_TYPE_BINARY. Columns sharing a column id share one cache
 * slot; each such column records its slot in tagIndex. All pData entries are
 * reset to NULL. pFillInfo->numOfTags is set to the number of participating
 * columns (duplicates included).
 *
 * Returns the sum of the byte widths of all columns. `capacity` is not used.
 */
static int32_t setTagColumnInfo(SFillInfo* pFillInfo, int32_t numOfCols, int32_t capacity) {
  int32_t totalBytes = 0;
  int32_t tagColCount = 0;  // participating columns, duplicates included
  int32_t usedSlots = 0;    // distinct entries stored in pTags so far

  for (int32_t colIdx = 0; colIdx < numOfCols; ++colIdx) {
    SFillColInfo* pFillCol = &pFillInfo->pFillCol[colIdx];
    pFillInfo->pData[colIdx] = NULL;
    totalBytes += pFillCol->col.bytes;

    if (!(TSDB_COL_IS_TAG(pFillCol->flag) || pFillCol->col.type == TSDB_DATA_TYPE_BINARY)) {
      continue;
    }

    tagColCount += 1;

    // look for an existing slot carrying the same column id
    int32_t slot = 0;
    while (slot < usedSlots && pFillInfo->pTags[slot].col.colId != pFillCol->col.colId) {
      ++slot;
    }

    if (slot == usedSlots) {  // not cached yet: claim a fresh slot
      SSchema* pSchema = &pFillInfo->pTags[slot].col;
      pSchema->colId = pFillCol->col.colId;
      pSchema->type = pFillCol->col.type;
      pSchema->bytes = pFillCol->col.bytes;

      pFillInfo->pTags[slot].tagVal = calloc(1, pFillCol->col.bytes);
      usedSlots += 1;
    }

    pFillCol->tagIndex = slot;
  }

  pFillInfo->numOfTags = tagColCount;
  assert(usedSlots <= pFillInfo->numOfTags);

  return totalBytes;
}
/*
 * Number of input rows not yet consumed: numOfRows - index, or 0 when the block
 * is empty or the index has already reached the end of a non-empty block.
 */
static int32_t taosNumOfRemainRows(SFillInfo* pFillInfo) {
  const bool noRows = (pFillInfo->numOfRows == 0);
  const bool consumedAll = (pFillInfo->numOfRows > 0) && (pFillInfo->index >= pFillInfo->numOfRows);

  return (noRows || consumedAll) ? 0 : (pFillInfo->numOfRows - pFillInfo->index);
}
/*
 * Allocate and initialise a fill descriptor.
 *
 * order        scan order (TSDB_ORDER_ASC or descending)
 * skey         start timestamp; also the initial current/end key
 * numOfTags    stored first, then overwritten by setTagColumnInfo() with the
 *              count it derives from pCol
 * capacity     stored in the descriptor's `alloc` field
 * numOfCols    number of entries in pCol
 * slidingTime, slidingUnit
 *              used for both the interval and the sliding step
 * precision    timestamp precision
 * fillType     fill strategy; TSDB_FILL_NONE yields NULL
 * pCol         fill column descriptors; on success the descriptor keeps this
 *              pointer and taosDestroyFillInfo() frees it. On failure it is
 *              left untouched and the caller still owns it.
 * handle       opaque pointer stored in the descriptor
 *
 * Returns the new descriptor, or NULL when fillType is TSDB_FILL_NONE or when
 * memory cannot be allocated.
 */
SFillInfo* taosCreateFillInfo(int32_t order, TSKEY skey, int32_t numOfTags, int32_t capacity, int32_t numOfCols,
                              int64_t slidingTime, int8_t slidingUnit, int8_t precision, int32_t fillType,
                              SFillColInfo* pCol, void* handle) {
  if (fillType == TSDB_FILL_NONE) {
    return NULL;
  }

  SFillInfo* pFillInfo = calloc(1, sizeof(SFillInfo));
  if (pFillInfo == NULL) {  // the descriptor is written immediately below
    return NULL;
  }

  taosResetFillInfo(pFillInfo, skey);

  pFillInfo->order = order;
  pFillInfo->type = fillType;
  pFillInfo->pFillCol = pCol;
  pFillInfo->numOfTags = numOfTags;
  pFillInfo->numOfCols = numOfCols;
  pFillInfo->precision = precision;
  pFillInfo->alloc = capacity;
  pFillInfo->handle = handle;

  pFillInfo->interval.interval = slidingTime;
  pFillInfo->interval.intervalUnit = slidingUnit;
  pFillInfo->interval.sliding = slidingTime;
  pFillInfo->interval.slidingUnit = slidingUnit;

  pFillInfo->pData = malloc(POINTER_BYTES * numOfCols);
  pFillInfo->pTags = calloc(numOfCols, sizeof(SFillTagColInfo));

  // setTagColumnInfo() writes to both arrays. A zero-sized request may legitimately
  // return NULL, so only treat NULL as a failure when columns exist.
  if (numOfCols > 0 && (pFillInfo->pData == NULL || pFillInfo->pTags == NULL)) {
    free(pFillInfo->pTags);
    free(pFillInfo->pData);
    free(pFillInfo);
    return NULL;
  }

  for (int32_t i = 0; i < numOfCols; ++i) {
    pFillInfo->pTags[i].col.colId = -2;  // TODO
  }

  pFillInfo->rowSize = setTagColumnInfo(pFillInfo, pFillInfo->numOfCols, pFillInfo->alloc);
  assert(pFillInfo->rowSize > 0);

  return pFillInfo;
}
/*
 * Rewind a fill descriptor to `startTimestamp`: start, current and end keys all
 * become the start timestamp, the input cursor is set to -1 and all row
 * counters are cleared.
 */
void taosResetFillInfo(SFillInfo* pFillInfo, TSKEY startTimestamp) {
  // time range
  pFillInfo->start = startTimestamp;
  pFillInfo->end = startTimestamp;
  pFillInfo->currentKey = startTimestamp;

  // input cursor and counters
  pFillInfo->index = -1;
  pFillInfo->numOfRows = 0;
  pFillInfo->numOfTotal = 0;
  pFillInfo->numOfCurrent = 0;
}
/*
 * Release a fill descriptor together with everything it references: the
 * prev/next row caches, each tag value buffer, the tag array, the column data
 * pointer array, the fill column array and the descriptor itself.
 *
 * Accepts NULL. Always returns NULL.
 */
void* taosDestroyFillInfo(SFillInfo* pFillInfo) {
  if (pFillInfo != NULL) {
    tfree(pFillInfo->prevValues);
    tfree(pFillInfo->nextValues);

    int32_t tagIdx = 0;
    while (tagIdx < pFillInfo->numOfTags) {
      tfree(pFillInfo->pTags[tagIdx].tagVal);
      ++tagIdx;
    }

    tfree(pFillInfo->pTags);
    tfree(pFillInfo->pData);
    tfree(pFillInfo->pFillCol);
    tfree(pFillInfo);
  }

  return NULL;
}
/*
 * Prepare the descriptor for a new input block of `numOfRows` rows that ends at
 * `endKey`. For non-ascending scans the end key is truncated with
 * taosTimeTruncate() using the fill interval. The input cursor is rewound to 0.
 * Does nothing when the fill type is TSDB_FILL_NONE.
 */
void taosFillSetStartInfo(SFillInfo* pFillInfo, int32_t numOfRows, TSKEY endKey) {
  if (pFillInfo->type != TSDB_FILL_NONE) {
    if (FILL_IS_ASC_FILL(pFillInfo)) {
      pFillInfo->end = endKey;
    } else {
      pFillInfo->end = taosTimeTruncate(endKey, &pFillInfo->interval, pFillInfo->precision);
    }

    pFillInfo->numOfRows = numOfRows;
    pFillInfo->index = 0;
  }
}
/*
 * Attach the column buffers of `pInput` to the fill descriptor. For tag columns
 * the first col.bytes bytes of the column buffer are also copied into the tag
 * value cache.
 */
void taosFillSetInputDataBlock(SFillInfo* pFillInfo, const SSDataBlock* pInput) {
  int32_t colIdx = 0;

  while (colIdx < pFillInfo->numOfCols) {
    SFillColInfo*    pFillCol = &pFillInfo->pFillCol[colIdx];
    SColumnInfoData* pSrcCol = taosArrayGet(pInput->pDataBlock, colIdx);

    pFillInfo->pData[colIdx] = pSrcCol->pData;

    if (TSDB_COL_IS_TAG(pFillCol->flag)) {
      // refresh the cached tag value from the incoming block
      SFillTagColInfo* pTagSlot = &pFillInfo->pTags[pFillCol->tagIndex];
      assert (pTagSlot->col.colId == pFillCol->col.colId);

      memcpy(pTagSlot->tagVal, pSrcCol->pData, pFillCol->col.bytes);  // TODO not memcpy??
    }

    ++colIdx;
  }
}
/*
 * Report whether the fill operator can still produce rows: either unconsumed
 * input rows remain, or rows have already been produced and a gap up to the end
 * key is still left to fill (probed with a row limit of 4096).
 */
bool taosFillHasMoreResults(SFillInfo* pFillInfo) {
  if (taosNumOfRemainRows(pFillInfo) > 0) {
    return true;
  }

  if (pFillInfo->numOfTotal <= 0) {
    return false;
  }

  // the end key must lie strictly beyond the start key in scan direction
  const bool rangeOpen = FILL_IS_ASC_FILL(pFillInfo) ? (pFillInfo->end > pFillInfo->start)
                                                     : (pFillInfo->end < pFillInfo->start);

  return rangeOpen && (getNumOfResultsAfterFillGap(pFillInfo, pFillInfo->end, 4096) > 0);
}
/*
 * Estimate how many rows the next fill pass will produce, capped at maxNumOfRows.
 *
 * While input rows remain, the count spans from the current key to the last
 * timestamp of the input block. Otherwise it spans from the current key to
 * `ekey`, or is 0 when `ekey` is already behind the current key in scan direction.
 *
 * Side effect: for non-ascending scans pFillInfo->end is overwritten with the
 * truncated `ekey`.
 */
int64_t getNumOfResultsAfterFillGap(SFillInfo* pFillInfo, TSKEY ekey, int32_t maxNumOfRows) {
  int64_t*      tsList = (int64_t*) pFillInfo->pData[0];
  const int32_t remaining = taosNumOfRemainRows(pFillInfo);
  const bool    ascending = FILL_IS_ASC_FILL(pFillInfo);

  if (!ascending) {
    pFillInfo->end = taosTimeTruncate(ekey, &pFillInfo->interval, pFillInfo->precision);
  }

  int64_t rowCount = -1;

  if (remaining > 0) {
    // still filling gaps inside the current data block, not past the result set
    TSKEY blockLastKey = tsList[pFillInfo->numOfRows - 1];

    rowCount = taosTimeCountInterval(blockLastKey, pFillInfo->currentKey, pFillInfo->interval.sliding,
                                     pFillInfo->interval.slidingUnit, pFillInfo->precision);
    rowCount += 1;
    assert(rowCount >= remaining);
  } else {
    // input exhausted: only the tail up to ekey is left
    const bool pastEnd = ascending ? (ekey < pFillInfo->currentKey) : (ekey > pFillInfo->currentKey);
    if (pastEnd) {
      return 0;
    }

    rowCount = taosTimeCountInterval(ekey, pFillInfo->currentKey, pFillInfo->interval.sliding,
                                     pFillInfo->interval.slidingUnit, pFillInfo->precision);
    rowCount += 1;
  }

  if (rowCount > maxNumOfRows) {
    return maxNumOfRows;
  }
  return rowCount;
}
/*
 * Linearly interpolate the value at point->key on the line through point1 and
 * point2, and store it in point->val.
 *
 * Both input values are read as `inputType` and converted to double; the result
 * is written as `outputType`. Always returns TSDB_CODE_SUCCESS.
 */
int32_t taosGetLinearInterpolationVal(SPoint* point, int32_t outputType, SPoint* point1, SPoint* point2, int32_t inputType) {
  double startVal = -1;
  double endVal = -1;
  GET_TYPED_DATA(startVal, double, inputType, point1->val);
  GET_TYPED_DATA(endVal, double, inputType, point2->val);

  // same arithmetic as DO_INTERPOLATION, written out step by step
  const double keyOffset = ((double)(point->key)) - ((double)(point1->key));
  const double keySpan = ((double)(point2->key)) - ((double)(point1->key));
  double       result = startVal + (endVal - startVal) * keyOffset / keySpan;

  SET_TYPED_DATA(point->val, outputType, result);
  return TSDB_CODE_SUCCESS;
}
/*
 * Produce the next batch of result rows (at most `capacity`) into `output`.
 *
 * When no input rows remain, all rows are synthesised by the fill strategy;
 * otherwise input rows and filled rows are merged by fillResultImpl().
 *
 * Returns the number of rows produced.
 */
int64_t taosFillResultDataBlock(SFillInfo* pFillInfo, void** output, int32_t capacity) {
  const bool inputExhausted = (taosNumOfRemainRows(pFillInfo) == 0);

  int64_t produced = getNumOfResultsAfterFillGap(pFillInfo, pFillInfo->end, capacity);
  assert(produced <= capacity);

  if (inputExhausted) {
    // nothing left to read: append rows generated purely by the fill strategy
    appendFilledResult(pFillInfo, output, produced);
  } else {
    fillResultImpl(pFillInfo, output, (int32_t) produced);
    assert(produced == pFillInfo->numOfCurrent);
  }

  // qDebug("fill:%p, generated fill result, src block:%d, index:%d, brange:%"PRId64"-%"PRId64", currentKey:%"PRId64", current:%d, total:%d, %p",
  //        pFillInfo, pFillInfo->numOfRows, pFillInfo->index, pFillInfo->start, pFillInfo->end, pFillInfo->currentKey, pFillInfo->numOfCurrent,
  //        pFillInfo->numOfTotal, pFillInfo->handle);
  return produced;
}

View File

@ -0,0 +1,410 @@
#include "os.h"
#include "tarray.h"
#include "function.h"
#include "thash.h"
#include "taggfunction.h"
#include "tscalarfunction.h"
static SHashObj* functionHashTable = NULL;
/*
 * Build the name -> function descriptor lookup table from the aggFunc and
 * scalarFunc tables. Intended to run exactly once (see the pthread_once call in
 * qIsBuiltinFunction).
 *
 * The table stores the ADDRESS of each descriptor: lookups read the stored value
 * back as a pointer to a descriptor pointer. The address is therefore placed in
 * a local variable and POINTER_BYTES bytes of that variable are handed to
 * taosHashPut(). Passing the descriptor itself as the data pointer would store
 * the first POINTER_BYTES bytes of the descriptor instead of its address.
 * NOTE(review): this relies on taosHashPut() copying `size` bytes from `data` -- confirm.
 */
static void doInitFunctionHashTable() {
  int numOfEntries = tListLen(aggFunc);
  functionHashTable = taosHashInit(numOfEntries, MurmurHash3_32, false, false);

  for (int32_t i = 0; i < numOfEntries; i++) {
    int32_t len = (int32_t) strlen(aggFunc[i].name);
    void*   pEntry = (void*) &aggFunc[i];
    taosHashPut(functionHashTable, aggFunc[i].name, len, &pEntry, POINTER_BYTES);
  }

  numOfEntries = tListLen(scalarFunc);
  for (int32_t i = 0; i < numOfEntries; ++i) {
    int32_t len = (int32_t) strlen(scalarFunc[i].name);
    void*   pEntry = (void*) &scalarFunc[i];
    taosHashPut(functionHashTable, scalarFunc[i].name, len, &pEntry, POINTER_BYTES);
  }
}
static pthread_once_t functionHashTableInit = PTHREAD_ONCE_INIT;

/*
 * Look up a built-in function by name.
 *
 * name  function name (len bytes are used as the key)
 * len   length of the name in bytes
 *
 * Returns the functionId of the matching descriptor, or -1 when the name is
 * not in the lookup table. The table is built on first use.
 */
int32_t qIsBuiltinFunction(const char* name, int32_t len) {
  pthread_once(&functionHashTableInit, doInitFunctionHashTable);

  SAggFunctionInfo** ppInfo = taosHashGet(functionHashTable, name, len);
  if (ppInfo == NULL) {
    return -1;
  }

  return (*ppInfo)->functionId;
}
/*
 * True when every function id in the list is FUNCTION_TAGPRJ or
 * FUNCTION_TID_TAG (an empty list also yields true).
 */
bool isTagsQuery(SArray* pFunctionIdList) {
  const int32_t total = (int32_t) taosArrayGetSize(pFunctionIdList);
  int32_t       idx = 0;

  while (idx < total) {
    const int16_t funcId = *(int16_t*) taosArrayGet(pFunctionIdList, idx);

    // "select count(tbname)" query
    // if (functId == FUNCTION_COUNT && pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
    //   continue;
    // }

    const bool tagOnly = (funcId == FUNCTION_TAGPRJ) || (funcId == FUNCTION_TID_TAG);
    if (!tagOnly) {
      return false;
    }

    ++idx;
  }

  return true;
}
//bool tscMultiRoundQuery(SArray* pFunctionIdList, int32_t index) {
// if (!UTIL_TABLE_IS_SUPER_TABLE(pQueryInfo->pTableMetaInfo[index])) {
// return false;
// }
//
// size_t numOfExprs = (int32_t) getNumOfExprs(pQueryInfo);
// for(int32_t i = 0; i < numOfExprs; ++i) {
// SExprInfo* pExpr = getExprInfo(pQueryInfo, i);
// if (pExpr->base.functionId == FUNCTION_STDDEV_DST) {
// return true;
// }
// }
//
// return false;
//}
/* True when the list contains at least one FUNCTION_BLKINFO entry. */
bool isBlockInfoQuery(SArray* pFunctionIdList) {
  const int32_t total = (int32_t) taosArrayGetSize(pFunctionIdList);
  bool          found = false;

  for (int32_t idx = 0; idx < total && !found; ++idx) {
    const int32_t funcId = *(int16_t*) taosArrayGet(pFunctionIdList, idx);
    found = (funcId == FUNCTION_BLKINFO);
  }

  return found;
}
/*
 * True when every function id in the list is one of the projection-style
 * functions: TS_DUMMY, PRJ, TAGPRJ, TAG, TS, ARITHM, DIFF or DERIVATIVE
 * (an empty list also yields true).
 */
bool isProjectionQuery(SArray* pFunctionIdList) {
  const int32_t total = (int32_t) taosArrayGetSize(pFunctionIdList);

  for (int32_t idx = 0; idx < total; ++idx) {
    const int32_t funcId = *(int16_t*) taosArrayGet(pFunctionIdList, idx);

    const bool projectionLike = funcId == FUNCTION_TS_DUMMY || funcId == FUNCTION_PRJ ||
                                funcId == FUNCTION_TAGPRJ || funcId == FUNCTION_TAG ||
                                funcId == FUNCTION_TS || funcId == FUNCTION_ARITHM ||
                                funcId == FUNCTION_DIFF || funcId == FUNCTION_DERIVATIVE;
    if (!projectionLike) {
      return false;
    }
  }

  return true;
}
/* True when the list contains FUNCTION_DIFF or FUNCTION_DERIVATIVE. */
bool isDiffDerivQuery(SArray* pFunctionIdList) {
  const int32_t total = (int32_t) taosArrayGetSize(pFunctionIdList);
  int32_t       idx = 0;

  while (idx < total) {
    const int32_t funcId = *(int16_t*) taosArrayGet(pFunctionIdList, idx);

    // TS_DUMMY entries are ignored
    if (funcId != FUNCTION_TS_DUMMY && (funcId == FUNCTION_DIFF || funcId == FUNCTION_DERIVATIVE)) {
      return true;
    }

    ++idx;
  }

  return false;
}
/*
 * True when every function id in the list is FUNCTION_INTERP, ignoring
 * FUNCTION_TAG and FUNCTION_TS entries (an empty list also yields true).
 */
bool isPointInterpQuery(SArray* pFunctionIdList) {
  const int32_t total = (int32_t) taosArrayGetSize(pFunctionIdList);

  for (int32_t idx = 0; idx < total; ++idx) {
    const int32_t funcId = *(int16_t*) taosArrayGet(pFunctionIdList, idx);

    const bool accepted = (funcId == FUNCTION_TAG) || (funcId == FUNCTION_TS) || (funcId == FUNCTION_INTERP);
    if (!accepted) {
      return false;
    }
  }

  return true;
}
/*
 * Placeholder: returns false for projection queries. For any other query the
 * check has not been ported yet, so the function asserts; if assertions are
 * compiled out it returns false.
 * The commented-out block is the previous implementation, kept as a porting reference.
 */
bool isArithmeticQueryOnAggResult(SArray* pFunctionIdList) {
if (isProjectionQuery(pFunctionIdList)) {
return false;
}
// not implemented beyond this point
assert(0);
// size_t numOfOutput = getNumOfFields(pQueryInfo);
// for(int32_t i = 0; i < numOfOutput; ++i) {
// SExprInfo* pExprInfo = tscFieldInfoGetInternalField(&pQueryInfo->fieldsInfo, i)->pExpr;
// if (pExprInfo->pExpr != NULL) {
// return true;
// }
// }
return false;
}
/*
 * Placeholder: always returns false; pFunctionIdList is not inspected.
 * The commented-out block is the previous implementation, kept as a porting reference.
 */
bool isGroupbyColumn(SArray* pFunctionIdList) {
// STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0);
// int32_t numOfCols = getNumOfColumns(pTableMetaInfo->pTableMeta);
//
// SGroupbyExpr* pGroupbyExpr = &pQueryInfo->groupbyExpr;
// for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) {
// SColIndex* pIndex = taosArrayGet(pGroupbyExpr->columnInfo, k);
// if (!TSDB_COL_IS_TAG(pIndex->flag) && pIndex->colIndex < numOfCols) { // group by normal columns
// return true;
// }
// }
return false;
}
/* True when the list contains FUNCTION_TOP or FUNCTION_BOTTOM. */
bool isTopBotQuery(SArray* pFunctionIdList) {
  const int32_t total = (int32_t) taosArrayGetSize(pFunctionIdList);
  bool          found = false;

  for (int32_t idx = 0; idx < total && !found; ++idx) {
    const int32_t funcId = *(int16_t*) taosArrayGet(pFunctionIdList, idx);

    // FUNCTION_TS entries are ignored
    found = (funcId != FUNCTION_TS) && (funcId == FUNCTION_TOP || funcId == FUNCTION_BOTTOM);
  }

  return found;
}
/* True when the list holds exactly one entry and that entry is FUNCTION_TS_COMP. */
bool isTsCompQuery(SArray* pFunctionIdList) {
  const int32_t total = (int32_t) taosArrayGetSize(pFunctionIdList);

  if (total == 1) {
    const int32_t onlyId = *(int16_t*) taosArrayGet(pFunctionIdList, 0);
    return onlyId == FUNCTION_TS_COMP;
  }

  return false;
}
/* True when the list contains at least one FUNCTION_TWA entry. */
bool isTWAQuery(SArray* pFunctionIdList) {
  const int32_t total = (int32_t) taosArrayGetSize(pFunctionIdList);
  int32_t       idx = 0;

  while (idx < total) {
    const int32_t funcId = *(int16_t*) taosArrayGet(pFunctionIdList, idx);
    if (funcId == FUNCTION_TWA) {
      return true;
    }
    ++idx;
  }

  return false;
}
/* True when the list contains at least one FUNCTION_IRATE entry. */
bool isIrateQuery(SArray* pFunctionIdList) {
  const int32_t total = (int32_t) taosArrayGetSize(pFunctionIdList);
  bool          found = false;

  for (int32_t idx = 0; idx < total && !found; ++idx) {
    const int32_t funcId = *(int16_t*) taosArrayGet(pFunctionIdList, idx);
    found = (funcId == FUNCTION_IRATE);
  }

  return found;
}
/* True when the list contains at least one FUNCTION_STDDEV_DST entry. */
bool isStabledev(SArray* pFunctionIdList) {
  const int32_t total = (int32_t) taosArrayGetSize(pFunctionIdList);
  int32_t       idx = 0;

  while (idx < total) {
    const int32_t funcId = *(int16_t*) taosArrayGet(pFunctionIdList, idx);
    if (funcId == FUNCTION_STDDEV_DST) {
      return true;
    }
    ++idx;
  }

  return false;
}
/*
 * Placeholder: not ported yet. Asserts on entry; if assertions are compiled out
 * it walks the list without acting on any entry and returns false.
 * The commented-out fragments are the previous checks, which need the query's
 * scan order and are kept as a porting reference.
 */
bool needReverseScan(SArray* pFunctionIdList) {
// not implemented: the order-dependent checks below are disabled
assert(0);
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
for (int32_t i = 0; i < num; ++i) {
int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i);
if (f == FUNCTION_TS || f == FUNCTION_TS_DUMMY || f == FUNCTION_TAG) {
continue;
}
// if ((f == FUNCTION_FIRST || f == FUNCTION_FIRST_DST) && pQueryInfo->order.order == TSDB_ORDER_DESC) {
// return true;
// }
if (f == FUNCTION_LAST || f == FUNCTION_LAST_DST) {
// the scan order to acquire the last result of the specified column
// int32_t order = (int32_t)pExpr->base.param[0].i64;
// if (order != pQueryInfo->order.order) {
// return true;
// }
}
}
return false;
}
/*
 * Placeholder: not ported yet. Asserts on entry; if assertions are compiled out
 * it returns false without inspecting pFunctionIdList.
 * The commented-out block is the previous implementation, kept as a porting reference.
 */
bool isSimpleAggregateRv(SArray* pFunctionIdList) {
assert(0);
// if (pQueryInfo->interval.interval > 0 || pQueryInfo->sessionWindow.gap > 0) {
// return false;
// }
//
// if (tscIsDiffDerivQuery(pQueryInfo)) {
// return false;
// }
//
// size_t numOfExprs = getNumOfExprs(pQueryInfo);
// for (int32_t i = 0; i < numOfExprs; ++i) {
// SExprInfo* pExpr = getExprInfo(pQueryInfo, i);
// if (pExpr == NULL) {
// continue;
// }
//
// int32_t functionId = pExpr->base.functionId;
// if (functionId < 0) {
// SUdfInfo* pUdfInfo = taosArrayGet(pQueryInfo->pUdfInfo, -1 * functionId - 1);
// if (pUdfInfo->funcType == TSDB_UDF_TYPE_AGGREGATE) {
// return true;
// }
//
// continue;
// }
//
// if (functionId == FUNCTION_TS || functionId == FUNCTION_TS_DUMMY) {
// continue;
// }
//
// if ((!IS_MULTIOUTPUT(aAggs[functionId].status)) ||
// (functionId == FUNCTION_TOP || functionId == FUNCTION_BOTTOM || functionId == FUNCTION_TS_COMP)) {
// return true;
// }
// }
return false;
}
/*
 * True when the list holds exactly one entry and that entry is FUNCTION_BLKINFO.
 *
 * The size is checked before the first element is read, so an empty list
 * returns false instead of dereferencing an element that does not exist.
 */
bool isBlockDistQuery(SArray* pFunctionIdList) {
  int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
  if (num != 1) {
    return false;
  }

  int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, 0);
  return f == FUNCTION_BLKINFO;
}
/*
 * Placeholder: always returns false; neither argument is inspected.
 * The commented-out block is the previous implementation, kept as a porting reference.
 */
bool isTwoStageSTableQuery(SArray* pFunctionIdList, int32_t tableIndex) {
// if (pQueryInfo == NULL) {
// return false;
// }
//
// STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableIndex);
// if (pTableMetaInfo == NULL) {
// return false;
// }
//
// if ((pQueryInfo->type & TSDB_QUERY_TYPE_FREE_RESOURCE) == TSDB_QUERY_TYPE_FREE_RESOURCE) {
// return false;
// }
//
// // for ordered projection query, iterate all qualified vnodes sequentially
// if (tscNonOrderedProjectionQueryOnSTable(pQueryInfo, tableIndex)) {
// return false;
// }
//
// if (!TSDB_QUERY_HAS_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_STABLE_SUBQUERY) && pQueryInfo->command == TSDB_SQL_SELECT) {
// return UTIL_TABLE_IS_SUPER_TABLE(pTableMetaInfo);
// }
return false;
}
/*
 * Placeholder: always returns true; neither argument is inspected.
 * NOTE(review): unlike the neighbouring stubs this one returns true -- confirm that
 * callers expect this default until the function is ported.
 * The commented-out block is the previous implementation, kept as a porting reference.
 */
bool isProjectionQueryOnSTable(SArray* pFunctionIdList, int32_t tableIndex) {
// STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableIndex);
//
// /*
// * In following cases, return false for non ordered project query on super table
// * 1. failed to get tableMeta from server; 2. not a super table; 3. limitation is 0;
// * 4. show queries, instead of a select query
// */
// size_t numOfExprs = getNumOfExprs(pQueryInfo);
// if (pTableMetaInfo == NULL || !UTIL_TABLE_IS_SUPER_TABLE(pTableMetaInfo) ||
// pQueryInfo->command == TSDB_SQL_RETRIEVE_EMPTY_RESULT || numOfExprs == 0) {
// return false;
// }
//
// for (int32_t i = 0; i < numOfExprs; ++i) {
// int32_t functionId = getExprInfo(pQueryInfo, i)->base.functionId;
//
// if (functionId < 0) {
// SUdfInfo* pUdfInfo = taosArrayGet(pQueryInfo->pUdfInfo, -1 * functionId - 1);
// if (pUdfInfo->funcType == TSDB_UDF_TYPE_AGGREGATE) {
// return false;
// }
//
// continue;
// }
//
// if (functionId != FUNCTION_PRJ &&
// functionId != FUNCTION_TAGPRJ &&
// functionId != FUNCTION_TAG &&
// functionId != FUNCTION_TS &&
// functionId != FUNCTION_ARITHM &&
// functionId != FUNCTION_TS_COMP &&
// functionId != FUNCTION_DIFF &&
// functionId != FUNCTION_DERIVATIVE &&
// functionId != FUNCTION_TS_DUMMY &&
// functionId != FUNCTION_TID_TAG) {
// return false;
// }
// }
return true;
}
/*
 * Placeholder: always returns false; pFunctionIdList is not inspected.
 * The commented-out block is the previous implementation, kept as a porting reference.
 */
bool hasTagValOutput(SArray* pFunctionIdList) {
// size_t numOfExprs = getNumOfExprs(pQueryInfo);
// SExprInfo* pExpr1 = getExprInfo(pQueryInfo, 0);
//
// if (numOfExprs == 1 && pExpr1->base.functionId == FUNCTION_TS_COMP) {
// return true;
// }
//
// for (int32_t i = 0; i < numOfExprs; ++i) {
// SExprInfo* pExpr = getExprInfo(pQueryInfo, i);
// if (pExpr == NULL) {
// continue;
// }
//
// // ts_comp column required the tag value for join filter
// if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
// return true;
// }
// }
return false;
}
/* True when the list contains FUNCTION_TWA or FUNCTION_INTERP. */
bool timeWindowInterpoRequired(SArray* pFunctionIdList) {
  const int32_t total = (int32_t) taosArrayGetSize(pFunctionIdList);
  bool          required = false;

  for (int32_t idx = 0; idx < total && !required; ++idx) {
    const int32_t funcId = *(int16_t*) taosArrayGet(pFunctionIdList, idx);
    required = (funcId == FUNCTION_TWA) || (funcId == FUNCTION_INTERP);
  }

  return required;
}
//SQueryType setQueryType(SArray* pFunctionIdList) {
// assert(pFunctionIdList != NULL);
//
//
//}

View File

@ -0,0 +1,578 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "os.h"
#include "thistogram.h"
#include "taosdef.h"
#include "taosmsg.h"
#include "tlosertree.h"
/**
*
* implement the histogram and percentile_approx based on the paper:
* Yael Ben-Haim, Elad Tom-Tov. A Streaming Parallel Decision Tree Algorithm,
* The Journal of Machine Learning Research.Volume 11, 3/1/2010 pp.849-872
* https://dl.acm.org/citation.cfm?id=1756034
*
* @date 2018-12-14
* @version 0.1
*
*/
static int32_t histogramCreateBin(SHistogramInfo* pHisto, int32_t index, double val);
/*
 * Allocate a histogram with room for `numOfEntries` bins plus one spare slot,
 * and initialise it through tHistogramCreateFrom().
 *
 * Returns the new histogram, or NULL when the allocation fails (the buffer is
 * cleared by tHistogramCreateFrom(), so it must not be NULL).
 */
SHistogramInfo* tHistogramCreate(int32_t numOfEntries) {
  /* need one redundant slot */
  SHistogramInfo* pHisto = malloc(sizeof(SHistogramInfo) + sizeof(SHistBin) * (numOfEntries + 1));
  if (pHisto == NULL) {
    return NULL;
  }

#if !defined(USE_ARRAYLIST)
  pHisto->pList = SSkipListCreate(MAX_SKIP_LIST_LEVEL, TSDB_DATA_TYPE_DOUBLE, sizeof(double));
  SInsertSupporter* pss = malloc(sizeof(SInsertSupporter));

  // pHisto->maxEntries is not initialised at this point; use the requested size directly
  pss->numOfEntries = numOfEntries;
  pss->pSkipList = pHisto->pList;

  int32_t ret = tLoserTreeCreate1(&pHisto->pLoserTree, numOfEntries, pss, compare);
  pss->pTree = pHisto->pLoserTree;
  // NOTE(review): tHistogramCreateFrom() below clears the header of the buffer, which would
  // also clear the pList / pLoserTree fields set above -- confirm whether this variant is
  // still meant to be built.
#endif

  return tHistogramCreateFrom(pHisto, numOfEntries);
}
/*
 * Initialise a histogram inside a caller-provided buffer.
 *
 * pBuf       buffer of at least sizeof(SHistogramInfo) + sizeof(SHistBin) * (numOfBins + 1) bytes
 * numOfBins  maximum number of bins
 *
 * The buffer is cleared, the bin array is placed directly after the header, the
 * first numOfBins bin values are set to -DBL_MAX, and min/max start at
 * DBL_MAX / -DBL_MAX. Returns pBuf.
 */
SHistogramInfo* tHistogramCreateFrom(void* pBuf, int32_t numOfBins) {
  const size_t totalBytes = sizeof(SHistogramInfo) + sizeof(SHistBin) * (numOfBins + 1);
  memset(pBuf, 0, totalBytes);

  SHistogramInfo* pHisto = pBuf;

  // bins live right behind the header in the same buffer
  pHisto->elems = (SHistBin*)(pHisto + 1);
  pHisto->maxEntries = numOfBins;
  pHisto->min = DBL_MAX;
  pHisto->max = -DBL_MAX;

  int32_t slot = 0;
  while (slot < numOfBins) {
    pHisto->elems[slot].val = -DBL_MAX;
    ++slot;
  }

  return pHisto;
}
/*
 * Add one sample `val` to the histogram.
 *
 * pHisto  in/out: when *pHisto is NULL a histogram with MAX_HISTOGRAM_BIN bins is created first
 * val     the sample
 *
 * Two implementations are selected at compile time:
 *  - USE_ARRAYLIST: bins are kept in the elems array; the slot is located with
 *    histoBinarySearch(), then either the matching bin's count is incremented or a new bin
 *    is created via histogramCreateBin().
 *  - otherwise: bins are kept in a skip list, with a loser tree used to pick the pair of
 *    adjacent bins to merge once the number of bins exceeds maxEntries. This variant also
 *    prints debugging output to stdout.
 *
 * In both variants min, max and numOfElems are updated at the end. Always returns 0.
 */
int32_t tHistogramAdd(SHistogramInfo** pHisto, double val) {
if (*pHisto == NULL) {
*pHisto = tHistogramCreate(MAX_HISTOGRAM_BIN);
}
#if defined(USE_ARRAYLIST)
int32_t idx = histoBinarySearch((*pHisto)->elems, (*pHisto)->numOfEntries, val);
assert(idx >= 0 && idx <= (*pHisto)->maxEntries && (*pHisto)->elems != NULL);
// exact match: just bump the count of the existing bin
if ((*pHisto)->elems[idx].val == val && idx >= 0) {
(*pHisto)->elems[idx].num += 1;
if ((*pHisto)->numOfEntries == 0) {
(*pHisto)->numOfEntries += 1;
}
} else { /* insert a new slot */
// sanity checks on the ordering of the neighbouring bins around the insert position
if ((*pHisto)->numOfElems >= 1 && idx < (*pHisto)->numOfEntries) {
if (idx > 0) {
assert((*pHisto)->elems[idx - 1].val <= val);
} else {
assert((*pHisto)->elems[idx].val > val);
}
} else if ((*pHisto)->numOfElems > 0) {
assert((*pHisto)->elems[(*pHisto)->numOfEntries].val <= val);
}
histogramCreateBin(*pHisto, idx, val);
}
#else
// skip-list variant: insert (or find) the node keyed by val
tSkipListKey key = tSkipListCreateKey(TSDB_DATA_TYPE_DOUBLE, &val, tDataTypes[TSDB_DATA_TYPE_DOUBLE].nSize);
SHistBin* entry = calloc(1, sizeof(SHistBin));
entry->val = val;
tSkipListNode* pResNode = SSkipListPut((*pHisto)->pList, entry, &key, 0);
SHistBin* pEntry1 = (SHistBin*)pResNode->pData;
pEntry1->index = -1;
tSkipListNode* pLast = NULL;
if (pEntry1->num == 0) { /* it is a new node */
(*pHisto)->numOfEntries += 1;
pEntry1->num += 1;
/* number of entries reaches the upper limitation */
if (pResNode->pForward[0] != NULL) {
/* we need to update the last updated slot in loser tree*/
// delta is the distance from this bin to its successor
pEntry1->delta = ((SHistBin*)pResNode->pForward[0]->pData)->val - val;
if ((*pHisto)->ordered) {
int32_t lastIndex = (*pHisto)->maxIndex;
SLoserTreeInfo* pTree = (*pHisto)->pLoserTree;
(*pHisto)->pLoserTree->pNode[lastIndex + pTree->numOfEntries].pData = pResNode;
pEntry1->index = (*pHisto)->pLoserTree->pNode[lastIndex + pTree->numOfEntries].index;
// update the loser tree
if ((*pHisto)->ordered) {
tLoserTreeAdjust(pTree, pEntry1->index + pTree->numOfEntries);
}
tSkipListKey kx =
tSkipListCreateKey(TSDB_DATA_TYPE_DOUBLE, &(*pHisto)->max, tDataTypes[TSDB_DATA_TYPE_DOUBLE].nSize);
pLast = tSkipListGetOne((*pHisto)->pList, &kx);
}
} else {
/* this node located at the last position of the skiplist, we do not
* update the loser-tree */
pEntry1->delta = DBL_MAX;
pLast = pResNode;
}
// the predecessor's delta changes as well, because its successor is now the new node
if (pResNode->pBackward[0] != &(*pHisto)->pList->pHead) {
SHistBin* pPrevEntry = (SHistBin*)pResNode->pBackward[0]->pData;
pPrevEntry->delta = val - pPrevEntry->val;
SLoserTreeInfo* pTree = (*pHisto)->pLoserTree;
if ((*pHisto)->ordered) {
tLoserTreeAdjust(pTree, pPrevEntry->index + pTree->numOfEntries);
tLoserTreeDisplay(pTree);
}
}
// too many bins: merge the pair selected by the loser tree
if ((*pHisto)->numOfEntries >= (*pHisto)->maxEntries + 1) {
// set the right value for loser-tree
assert((*pHisto)->pLoserTree != NULL);
if (!(*pHisto)->ordered) {
// first overflow: populate the loser tree leaves from the skip list and build the tree
SSkipListPrint((*pHisto)->pList, 1);
SLoserTreeInfo* pTree = (*pHisto)->pLoserTree;
tSkipListNode* pHead = (*pHisto)->pList->pHead.pForward[0];
tSkipListNode* p1 = pHead;
printf("\n");
while (p1 != NULL) {
printf("%f\t", ((SHistBin*)(p1->pData))->delta);
p1 = p1->pForward[0];
}
printf("\n");
/* last one in skiplist is ignored */
for (int32_t i = pTree->numOfEntries; i < pTree->totalEntries; ++i) {
pTree->pNode[i].pData = pHead;
pTree->pNode[i].index = i - pTree->numOfEntries;
SHistBin* pBin = (SHistBin*)pHead->pData;
pBin->index = pTree->pNode[i].index;
pHead = pHead->pForward[0];
}
pLast = pHead;
for (int32_t i = 0; i < pTree->numOfEntries; ++i) {
pTree->pNode[i].index = -1;
}
tLoserTreeDisplay(pTree);
for (int32_t i = pTree->totalEntries - 1; i >= pTree->numOfEntries; i--) {
tLoserTreeAdjust(pTree, i);
}
tLoserTreeDisplay(pTree);
(*pHisto)->ordered = true;
}
printf("delta is:%lf\n", pEntry1->delta);
SSkipListPrint((*pHisto)->pList, 1);
/* the chosen node */
tSkipListNode* pNode = (*pHisto)->pLoserTree->pNode[0].pData;
SHistBin* pEntry = (SHistBin*)pNode->pData;
tSkipListNode* pNext = pNode->pForward[0];
SHistBin* pNextEntry = (SHistBin*)pNext->pData;
assert(pNextEntry->val - pEntry->val == pEntry->delta);
// merged bin value is the count-weighted mean of the two bins
double newVal = (pEntry->val * pEntry->num + pNextEntry->val * pNextEntry->num) / (pEntry->num + pNextEntry->num);
pEntry->val = newVal;
pNode->key.dKey = newVal;
pEntry->num = pEntry->num + pNextEntry->num;
// update delta value in current node
pEntry->delta = (pNextEntry->delta + pNextEntry->val) - pEntry->val;
// reset delta value in the previous node
SHistBin* pPrevEntry = (SHistBin*)pNode->pBackward[0]->pData;
if (pPrevEntry) {
pPrevEntry->delta = pEntry->val - pPrevEntry->val;
}
SLoserTreeInfo* pTree = (*pHisto)->pLoserTree;
if (pNextEntry->index != -1) {
(*pHisto)->maxIndex = pNextEntry->index;
// set the last element in skiplist, of which delta is FLT_MAX;
pTree->pNode[pNextEntry->index + pTree->numOfEntries].pData = pLast;
((SHistBin*)pLast->pData)->index = pNextEntry->index;
int32_t f = pTree->pNode[pNextEntry->index + pTree->numOfEntries].index;
printf("disappear index is:%d\n", f);
}
tLoserTreeAdjust(pTree, pEntry->index + pTree->numOfEntries);
// remove the next node in skiplist
tSkipListRemoveNode((*pHisto)->pList, pNext);
SSkipListPrint((*pHisto)->pList, 1);
tLoserTreeDisplay((*pHisto)->pLoserTree);
} else { // add to heap
if (pResNode->pForward[0] != NULL) {
pEntry1->delta = ((SHistBin*)pResNode->pForward[0]->pData)->val - val;
} else {
pEntry1->delta = DBL_MAX;
}
// NOTE(review): this overwrites the delta of the NEW entry computed just above, whereas the
// equivalent code earlier in this function updates the PREVIOUS entry's delta -- confirm intent.
if (pResNode->pBackward[0] != &(*pHisto)->pList->pHead) {
SHistBin* pPrevEntry = (SHistBin*)pResNode->pBackward[0]->pData;
pEntry1->delta = val - pPrevEntry->val;
}
printf("delta is:%9lf\n", pEntry1->delta);
}
} else {
// the value already has a bin: just bump its count
SHistBin* pEntry = (SHistBin*)pResNode->pData;
assert(pEntry->val == val);
pEntry->num += 1;
}
#endif
// bookkeeping shared by both variants
if (val > (*pHisto)->max) {
(*pHisto)->max = val;
}
if (val < (*pHisto)->min) {
(*pHisto)->min = val;
}
(*pHisto)->numOfElems += 1;
return 0;
}
/*
 * Binary search over an array of bins ordered by ascending val.
 *
 * @param pEntry  bin array
 * @param len     number of bins in pEntry
 * @param val     value to locate
 * @return index of the bin whose val equals val; otherwise the position at
 *         which the search stopped (never negative)
 */
int32_t histoBinarySearch(SHistBin* pEntry, int32_t len, double val) {
  int32_t lo = 0;
  int32_t hi = len - 1;

  for (; lo <= hi;) {
    int32_t probe = (hi - lo) / 2 + lo;
    double  cur = pEntry[probe].val;

    if (cur == val) {
      return probe;
    }

    if (cur < val) {
      lo = probe + 1;
    } else {
      hi = probe - 1;
    }
  }

  // no exact match: report the larger of the two cursors, clamped at zero
  int32_t pos = (lo > hi) ? lo : hi;
  return (pos < 0) ? 0 : pos;
}
/*
 * Collapse the two neighbouring bins with the smallest gap between their
 * values into one weighted-average bin and shrink the array by one entry.
 *
 * @param pHistBin  bin array; the gap scan compares adjacent entries, so the
 *                  array is expected to be ordered by ascending val
 * @param size      in/out: number of valid bins, decremented after a merge
 *
 * Nothing is changed when fewer than two bins are present. The body is only
 * compiled when USE_ARRAYLIST is defined.
 */
static void histogramMergeImpl(SHistBin* pHistBin, int32_t* size) {
#if defined(USE_ARRAYLIST)
  int32_t oldSize = *size;
  if (oldSize < 2) {
    // no adjacent pair exists; merging here would touch pHistBin[-1]
    return;
  }

  double delta = DBL_MAX;
  // Default to the first pair: a gap that never compares below DBL_MAX
  // (infinite or NaN) must not leave the index pointing before the array.
  int32_t index = 0;
  for (int32_t i = 1; i < oldSize; ++i) {
    double d = pHistBin[i].val - pHistBin[i - 1].val;
    if (d < delta) {
      delta = d;
      index = i - 1;
    }
  }

  SHistBin* s1 = &pHistBin[index];
  SHistBin* s2 = &pHistBin[index + 1];

  // weighted centroid of the two bins; the merged bin keeps the left slot
  double newVal = (s1->val * s1->num + s2->val * s2->num) / (s1->num + s2->num);
  s1->val = newVal;
  s1->num = s1->num + s2->num;

  // close the hole left by the right-hand bin
  memmove(&pHistBin[index + 1], &pHistBin[index + 2], (oldSize - index - 2) * sizeof(SHistBin));
  (*size) -= 1;
#endif
}
/*
 * Insert a new single-element bin holding val at position index, shifting
 * the following bins one slot to the right. When the insertion brings the
 * histogram to maxEntries + 1 bins, the two closest bins are merged and the
 * spare slot is cleared.
 *
 * @param pHisto  histogram to update
 * @param index   insertion position, 0 <= index <= maxEntries
 * @param val     value of the new bin
 * @return always 0
 *
 * TODO: optimize this procedure
 */
int32_t histogramCreateBin(SHistogramInfo* pHisto, int32_t index, double val) {
#if defined(USE_ARRAYLIST)
  // validate the slot before any memory is moved
  assert(index >= 0 && index <= pHisto->maxEntries);

  int32_t remain = pHisto->numOfEntries - index;
  if (remain > 0) {
    memmove(&pHisto->elems[index + 1], &pHisto->elems[index], sizeof(SHistBin) * remain);
  }

  pHisto->elems[index].num = 1;
  pHisto->elems[index].val = val;
  pHisto->numOfEntries += 1;

  /* we need to merge the slot */
  if (pHisto->numOfEntries == pHisto->maxEntries + 1) {
    histogramMergeImpl(pHisto->elems, &pHisto->numOfEntries);

    // reset the spare slot that the merge just freed
    pHisto->elems[pHisto->maxEntries].val = 0;
    pHisto->elems[pHisto->maxEntries].num = 0;
  }
#endif

  assert(pHisto->numOfEntries <= pHisto->maxEntries);
  return 0;
}
/*
 * Release a histogram and clear the caller's pointer.
 *
 * @param pHisto  address of the histogram pointer; a NULL address or an
 *                already-NULL histogram is ignored
 */
void tHistogramDestroy(SHistogramInfo** pHisto) {
  if (pHisto == NULL || *pHisto == NULL) {
    return;
  }

  free(*pHisto);
  *pHisto = NULL;
}
/*
 * Write the histogram to stdout: a summary line followed by one
 * "ordinal: (value, count)" line per bin, ordinals starting at 1.
 */
void tHistogramPrint(SHistogramInfo* pHisto) {
  printf("total entries: %d, elements: %" PRId64 "\n", pHisto->numOfEntries, pHisto->numOfElems);

#if defined(USE_ARRAYLIST)
  int32_t slot = 0;
  while (slot < pHisto->numOfEntries) {
    printf("%d: (%f, %" PRId64 ")\n", slot + 1, pHisto->elems[slot].val, pHisto->elems[slot].num);
    ++slot;
  }
#else
  // walk the bottom level of the skip list
  tSkipListNode* pCursor = pHisto->pList->pHead.pForward[0];
  int32_t        slot = 0;
  while (slot < pHisto->numOfEntries) {
    SHistBin* pBin = (SHistBin*)pCursor->pData;
    printf("%d: (%f, %" PRId64 ")\n", slot + 1, pBin->val, pBin->num);

    pCursor = pCursor->pForward[0];
    ++slot;
  }
#endif
}
/**
 * Estimated number of points in the interval (-inf, v].
 *
 * The bin at or just below v is located with histoBinarySearch; the count
 * between that bin and its right neighbour is interpolated and added to the
 * counts of all bins to the left plus half of the located bin.
 *
 * @param pHisto  histogram to query
 * @param v       upper bound of the interval
 * @return the estimate truncated to int64_t; 0 when the file is built without
 *         USE_ARRAYLIST, where no implementation exists
 */
int64_t tHistogramSum(SHistogramInfo* pHisto, double v) {
#if defined(USE_ARRAYLIST)
  int32_t slotIdx = histoBinarySearch(pHisto->elems, pHisto->numOfEntries, v);
  if (pHisto->elems[slotIdx].val != v) {
    // no exact hit: step back to the bin on the left of v
    slotIdx -= 1;

    if (slotIdx < 0) {
      slotIdx = 0;
      assert(v <= pHisto->elems[slotIdx].val);
    } else {
      assert(v >= pHisto->elems[slotIdx].val);

      if (slotIdx + 1 < pHisto->numOfEntries) {
        assert(v < pHisto->elems[slotIdx + 1].val);
      }
    }
  }

  double m1 = (double)pHisto->elems[slotIdx].num;
  double v1 = pHisto->elems[slotIdx].val;

  // NOTE(review): slotIdx + 1 may equal numOfEntries when v lies at or beyond the last bin - confirm the spare slot is valid there
  double m2 = (double)pHisto->elems[slotIdx + 1].num;
  double v2 = pHisto->elems[slotIdx + 1].val;

  // interpolated bin height at v, then the trapezoid area between v1 and v
  double estNum = m1 + (m2 - m1) * (v - v1) / (v2 - v1);
  double s1 = (m1 + estNum) * (v - v1) / (2 * (v2 - v1));

  for (int32_t i = 0; i < slotIdx; ++i) {
    s1 += pHisto->elems[i].num;
  }

  s1 = s1 + m1 / 2;

  return (int64_t)s1;
#else
  // no skip-list implementation: return a defined value instead of falling off the end of a non-void function
  (void)pHisto;
  (void)v;
  return 0;
#endif
}
/*
 * For each requested ratio, estimate the value below which that share of
 * the recorded elements falls.
 *
 * @param pHisto  histogram to query
 * @param ratio   array of num ratios; the array-list build divides each by
 *                100, the skip-list build uses it as is
 * @param num     number of entries in ratio
 * @return malloc'ed array of num doubles owned by the caller, or NULL when
 *         the allocation fails
 */
double* tHistogramUniform(SHistogramInfo* pHisto, double* ratio, int32_t num) {
  double* pVal = malloc(num * sizeof(double));
  if (pVal == NULL) {
    return NULL;
  }

#if defined(USE_ARRAYLIST)
  for (int32_t i = 0; i < num; ++i) {
    double numOfElem = (ratio[i] / 100) * pHisto->numOfElems;

    if (numOfElem == 0) {
      pVal[i] = pHisto->min;
      continue;
    } else if (numOfElem <= pHisto->elems[0].num) {
      pVal[i] = pHisto->elems[0].val;
      continue;
    } else if (numOfElem == pHisto->numOfElems) {
      pVal[i] = pHisto->max;
      continue;
    }

    // find the bin j whose cumulative count brackets numOfElem
    int32_t j = 0;
    int64_t total = 0;

    while (j < pHisto->numOfEntries) {
      total += pHisto->elems[j].num;
      if (total <= numOfElem && total + pHisto->elems[j + 1].num > numOfElem) {
        break;
      }

      j += 1;
    }

    assert(total <= numOfElem && total + pHisto->elems[j + 1].num > numOfElem);

    double delta = numOfElem - total;
    if (fabs(delta) < FLT_EPSILON) {
      // the target sits exactly on a bin boundary; keep this value instead of recomputing it below
      pVal[i] = pHisto->elems[j].val;
      continue;
    }

    double start = (double)pHisto->elems[j].num;
    double range = pHisto->elems[j + 1].num - start;

    if (range == 0) {
      // equal bin heights: plain linear interpolation
      pVal[i] = (pHisto->elems[j + 1].val - pHisto->elems[j].val) * delta / start + pHisto->elems[j].val;
    } else {
      // positive root of range * z^2 + 2 * start * z - 2 * delta = 0
      double factor = (-2 * start + sqrt(4 * start * start - 4 * range * (-2 * delta))) / (2 * range);
      pVal[i] = pHisto->elems[j].val + (pHisto->elems[j + 1].val - pHisto->elems[j].val) * factor;
    }
  }
#else
  for (int32_t i = 0; i < num; ++i) {
    double numOfElem = ratio[i] * pHisto->numOfElems;

    tSkipListNode* pFirst = pHisto->pList->pHead.pForward[0];
    SHistBin*      pEntry = (SHistBin*)pFirst->pData;

    if (numOfElem == 0) {
      pVal[i] = pHisto->min;
      printf("i/numofSlot: %f, v:%f, %f\n", ratio[i], numOfElem, pVal[i]);
      continue;
    } else if (numOfElem <= pEntry->num) {
      pVal[i] = pEntry->val;
      printf("i/numofSlot: %f, v:%f, %f\n", ratio[i], numOfElem, pVal[i]);
      continue;
    } else if (numOfElem == pHisto->numOfElems) {
      pVal[i] = pHisto->max;
      printf("i/numofSlot: %f, v:%f, %f\n", ratio[i], numOfElem, pVal[i]);
      continue;
    }

    // advance until the cumulative count brackets numOfElem
    int32_t   j = 0;
    int64_t   total = 0;
    SHistBin* pPrev = pEntry;

    while (j < pHisto->numOfEntries) {
      if (total <= numOfElem && total + pEntry->num > numOfElem) {
        break;
      }

      total += pEntry->num;
      pPrev = pEntry;

      pFirst = pFirst->pForward[0];
      pEntry = (SHistBin*)pFirst->pData;

      j += 1;
    }

    assert(total <= numOfElem && total + pEntry->num > numOfElem);

    double delta = numOfElem - total;
    if (fabs(delta) < FLT_EPSILON) {
      // the target sits exactly on a bin boundary; keep this value instead of recomputing it below
      pVal[i] = pPrev->val;
      continue;
    }

    double start = pPrev->num;
    double range = pEntry->num - start;

    if (range == 0) {
      // equal bin heights: plain linear interpolation
      pVal[i] = (pEntry->val - pPrev->val) * delta / start + pPrev->val;
    } else {
      // positive root of range * z^2 + 2 * start * z - 2 * delta = 0
      double factor = (-2 * start + sqrt(4 * start * start - 4 * range * (-2 * delta))) / (2 * range);
      pVal[i] = pPrev->val + (pEntry->val - pPrev->val) * factor;
    }
  }
#endif

  return pVal;
}
/*
 * Build a new histogram with at most numOfEntries bins from the bins of two
 * histograms.
 *
 * The two bin arrays are merged in ascending value order (bins with equal
 * values are combined), then the closest neighbours are merged repeatedly
 * until the bin count fits.
 *
 * @param pHisto1       first input, not modified
 * @param pHisto2       second input, not modified
 * @param numOfEntries  bin limit of the result
 * @return a newly created histogram; it is returned without any bins when
 *         the inputs or the limit are invalid or the scratch buffer cannot
 *         be allocated; NULL when the result histogram itself cannot be
 *         created
 */
SHistogramInfo* tHistogramMerge(SHistogramInfo* pHisto1, SHistogramInfo* pHisto2, int32_t numOfEntries) {
  SHistogramInfo* pResHistogram = tHistogramCreate(numOfEntries);
  if (pResHistogram == NULL) {
    return NULL;
  }

  // error in histogram info; a limit below one bin could never be met by merging neighbours
  if (numOfEntries < 1 || pHisto1->numOfEntries > MAX_HISTOGRAM_BIN || pHisto2->numOfEntries > MAX_HISTOGRAM_BIN) {
    return pResHistogram;
  }

  // scratch array large enough for every bin of both inputs (at least one slot so the request is never zero-sized)
  int32_t   capacity = pHisto1->numOfEntries + pHisto2->numOfEntries;
  SHistBin* pHistoBins = calloc((size_t)(capacity > 0 ? capacity : 1), sizeof(SHistBin));
  if (pHistoBins == NULL) {
    return pResHistogram;
  }

  int32_t i = 0, j = 0, k = 0;

  // two-way merge by ascending bin value
  while (i < pHisto1->numOfEntries && j < pHisto2->numOfEntries) {
    if (pHisto1->elems[i].val < pHisto2->elems[j].val) {
      pHistoBins[k++] = pHisto1->elems[i++];
    } else if (pHisto1->elems[i].val > pHisto2->elems[j].val) {
      pHistoBins[k++] = pHisto2->elems[j++];
    } else {
      // same value in both inputs: one bin carrying both counts
      pHistoBins[k] = pHisto1->elems[i++];
      pHistoBins[k++].num += pHisto2->elems[j++].num;
    }
  }

  if (i < pHisto1->numOfEntries) {
    int32_t remain = pHisto1->numOfEntries - i;
    memcpy(&pHistoBins[k], &pHisto1->elems[i], sizeof(SHistBin) * remain);
    k += remain;
  }

  if (j < pHisto2->numOfEntries) {
    int32_t remain = pHisto2->numOfEntries - j;
    memcpy(&pHistoBins[k], &pHisto2->elems[j], sizeof(SHistBin) * remain);
    k += remain;
  }

  /* update other information */
  pResHistogram->numOfElems = pHisto1->numOfElems + pHisto2->numOfElems;
  pResHistogram->min = (pHisto1->min < pHisto2->min) ? pHisto1->min : pHisto2->min;
  pResHistogram->max = (pHisto1->max > pHisto2->max) ? pHisto1->max : pHisto2->max;

  // shrink to the requested number of bins
  while (k > numOfEntries) {
    histogramMergeImpl(pHistoBins, &k);
  }

  pResHistogram->numOfEntries = k;
  memcpy(pResHistogram->elems, pHistoBins, sizeof(SHistBin) * k);

  free(pHistoBins);
  return pResHistogram;
}

View File

@ -0,0 +1,534 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "os.h"
#include "tpercentile.h"
#include "tpagedfile.h"
#include "taosdef.h"
#include "tcompare.h"
#include "ttypes.h"
#define DEFAULT_NUM_OF_SLOT 1024
/*
 * Map a slot of a given bucketing round to a unique group id: each round
 * occupies its own block of numOfSlots consecutive ids.
 */
int32_t getGroupId(int32_t numOfSlots, int32_t slotIndex, int32_t times) {
  int32_t roundBase = times * numOfSlots;
  return roundBase + slotIndex;
}
/*
 * Gather every page belonging to one slot into a single freshly allocated
 * SFilePage-headed buffer and sort the elements with the bucket's comparator.
 *
 * @param pMemBucket  bucket that owns the pages
 * @param slotIdx     slot whose data is loaded
 * @return the sorted buffer; the caller releases it
 *
 * NOTE(review): the calloc result is used without a NULL check.
 */
static SFilePage *loadDataFromFilePage(tMemBucket *pMemBucket, int32_t slotIdx) {
  SFilePage *sorted =
      (SFilePage *)calloc(1, pMemBucket->bytes * pMemBucket->pSlots[slotIdx].info.size + sizeof(SFilePage));

  SIDList pages =
      getDataBufPagesIdList(pMemBucket->pBuffer, getGroupId(pMemBucket->numOfSlots, slotIdx, pMemBucket->times));

  // append the payload of each page right after the previous one
  int32_t written = 0;
  int32_t p = 0;
  while (p < pages->size) {
    SPageInfo *pInfo = *(SPageInfo **)taosArrayGet(pages, p);
    SFilePage *pPage = getResBufPage(pMemBucket->pBuffer, pInfo->pageId);

    memcpy(sorted->data + written, pPage->data, (size_t)(pPage->num * pMemBucket->bytes));
    written += (int32_t)(pPage->num * pMemBucket->bytes);

    ++p;
  }

  qsort(sorted->data, pMemBucket->pSlots[slotIdx].info.size, pMemBucket->bytes, pMemBucket->comparFn);
  return sorted;
}
/*
 * Put a min/max pair into its "empty" state for the given data type: the
 * minimum becomes the largest representable value and the maximum the
 * smallest, so the first value compared against them replaces both.
 */
static void resetBoundingBox(MinMaxEntry* range, int32_t type) {
  if (IS_SIGNED_NUMERIC_TYPE(type)) {
    range->i64MinVal = INT64_MAX;
    range->i64MaxVal = INT64_MIN;
    return;
  }

  if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
    range->u64MinVal = UINT64_MAX;
    range->u64MaxVal = 0;
    return;
  }

  // every other type is tracked as a double
  range->dMinVal = DBL_MAX;
  range->dMaxVal = -DBL_MAX;
}
/*
 * Store [minval, maxval] in the representation that matches the data type.
 *
 * @return 0 on success, -1 when minval is greater than maxval
 */
static int32_t setBoundingBox(MinMaxEntry* range, int16_t type, double minval, double maxval) {
  if (minval > maxval) {
    return -1;
  }

  if (IS_SIGNED_NUMERIC_TYPE(type)) {
    range->i64MinVal = (int64_t) minval;
    range->i64MaxVal = (int64_t) maxval;
    return 0;
  }

  if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
    range->u64MinVal = (uint64_t) minval;
    range->u64MaxVal = (uint64_t) maxval;
    return 0;
  }

  // every other type keeps the bounds as doubles
  range->dMinVal = minval;
  range->dMaxVal = maxval;
  return 0;
}
/* Mark a slot as holding no data: no page in memory, no page id, zero elements. */
static void resetPosInfo(SSlotInfo* pInfo) {
  pInfo->data = NULL;
  pInfo->pageId = -1;
  pInfo->size = 0;
}
/*
 * Return the single element stored in the bucket, converted to double.
 * Only valid when the bucket holds exactly one element; returns 0 if no
 * non-empty slot is found.
 */
double findOnlyResult(tMemBucket *pMemBucket) {
  assert(pMemBucket->total == 1);

  // locate the first slot that holds data
  int32_t slot = 0;
  while (slot < pMemBucket->numOfSlots && pMemBucket->pSlots[slot].info.size == 0) {
    ++slot;
  }

  if (slot >= pMemBucket->numOfSlots) {
    return 0;
  }

  SIDList pages = getDataBufPagesIdList(pMemBucket->pBuffer, getGroupId(pMemBucket->numOfSlots, slot, pMemBucket->times));
  assert(pages->size == 1);

  SPageInfo *pInfo = (SPageInfo *) taosArrayGetP(pages, 0);
  SFilePage *pOnlyPage = getResBufPage(pMemBucket->pBuffer, pInfo->pageId);
  assert(pOnlyPage->num == 1);

  double v = 0;
  GET_TYPED_DATA(v, double, pMemBucket->type, pOnlyPage->data);
  return v;
}
/*
 * Map a signed integer value to the slot of the bucket it belongs to.
 *
 * @param pBucket  bucket whose range and slot count define the mapping
 * @param value    pointer to the raw value, of type pBucket->type
 * @return slot index in [0, numOfSlots), or -1 when the value lies outside
 *         the bucket's range
 */
int32_t tBucketIntHash(tMemBucket *pBucket, const void *value) {
  int64_t v = 0;
  GET_TYPED_DATA(v, int64_t, pBucket->type, value);

  int32_t index = -1;
  if (v > pBucket->range.i64MaxVal || v < pBucket->range.i64MinVal) {
    return index;
  }

  // Work in unsigned arithmetic: max - min and v - min can exceed INT64_MAX
  // for wide ranges, which would overflow the signed type.
  uint64_t span = (uint64_t)pBucket->range.i64MaxVal - (uint64_t)pBucket->range.i64MinVal;
  uint64_t offset = (uint64_t)v - (uint64_t)pBucket->range.i64MinVal;

  // divide the value range into numOfSlots buckets
  if (span < (uint64_t)pBucket->numOfSlots) {
    // fewer distinct values than slots: one slot per value
    index = (int32_t)(offset % (uint64_t)pBucket->numOfSlots);
  } else {
    double slotSpan = (double)span / pBucket->numOfSlots;
    index = (int32_t)(offset / slotSpan);
    if (v == pBucket->range.i64MaxVal) {
      // the upper bound belongs to the last slot
      index -= 1;
    }
  }

  assert(index >= 0 && index < pBucket->numOfSlots);
  return index;
}
/*
 * Map an unsigned integer value to the slot of the bucket it belongs to.
 *
 * @param pBucket  bucket whose range and slot count define the mapping
 * @param value    pointer to the raw value, of type pBucket->type
 * @return slot index in [0, numOfSlots), or -1 when the value lies outside
 *         the bucket's range
 */
int32_t tBucketUintHash(tMemBucket *pBucket, const void *value) {
  // keep the value unsigned so that it matches the u64 bounds it is compared with
  uint64_t v = 0;
  GET_TYPED_DATA(v, uint64_t, pBucket->type, value);

  int32_t index = -1;
  if (v > pBucket->range.u64MaxVal || v < pBucket->range.u64MinVal) {
    return index;
  }

  // divide the value range into numOfSlots buckets
  uint64_t span = pBucket->range.u64MaxVal - pBucket->range.u64MinVal;
  uint64_t offset = v - pBucket->range.u64MinVal;

  if (span < (uint64_t)pBucket->numOfSlots) {
    // fewer distinct values than slots: one slot per value
    index = (int32_t)(offset % (uint64_t)pBucket->numOfSlots);
  } else {
    double slotSpan = (double)span / pBucket->numOfSlots;
    index = (int32_t)(offset / slotSpan);
    if (v == pBucket->range.u64MaxVal) {
      // the upper bound belongs to the last slot
      index -= 1;
    }
  }

  assert(index >= 0 && index < pBucket->numOfSlots);
  return index;
}
/*
 * Map a float or double value to the slot of the bucket it belongs to.
 *
 * @param pBucket  bucket whose range and slot count define the mapping
 * @param value    pointer to the raw value; read as float when the bucket
 *                 type is TSDB_DATA_TYPE_FLOAT, as double otherwise
 * @return slot index in [0, numOfSlots), or -1 when the value lies outside
 *         the bucket's range
 */
int32_t tBucketDoubleHash(tMemBucket *pBucket, const void *value) {
  double v = (pBucket->type == TSDB_DATA_TYPE_FLOAT) ? GET_FLOAT_VAL(value) : GET_DOUBLE_VAL(value);

  if (v > pBucket->range.dMaxVal || v < pBucket->range.dMinVal) {
    return -1;
  }

  // divide the range [dMinVal, dMaxVal] into numOfSlots buckets
  int32_t slot = -1;
  double  width = pBucket->range.dMaxVal - pBucket->range.dMinVal;

  if (width < pBucket->numOfSlots) {
    // narrow range: bucket by the truncated distance from the minimum
    int32_t steps = (int32_t)(v - pBucket->range.dMinVal);
    slot = (steps % pBucket->numOfSlots);
  } else {
    double perSlot = width / pBucket->numOfSlots;
    slot = (int32_t)((v - pBucket->range.dMinVal) / perSlot);
    if (v == pBucket->range.dMaxVal) {
      // the upper bound belongs to the last slot
      slot -= 1;
    }
  }

  assert(slot >= 0 && slot < pBucket->numOfSlots);
  return slot;
}
/* Choose the slot-hash routine that matches the data type: signed, unsigned, or floating point for everything else. */
static __perc_hash_func_t getHashFunc(int32_t type) {
  if (IS_SIGNED_NUMERIC_TYPE(type)) {
    return tBucketIntHash;
  }

  if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
    return tBucketUintHash;
  }

  return tBucketDoubleHash;
}
/* Return every slot of the bucket to its empty state: bounding box and position info are both reset. */
static void resetSlotInfo(tMemBucket* pBucket) {
  int32_t slot = 0;
  while (slot < pBucket->numOfSlots) {
    tMemBucketSlot* pCur = pBucket->pSlots + slot;

    resetBoundingBox(&pCur->range, pBucket->type);
    resetPosInfo(&pCur->info);

    ++slot;
  }
}
/*
 * Create a bucket that partitions values of one numeric type, limited to
 * [minval, maxval], into DEFAULT_NUM_OF_SLOT slots backed by a paged buffer.
 *
 * @param nElemSize  size in bytes of one element, must be positive
 * @param dataType   data type of the elements
 * @param minval     lower bound of accepted values
 * @param maxval     upper bound of accepted values
 * @return the new bucket, or NULL when an argument is invalid or a resource
 *         cannot be allocated; release it with tMemBucketDestroy
 */
tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType, double minval, double maxval) {
  // elemPerPage below divides by the element size
  if (nElemSize <= 0) {
    return NULL;
  }

  tMemBucket *pBucket = (tMemBucket *)calloc(1, sizeof(tMemBucket));
  if (pBucket == NULL) {
    return NULL;
  }

  pBucket->numOfSlots = DEFAULT_NUM_OF_SLOT;
  pBucket->bufPageSize = DEFAULT_PAGE_SIZE * 4;  // one buffer page spans four default pages

  pBucket->type = dataType;
  pBucket->bytes = nElemSize;
  pBucket->total = 0;
  pBucket->times = 1;

  pBucket->maxCapacity = 200000;

  if (setBoundingBox(&pBucket->range, pBucket->type, minval, maxval) != 0) {
    // qError("MemBucket:%p, invalid value range: %f-%f", pBucket, minval, maxval);
    free(pBucket);
    return NULL;
  }

  // number of elements that fit into the payload of one page
  pBucket->elemPerPage = (pBucket->bufPageSize - sizeof(SFilePage))/pBucket->bytes;
  pBucket->comparFn = getKeyComparFunc(pBucket->type, TSDB_ORDER_ASC);

  pBucket->hashFunc = getHashFunc(pBucket->type);
  if (pBucket->hashFunc == NULL) {
    // qError("MemBucket:%p, not support data type %d, failed", pBucket, pBucket->type);
    free(pBucket);
    return NULL;
  }

  pBucket->pSlots = (tMemBucketSlot *)calloc(pBucket->numOfSlots, sizeof(tMemBucketSlot));
  if (pBucket->pSlots == NULL) {
    free(pBucket);
    return NULL;
  }

  resetSlotInfo(pBucket);

  int32_t ret = createDiskbasedResultBuffer(&pBucket->pBuffer, pBucket->bufPageSize, pBucket->bufPageSize * 512, 1);
  if (ret != 0) {
    tMemBucketDestroy(pBucket);
    return NULL;
  }

  // qDebug("MemBucket:%p, elem size:%d", pBucket, pBucket->bytes);
  return pBucket;
}
/* Release a bucket together with its result buffer and slot array; a NULL bucket is ignored. */
void tMemBucketDestroy(tMemBucket *pBucket) {
  if (pBucket != NULL) {
    destroyResultBuf(pBucket->pBuffer);
    tfree(pBucket->pSlots);
    tfree(pBucket);
  }
}
/*
 * Widen a min/max pair so that it covers one more value.
 *
 * @param r         bounding box to update
 * @param data      pointer to the raw value
 * @param dataType  type of the value; selects which representation of the
 *                  bounds (i64, u64 or double) is compared and written
 */
void tMemBucketUpdateBoundingBox(MinMaxEntry *r, const char *data, int32_t dataType) {
  if (IS_SIGNED_NUMERIC_TYPE(dataType)) {
    int64_t v = 0;
    GET_TYPED_DATA(v, int64_t, dataType, data);

    if (r->i64MinVal > v) {
      r->i64MinVal = v;
    }

    if (r->i64MaxVal < v) {
      r->i64MaxVal = v;
    }
  } else if (IS_UNSIGNED_NUMERIC_TYPE(dataType)) {
    uint64_t v = 0;
    GET_TYPED_DATA(v, uint64_t, dataType, data);

    // use the unsigned bounds, matching resetBoundingBox and setBoundingBox
    if (r->u64MinVal > v) {
      r->u64MinVal = v;
    }

    if (r->u64MaxVal < v) {
      r->u64MaxVal = v;
    }
  } else if (IS_FLOAT_TYPE(dataType)) {
    double v = 0;
    GET_TYPED_DATA(v, double, dataType, data);

    if (r->dMinVal > v) {
      r->dMinVal = v;
    }

    if (r->dMaxVal < v) {
      r->dMaxVal = v;
    }
  } else {
    // only numeric types are supported
    assert(0);
  }
}
/*
 * Distribute an array of values over the slots of the bucket. Only plain
 * in-memory arrays are accepted.
 *
 * @param pBucket  destination bucket
 * @param data     array of size elements, each pBucket->bytes wide
 * @param size     number of elements in data
 * @return 0 on success, -1 when no new page could be obtained for a slot;
 *         elements stored before the failure remain counted in the bucket
 *
 * Values for which the hash function returns a negative index (outside the
 * bucket's range) are skipped.
 */
int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size) {
  assert(pBucket != NULL && data != NULL && size > 0);

  int32_t     count = 0;
  size_t      bytes = (size_t)pBucket->bytes;
  const char *base = (const char *)data;

  for (size_t i = 0; i < size; ++i) {
    const char *d = base + i * bytes;

    int32_t index = (pBucket->hashFunc)(pBucket, d);
    if (index < 0) {
      continue;
    }

    tMemBucketSlot *pSlot = &pBucket->pSlots[index];
    tMemBucketUpdateBoundingBox(&pSlot->range, d, pBucket->type);

    // ensure available memory pages to allocate
    int32_t groupId = getGroupId(pBucket->numOfSlots, index, pBucket->times);
    int32_t pageId = -1;

    if (pSlot->info.data == NULL || pSlot->info.data->num >= pBucket->elemPerPage) {
      if (pSlot->info.data != NULL) {
        assert(pSlot->info.data->num >= pBucket->elemPerPage && pSlot->info.size > 0);

        // the current page is full: hand it back to the buffer before asking for a new one
        releaseResBufPage(pBucket->pBuffer, pSlot->info.data);
        pSlot->info.data = NULL;
      }

      pSlot->info.data = getNewDataBuf(pBucket->pBuffer, groupId, &pageId);
      pSlot->info.pageId = pageId;

      // NOTE(review): assumes getNewDataBuf reports failure by returning NULL - confirm
      if (pSlot->info.data == NULL) {
        pBucket->total += count;
        return -1;
      }
    }

    memcpy(pSlot->info.data->data + pSlot->info.data->num * pBucket->bytes, d, pBucket->bytes);

    pSlot->info.data->num += 1;
    pSlot->info.size += 1;
    count += 1;
  }

  pBucket->total += count;
  return 0;
}
////////////////////////////////////////////////////////////////////////////////////////////
/*
 * Return the bounding box of the first non-empty slot after slotIdx. Its
 * minimum is needed to interpolate a percentile that falls between the last
 * element of one slot and the first element of the next. At least one
 * non-empty slot must follow slotIdx.
 */
static MinMaxEntry getMinMaxEntryOfNextSlotWithData(tMemBucket *pMemBucket, int32_t slotIdx) {
  int32_t next;
  for (next = slotIdx + 1; next < pMemBucket->numOfSlots; ++next) {
    if (pMemBucket->pSlots[next].info.size != 0) {
      break;
    }
  }

  assert(next < pMemBucket->numOfSlots);
  return pMemBucket->pSlots[next].range;
}
static bool isIdenticalData(tMemBucket *pMemBucket, int32_t index);
/*
 * Return, as a double, the single value shared by every element of a slot.
 * The slot must satisfy isIdenticalData; the value is read from the minimum
 * of its bounding box.
 */
static double getIdenticalDataVal(tMemBucket* pMemBucket, int32_t slotIndex) {
  assert(isIdenticalData(pMemBucket, slotIndex));

  const MinMaxEntry *pBox = &pMemBucket->pSlots[slotIndex].range;

  if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) {
    return (double) pBox->i64MinVal;
  }

  if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) {
    return (double) pBox->u64MinVal;
  }

  return (double) pBox->dMinVal;
}
/*
 * Locate the element with zero-based rank `count` among the bucketed data and
 * return the linear interpolation between it and its successor:
 * (1 - fraction) * this + fraction * next.
 *
 * Slots are walked in index order while accumulating their sizes. Three cases
 * are handled for the slot that contains the requested rank:
 *  - the rank is the last element of the slot: interpolate between this
 *    slot's maximum and the next non-empty slot's minimum;
 *  - the slot holds at most maxCapacity elements: load and sort it, then
 *    interpolate between two neighbouring elements;
 *  - otherwise: re-bucket the slot's data into all slots using the slot's own
 *    range and recurse with the rank relative to that slot.
 *
 * Returns 0 when no slot contains the requested rank.
 */
double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) {
// number of elements in the slots visited so far
int32_t num = 0;
for (int32_t i = 0; i < pMemBucket->numOfSlots; ++i) {
tMemBucketSlot *pSlot = &pMemBucket->pSlots[i];
if (pSlot->info.size == 0) {
continue;
}
// required value in current slot
if (num < (count + 1) && num + pSlot->info.size >= (count + 1)) {
if (pSlot->info.size + num == (count + 1)) {
/*
* The requested element is the last one of this slot, so its successor is
* the minimum of the next non-empty slot; both are read from the bounding
* boxes without loading any page.
*/
MinMaxEntry next = getMinMaxEntryOfNextSlotWithData(pMemBucket, i);
double maxOfThisSlot = 0;
double minOfNextSlot = 0;
if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) {
maxOfThisSlot = (double) pSlot->range.i64MaxVal;
minOfNextSlot = (double) next.i64MinVal;
} else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) {
maxOfThisSlot = (double) pSlot->range.u64MaxVal;
minOfNextSlot = (double) next.u64MinVal;
} else {
maxOfThisSlot = (double) pSlot->range.dMaxVal;
minOfNextSlot = (double) next.dMinVal;
}
assert(minOfNextSlot > maxOfThisSlot);
double val = (1 - fraction) * maxOfThisSlot + fraction * minOfNextSlot;
return val;
}
if (pSlot->info.size <= pMemBucket->maxCapacity) {
// small enough to sort in memory: all pages of the slot are merged into one sorted buffer
SFilePage *buffer = loadDataFromFilePage(pMemBucket, i);
// rank of the requested element inside this slot
int32_t currentIdx = count - num;
char *thisVal = buffer->data + pMemBucket->bytes * currentIdx;
// the successor is inside the same slot, because the last-element case returned above
char *nextVal = thisVal + pMemBucket->bytes;
double td = 1.0, nd = 1.0;
GET_TYPED_DATA(td, double, pMemBucket->type, thisVal);
GET_TYPED_DATA(nd, double, pMemBucket->type, nextVal);
double val = (1 - fraction) * td + fraction * nd;
tfree(buffer);
return val;
} else { // incur a second round bucket split
// a slot whose elements are all equal cannot be split any further
if (isIdenticalData(pMemBucket, i)) {
return getIdenticalDataVal(pMemBucket, i);
}
// try next round
pMemBucket->times += 1;
// qDebug("MemBucket:%p, start next round data bucketing, time:%d", pMemBucket, pMemBucket->times);
// the slot's range becomes the bucket's range; it is copied before resetSlotInfo clears the slot
pMemBucket->range = pSlot->range;
pMemBucket->total = 0;
resetSlotInfo(pMemBucket);
// pages written in the previous round are found through the previous round's group id
int32_t groupId = getGroupId(pMemBucket->numOfSlots, i, pMemBucket->times - 1);
SIDList list = getDataBufPagesIdList(pMemBucket->pBuffer, groupId);
assert(list->size > 0);
for (int32_t f = 0; f < list->size; ++f) {
SPageInfo *pgInfo = *(SPageInfo **)taosArrayGet(list, f);
SFilePage *pg = getResBufPage(pMemBucket->pBuffer, pgInfo->pageId);
// redistribute the page's elements over the re-initialised slots
tMemBucketPut(pMemBucket, pg->data, (int32_t)pg->num);
releaseResBufPageInfo(pMemBucket->pBuffer, pgInfo);
}
// the rank is now relative to the data of the slot that was split
return getPercentileImpl(pMemBucket, count - num, fraction);
}
} else {
num += pSlot->info.size;
}
}
return 0;
}
/*
 * Compute the requested percentile of the data stored in the bucket.
 *
 * @param pMemBucket  bucket holding the data
 * @param percent     percentile in percent; its absolute value is used
 * @return 0.0 for an empty bucket, the only element when there is just one,
 *         the bucket range's minimum or maximum for (about) 0 and 100, and
 *         the interpolated value from getPercentileImpl otherwise
 */
double getPercentile(tMemBucket *pMemBucket, double percent) {
  if (pMemBucket->total == 0) {
    return 0.0;
  }

  // a single element is the answer for every percentile
  if (pMemBucket->total == 1) {
    return findOnlyResult(pMemBucket);
  }

  percent = fabs(percent);

  // the extremes come straight from the bucket range, no scan needed
  bool wantMax = fabs(percent - 100.0) < DBL_EPSILON;
  bool wantMin = percent < DBL_EPSILON;

  if (wantMax || wantMin) {
    MinMaxEntry* pRange = &pMemBucket->range;

    if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) {
      return (double)(wantMax ? pRange->i64MaxVal : pRange->i64MinVal);
    }

    if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) {
      return (double)(wantMax ? pRange->u64MaxVal : pRange->u64MinVal);
    }

    return wantMax ? pRange->dMaxVal : pRange->dMinVal;
  }

  // fractional rank of the requested element among total elements
  double rank = (percent * (pMemBucket->total - 1)) / ((double)100.0);

  // the integer part selects the element, the remainder weights the interpolation
  int32_t orderIdx = (int32_t)rank;
  return getPercentileImpl(pMemBucket, orderIdx, rank - orderIdx);
}
/*
 * Tell whether every element of a slot has the same value. Only the slot's
 * bounding box is inspected: the data is identical when minimum and maximum
 * coincide.
 */
bool isIdenticalData(tMemBucket *pMemBucket, int32_t index) {
  const MinMaxEntry *pBox = &pMemBucket->pSlots[index].range;

  if (!IS_FLOAT_TYPE(pMemBucket->type)) {
    return pBox->i64MinVal == pBox->i64MaxVal;
  }

  return fabs(pBox->dMaxVal - pBox->dMinVal) < DBL_EPSILON;
}

View File

@ -0,0 +1,10 @@
#include "tscalarfunction.h"
// Table of scalar function descriptors. It currently holds one entry with no
// fields set, i.e. a placeholder.
SScalarFunctionInfo scalarFunc[1] = {
{
},
};

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@ target_include_directories(
target_link_libraries(
parser
PRIVATE os util common catalog executor transport
PRIVATE os util common catalog function transport
)
ADD_SUBDIRECTORY(test)
ADD_SUBDIRECTORY(test)

View File

@ -246,6 +246,7 @@ typedef struct tSqlExpr {
// One item of the select list: select <SArray of tSqlExprItem> from xxx
typedef struct tSqlExprItem {
tSqlExpr *pNode; // root of the expression tree of this item
int32_t functionId; // presumably the id of the SQL function used by this item - TODO confirm against the validator
char *aliasName; // alias name, null-terminated string
bool distinct; // presumably set when the item carries the DISTINCT qualifier - TODO confirm
} tSqlExprItem;

View File

@ -26,6 +26,11 @@ extern "C" {
struct SSqlNode;
// Position of a column expressed as a (table, column) index pair.
// NOTE(review): presumably tableIndex selects a table of the query and
// columnIndex a column of that table's schema - confirm.
typedef struct SColumnIndex {
int16_t tableIndex;
int16_t columnIndex;
} SColumnIndex;
typedef struct SInsertStmtInfo {
SHashObj *pTableBlockHashList; // data block for each table
SArray *pDataBlocks; // SArray<STableDataBlocks*>. Merged submit block for each vgroup
@ -35,6 +40,51 @@ typedef struct SInsertStmtInfo {
char *sql; // current sql statement position
} SInsertStmtInfo;
// Description of one SQL function / expression in the select clause.
typedef struct SSqlExpr {
char aliasName[TSDB_COL_NAME_LEN]; // name given with "as <alias>"
char token[TSDB_COL_NAME_LEN]; // original token
SColIndex colInfo;
uint64_t uid; // table uid, todo refactor use the pointer
int16_t functionId; // function id in aAgg array
int16_t resType; // return value type
int16_t resBytes; // length of return value
int32_t interBytes; // size of the intermediate result buffer
int16_t colType; // table column type
int16_t colBytes; // table column bytes
int16_t numOfParams; // presumably the number of used entries in param[] - TODO confirm
SVariant param[3]; // function parameters, at most 3
int32_t offset; // sub result column value of arithmetic expression.
int16_t resColId; // result column id
SColumnFilterList flist;
} SSqlExpr;
// An SSqlExpr description paired with a pointer to an expression tree.
typedef struct SExprInfo {
SSqlExpr base; // function / column description
struct tExprNode *pExpr; // expression tree; NOTE(review): presumably NULL when the item is not an arithmetic expression - confirm
} SExprInfo;
// A column reference: the uid of its table, its index and its SColumnInfo.
typedef struct SColumn {
uint64_t tableUid; // uid of the table the column belongs to
int32_t columnIndex; // NOTE(review): presumably the index into the table schema - confirm
SColumnInfo info;
} SColumn;
// A result field together with the expression it is associated with.
typedef struct SInternalField {
TAOS_FIELD field; // field description
bool visible; // NOTE(review): presumably false for helper fields hidden from the client - confirm
SExprInfo *pExpr; // expression associated with this field
} SInternalField;
void clearTableMetaInfo(STableMetaInfo* pTableMetaInfo);
void clearAllTableMetaInfo(SQueryStmtInfo* pQueryInfo, bool removeMeta, uint64_t id);
/**
* Validate the sql info, according to the corresponding metadata info from catalog.
* @param pCatalog

View File

@ -23,10 +23,31 @@ extern "C" {
#include "os.h"
#include "ttoken.h"
// True when the table meta is loaded and its type is TSDB_SUPER_TABLE.
#define UTIL_TABLE_IS_SUPER_TABLE(metaInfo) \
(((metaInfo)->pTableMeta != NULL) && ((metaInfo)->pTableMeta->tableType == TSDB_SUPER_TABLE))
// True when the table meta is loaded and its type is TSDB_CHILD_TABLE.
#define UTIL_TABLE_IS_CHILD_TABLE(metaInfo) \
(((metaInfo)->pTableMeta != NULL) && ((metaInfo)->pTableMeta->tableType == TSDB_CHILD_TABLE))
// True for everything that is neither a super table nor a child table; this
// includes the case where pTableMeta is NULL and tables of type TSDB_TEMP_TABLE.
#define UTIL_TABLE_IS_NORMAL_TABLE(metaInfo) \
(!(UTIL_TABLE_IS_SUPER_TABLE(metaInfo) || UTIL_TABLE_IS_CHILD_TABLE(metaInfo)))
// True when the table meta is loaded and its type is TSDB_TEMP_TABLE.
#define UTIL_TABLE_IS_TMP_TABLE(metaInfo) \
(((metaInfo)->pTableMeta != NULL) && ((metaInfo)->pTableMeta->tableType == TSDB_TEMP_TABLE))
int32_t parserValidateIdToken(SToken* pToken);
int32_t parserSetInvalidOperatorMsg(char* dst, int32_t dstBufLen, const char* msg);
int32_t buildInvalidOperationMsg(char* dst, int32_t dstBufLen, const char* msg);
int32_t parserSetSyntaxErrMsg(char* dst, int32_t dstBufLen, const char* additionalInfo, const char* sourceStr);
void columnListCopy(SArray* dst, const SArray* src, uint64_t tableUid);
void columnListCopyAll(SArray* dst, const SArray* src);
void columnListDestroy(SArray* pColumnList);
void cleanupTagCond(STagCond* pTagCond);
void cleanupColumnCond(SArray** pCond);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_QUERYINFOUTIL_H
#define TDENGINE_QUERYINFOUTIL_H
#ifdef __cplusplus
extern "C" {
#endif
#include "parserInt.h"
SSchema* getTbnameColumnSchema();
int32_t getNumOfColumns(const STableMeta* pTableMeta);
int32_t getNumOfTags(const STableMeta* pTableMeta);
SSchema *getTableColumnSchema(const STableMeta *pTableMeta);
SSchema *getTableTagSchema(const STableMeta* pTableMeta);
SSchema *getOneColumnSchema(const STableMeta* pTableMeta, int32_t colIndex);
size_t getNumOfExprs(SQueryStmtInfo* pQueryInfo);
SExprInfo* createExprInfo(STableMetaInfo* pTableMetaInfo, int16_t functionId, SColumnIndex* pColIndex, int16_t type,
int16_t size, int16_t resColId, int16_t interSize, int32_t colType);
void addExprInfo(SQueryStmtInfo* pQueryInfo, int32_t index, SExprInfo* pExprInfo);
void updateExprInfo(SExprInfo* pExprInfo, int16_t functionId, int32_t colId, int16_t srcColumnIndex, int16_t resType, int16_t resSize);
void assignExprInfo(SExprInfo* dst, const SExprInfo* src);
SExprInfo* getExprInfo(SQueryStmtInfo* pQueryInfo, int32_t index);
int32_t copyAllExprInfo(SArray* dst, const SArray* src, bool deepcopy);
void cleanupFieldInfo(SFieldInfo* pFieldInfo);
STableComInfo getTableInfo(const STableMeta* pTableMeta);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_QUERYINFOUTIL_H

View File

@ -16,6 +16,7 @@
#include "ttime.h"
#include "parserInt.h"
#include "parserUtil.h"
#include "queryInfoUtil.h"
#define TSQL_TBNAME_L "tbname"
#define DEFAULT_PRIMARY_TIMESTAMP_COL_NAME "_c0"
@ -25,18 +26,13 @@
#define COLUMN_INDEX_INITIAL_VAL (-2)
#define COLUMN_INDEX_INITIALIZER { COLUMN_INDEX_INITIAL_VAL, COLUMN_INDEX_INITIAL_VAL }
typedef struct SColumn {
uint64_t tableUid;
int32_t columnIndex;
SColumnInfo info;
} SColumn;
static int32_t validateSelectNodeList(SQueryStmtInfo* pQueryInfo, SArray* pSelNodeList, bool joinQuery, bool timeWindowQuery, bool outerQuery, char* msg, int32_t msgBufLen);
typedef struct SColumnIndex {
int16_t tableIndex;
int16_t columnIndex;
} SColumnIndex;
size_t tscNumOfExprs(SQueryStmtInfo* pQueryInfo) {
return taosArrayGetSize(pQueryInfo->exprList);
}
static int32_t evaluateImpl(tSqlExpr* pExpr, int32_t tsPrecision) {
static int32_t evaluateImpl(tSqlExpr* pExpr, int32_t tsPrecision) {
int32_t code = 0;
if (pExpr->type == SQL_NODE_EXPR) {
code = evaluateImpl(pExpr->pLeft, tsPrecision);
@ -147,7 +143,7 @@ void columnDestroy(SColumn* pCol) {
free(pCol);
}
void tscColumnListDestroy(SArray* pColumnList) {
void destroyColumnList(SArray* pColumnList) {
if (pColumnList == NULL) {
return;
}
@ -169,7 +165,7 @@ void clearTableMetaInfo(STableMetaInfo* pTableMetaInfo) {
tfree(pTableMetaInfo->pTableMeta);
tfree(pTableMetaInfo->vgroupList);
tscColumnListDestroy(pTableMetaInfo->tagColList);
destroyColumnList(pTableMetaInfo->tagColList);
pTableMetaInfo->tagColList = NULL;
free(pTableMetaInfo);
@ -183,7 +179,6 @@ void clearAllTableMetaInfo(SQueryStmtInfo* pQueryInfo, bool removeMeta, uint64_t
// removeCachedTableMeta(pTableMetaInfo, id);
}
freeVgroupTableInfo(pTableMetaInfo->pVgroupTables);
clearTableMetaInfo(pTableMetaInfo);
}
@ -224,7 +219,7 @@ static STableMeta* extractTempTableMetaFromSubquery(SQueryStmtInfo* pUpstream) {
return meta;
}
void tscInitQueryInfo(SQueryStmtInfo* pQueryInfo) {
void initQueryInfo(SQueryStmtInfo* pQueryInfo) {
assert(pQueryInfo->fieldsInfo.internalField == NULL);
//assert(0);
// pQueryInfo->fieldsInfo.internalField = taosArrayInit(4, sizeof(SInternalField));
@ -241,7 +236,7 @@ void tscInitQueryInfo(SQueryStmtInfo* pQueryInfo) {
pQueryInfo->slimit.offset = 0;
pQueryInfo->pUpstream = taosArrayInit(4, POINTER_BYTES);
pQueryInfo->window = TSWINDOW_INITIALIZER;
pQueryInfo->multigroupResult = true;
// pQueryInfo->multigroupResult = true;
}
int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, char* msg, int32_t msgBufLen);
@ -307,11 +302,11 @@ static int32_t doValidateSubquery(SSqlNode* pSqlNode, int32_t index, SQueryStmtI
// union all is not support currently
SSqlNode* p = taosArrayGetP(subInfo->pSubquery, 0);
if (taosArrayGetSize(subInfo->pSubquery) >= 2) {
return parserSetInvalidOperatorMsg(msgBuf, msgBufLen, "not support union in subquery");
return buildInvalidOperationMsg(msgBuf, msgBufLen, "not support union in subquery");
}
SQueryStmtInfo* pSub = calloc(1, sizeof(SQueryStmtInfo));
tscInitQueryInfo(pSub);
initQueryInfo(pSub);
SArray *pUdfInfo = NULL;
if (pQueryInfo->pUdfInfo) {
@ -336,7 +331,7 @@ static int32_t doValidateSubquery(SSqlNode* pSqlNode, int32_t index, SQueryStmtI
if (subInfo->aliasName.n > 0) {
if (subInfo->aliasName.n >= TSDB_TABLE_FNAME_LEN) {
tfree(pTableMetaInfo1);
return parserSetInvalidOperatorMsg(msgBuf, msgBufLen, "subquery alias name too long");
return buildInvalidOperationMsg(msgBuf, msgBufLen, "subquery alias name too long");
}
tstrncpy(pTableMetaInfo1->aliasName, subInfo->aliasName.z, subInfo->aliasName.n + 1);
@ -528,8 +523,6 @@ STableMetaInfo* tscAddTableMetaInfo(SQueryStmtInfo* pQueryInfo, char* name, STab
tscColumnListCopy(pTableMetaInfo->tagColList, pTagCols, pTableMetaInfo->pTableMeta->uid);
}
pTableMetaInfo->pVgroupTables = tscVgroupTableInfoDup(pVgroupTables);
pQueryInfo->numOfTables += 1;
return pTableMetaInfo;
}
@ -600,36 +593,15 @@ int32_t getTableIndexByName(SToken* pToken, SQueryStmtInfo* pQueryInfo, SColumnI
return TSDB_CODE_SUCCESS;
}
STableComInfo getTableInfo(const STableMeta* pTableMeta) {
assert(pTableMeta != NULL);
return pTableMeta->tableInfo;
}
int32_t getNumOfColumns(const STableMeta* pTableMeta) {
assert(pTableMeta != NULL);
// table created according to super table, use data from super table
return getTableInfo(pTableMeta).numOfColumns;
}
int32_t getNumOfTags(const STableMeta* pTableMeta) {
assert(pTableMeta != NULL);
return getTableInfo(pTableMeta).numOfTags;
}
SSchema *getTableSchema(const STableMeta *pTableMeta) {
assert(pTableMeta != NULL);
return (SSchema*) pTableMeta->schema;
}
static int16_t doGetColumnIndex(SQueryStmtInfo* pQueryInfo, int32_t index, SToken* pToken) {
STableMeta* pTableMeta = getMetaInfo(pQueryInfo, index)->pTableMeta;
int32_t numOfCols = getNumOfColumns(pTableMeta) + getNumOfTags(pTableMeta);
SSchema* pSchema = getTableSchema(pTableMeta);
SSchema* pSchema = getTableColumnSchema(pTableMeta);
int16_t columnIndex = COLUMN_INDEX_INITIAL_VAL;
for (int16_t i = 0; i < numOfCols; ++i) {
for (int32_t i = 0; i < numOfCols; ++i) {
if (pToken->n != strlen(pSchema[i].name)) {
continue;
}
@ -670,7 +642,7 @@ int32_t doGetColumnIndexByName(SToken* pToken, SQueryStmtInfo* pQueryInfo, SColu
if (colIndex != COLUMN_INDEX_INITIAL_VAL) {
if (pIndex->columnIndex != COLUMN_INDEX_INITIAL_VAL) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg0);
return buildInvalidOperationMsg(msg, msgBufLen, msg0);
} else {
pIndex->tableIndex = i;
pIndex->columnIndex = colIndex;
@ -685,7 +657,7 @@ int32_t doGetColumnIndexByName(SToken* pToken, SQueryStmtInfo* pQueryInfo, SColu
}
if (pIndex->columnIndex == COLUMN_INDEX_INITIAL_VAL) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg1);
return buildInvalidOperationMsg(msg, msgBufLen, msg1);
}
}
@ -696,7 +668,7 @@ int32_t doGetColumnIndexByName(SToken* pToken, SQueryStmtInfo* pQueryInfo, SColu
}
}
int32_t getColumnIndexByName(const SToken* pToken, SQueryStmtInfo* pQueryInfo, SColumnIndex* pIndex, char* msg) {
int32_t getColumnIndexByName(const SToken* pToken, SQueryStmtInfo* pQueryInfo, SColumnIndex* pIndex, char* msg, int32_t msgBufLen) {
if (pQueryInfo->pTableMetaInfo == NULL || pQueryInfo->numOfTables == 0) {
return TSDB_CODE_TSC_INVALID_OPERATION;
}
@ -707,7 +679,7 @@ int32_t getColumnIndexByName(const SToken* pToken, SQueryStmtInfo* pQueryInfo, S
return TSDB_CODE_TSC_INVALID_OPERATION;
}
return doGetColumnIndexByName(&tmpToken, pQueryInfo, pIndex, msg);
return doGetColumnIndexByName(&tmpToken, pQueryInfo, pIndex, msg, msgBufLen);
}
int32_t validateGroupbyNode(SQueryStmtInfo* pQueryInfo, SArray* pList, char* msg, int32_t msgBufLen) {
@ -727,7 +699,7 @@ int32_t validateGroupbyNode(SQueryStmtInfo* pQueryInfo, SArray* pList, char* msg
}
if (pQueryInfo->numOfTables > 1) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg4);
return buildInvalidOperationMsg(msg, msgBufLen, msg4);
}
SGroupbyExpr* pGroupExpr = &pQueryInfo->groupbyExpr;
@ -745,7 +717,7 @@ int32_t validateGroupbyNode(SQueryStmtInfo* pQueryInfo, SArray* pList, char* msg
int32_t numOfGroupCols = (int16_t) taosArrayGetSize(pList);
if (numOfGroupCols > TSDB_MAX_TAGS) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg1);
return buildInvalidOperationMsg(msg, msgBufLen, msg1);
}
SSchema *pSchema = NULL;
@ -753,20 +725,20 @@ int32_t validateGroupbyNode(SQueryStmtInfo* pQueryInfo, SArray* pList, char* msg
size_t num = taosArrayGetSize(pList);
for (int32_t i = 0; i < num; ++i) {
tVariantListItem * pItem = taosArrayGet(pList, i);
tVariant* pVar = &pItem->pVar;
SListItem * pItem = taosArrayGet(pList, i);
SVariant* pVar = &pItem->pVar;
SToken token = {pVar->nLen, pVar->nType, pVar->pz};
SColumnIndex index = COLUMN_INDEX_INITIALIZER;
if (getColumnIndexByName(&token, pQueryInfo, &index, tscGetErrorMsgPayload(pCmd)) != TSDB_CODE_SUCCESS) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg2);
if (getColumnIndexByName(&token, pQueryInfo, &index, msg, msgBufLen) != TSDB_CODE_SUCCESS) {
return buildInvalidOperationMsg(msg, msgBufLen, msg2);
}
if (tableIndex == COLUMN_INDEX_INITIAL_VAL) {
tableIndex = index.tableIndex;
} else if (tableIndex != index.tableIndex) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg3);
return buildInvalidOperationMsg(msg, msgBufLen, msg3);
}
pTableMetaInfo = getMetaInfo(pQueryInfo, index.tableIndex);
@ -775,7 +747,7 @@ int32_t validateGroupbyNode(SQueryStmtInfo* pQueryInfo, SArray* pList, char* msg
if (index.columnIndex == TSDB_TBNAME_COLUMN_INDEX) {
pSchema = getTbnameColumnSchema();
} else {
pSchema = getTableColumnSchema(pTableMeta, index.columnIndex);
pSchema = getOneColumnSchema(pTableMeta, index.columnIndex);
}
int32_t numOfCols = getNumOfColumns(pTableMeta);
@ -783,7 +755,7 @@ int32_t validateGroupbyNode(SQueryStmtInfo* pQueryInfo, SArray* pList, char* msg
if (groupTag) {
if (!UTIL_TABLE_IS_SUPER_TABLE(pTableMetaInfo)) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg6);
return buildInvalidOperationMsg(msg, msgBufLen, msg6);
}
int32_t relIndex = index.columnIndex;
@ -796,14 +768,14 @@ int32_t validateGroupbyNode(SQueryStmtInfo* pQueryInfo, SArray* pList, char* msg
taosArrayPush(pGroupExpr->columnInfo, &colIndex);
index.columnIndex = relIndex;
tscColumnListInsert(pTableMetaInfo->tagColList, index.columnIndex, pTableMeta->id.uid, pSchema);
tscColumnListInsert(pTableMetaInfo->tagColList, index.columnIndex, pTableMeta->uid, pSchema);
} else {
// check if the column type is valid, here only support the bool/tinyint/smallint/bigint group by
if (pSchema->type == TSDB_DATA_TYPE_TIMESTAMP || pSchema->type == TSDB_DATA_TYPE_FLOAT || pSchema->type == TSDB_DATA_TYPE_DOUBLE) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg5);
return buildInvalidOperationMsg(msg, msgBufLen, msg5);
}
tscColumnListInsert(pQueryInfo->colList, index.columnIndex, pTableMeta->id.uid, pSchema);
tscColumnListInsert(pQueryInfo->colList, index.columnIndex, pTableMeta->uid, pSchema);
SColIndex colIndex = { .colIndex = index.columnIndex, .flag = TSDB_COL_NORMAL, .colId = pSchema->colId };
strncpy(colIndex.name, pSchema->name, tListLen(colIndex.name));
@ -817,13 +789,13 @@ int32_t validateGroupbyNode(SQueryStmtInfo* pQueryInfo, SArray* pList, char* msg
// 1. only one normal column allowed in the group by clause
// 2. the normal column in the group by clause can only located in the end position
if (numOfGroupCols > 1) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg7);
return buildInvalidOperationMsg(msg, msgBufLen, msg7);
}
for(int32_t i = 0; i < num; ++i) {
SColIndex* pIndex = taosArrayGet(pGroupExpr->columnInfo, i);
if (TSDB_COL_IS_NORMAL_COL(pIndex->flag) && i != num - 1) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg8);
return buildInvalidOperationMsg(msg, msgBufLen, msg8);
}
}
@ -863,7 +835,6 @@ int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, char* ms
}
if (pSqlNode->from->type == SQL_NODE_FROM_SUBQUERY) {
// clearAllTableMetaInfo(pQueryInfo, false, pSql->self);
pQueryInfo->numOfTables = 0;
// parse the subquery in the first place
@ -873,7 +844,7 @@ int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, char* ms
SRelElementPair* subInfo = taosArrayGet(pSqlNode->from->list, i);
SSqlNode* p = taosArrayGetP(subInfo->pSubquery, 0);
if (p->from->type == SQL_NODE_FROM_SUBQUERY) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg9);
return buildInvalidOperationMsg(msg, msgBufLen, msg9);
}
code = doValidateSubquery(pSqlNode, i, pQueryInfo, msg, msgBufLen);
@ -884,25 +855,25 @@ int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, char* ms
int32_t timeWindowQuery =
(TPARSER_HAS_TOKEN(pSqlNode->interval.interval) || TPARSER_HAS_TOKEN(pSqlNode->sessionVal.gap));
TSDB_QUERY_SET_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_TABLE_QUERY);
// TSDB_QUERY_SET_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_TABLE_QUERY);
// parse the group by clause in the first place
if (validateGroupbyNode(pQueryInfo, pSqlNode->pGroupby, pCmd) != TSDB_CODE_SUCCESS) {
if (validateGroupbyNode(pQueryInfo, pSqlNode->pGroupby, msg, msgBufLen) != TSDB_CODE_SUCCESS) {
return TSDB_CODE_TSC_INVALID_OPERATION;
}
if (validateSelectNodeList(pCmd, pQueryInfo, pSqlNode->pSelNodeList, false, timeWindowQuery, true) !=
if (validateSelectNodeList(pQueryInfo, pSqlNode->pSelNodeList, false, timeWindowQuery, true, msg, msgBufLen) !=
TSDB_CODE_SUCCESS) {
return TSDB_CODE_TSC_INVALID_OPERATION;
}
// todo NOT support yet
for (int32_t i = 0; i < tscNumOfExprs(pQueryInfo); ++i) {
SExprInfo* pExpr = tscExprGet(pQueryInfo, i);
SExprInfo* pExpr = getExprInfo(pQueryInfo, i);
int32_t f = pExpr->base.functionId;
if (f == TSDB_FUNC_STDDEV || f == TSDB_FUNC_PERCT || f == TSDB_FUNC_INTERP) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg6);
return buildInvalidOperationMsg(msg, msgBufLen, msg6);
}
if ((timeWindowQuery || pQueryInfo->stateWindow) && f == TSDB_FUNC_LAST) {
@ -913,17 +884,17 @@ int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, char* ms
}
STableMeta* pTableMeta = getMetaInfo(pQueryInfo, 0)->pTableMeta;
SSchema* pSchema = tscGetTableColumnSchema(pTableMeta, 0);
SSchema* pSchema = getOneColumnSchema(pTableMeta, 0);
if (pSchema->type != TSDB_DATA_TYPE_TIMESTAMP) {
int32_t numOfExprs = (int32_t)tscNumOfExprs(pQueryInfo);
for (int32_t i = 0; i < numOfExprs; ++i) {
SExprInfo* pExpr = tscExprGet(pQueryInfo, i);
SExprInfo* pExpr = getExprInfo(pQueryInfo, i);
int32_t f = pExpr->base.functionId;
if (f == TSDB_FUNC_DERIVATIVE || f == TSDB_FUNC_TWA || f == TSDB_FUNC_IRATE) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg7);
return buildInvalidOperationMsg(msg, msgBufLen, msg7);
}
}
}
@ -935,7 +906,7 @@ int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, char* ms
}
} else {
if (pQueryInfo->numOfTables > 1) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg8);
return buildInvalidOperationMsg(msg, msgBufLen, msg8);
}
}
@ -956,7 +927,7 @@ int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, char* ms
// check if the first column of the nest query result is timestamp column
SColumn* pCol = taosArrayGetP(pQueryInfo->colList, 0);
if (pCol->info.type != TSDB_DATA_TYPE_TIMESTAMP) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg4);
return buildInvalidOperationMsg(msg, msgBufLen, msg4);
}
if (validateFunctionsInIntervalOrGroupbyQuery(pCmd, pQueryInfo) != TSDB_CODE_SUCCESS) {
@ -1005,7 +976,7 @@ int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, char* ms
size_t numOfTables = taosArrayGetSize(pSqlNode->from->list);
if (numOfTables > TSDB_MAX_JOIN_TABLE_NUM) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg2);
return buildInvalidOperationMsg(msg, msgBufLen, msg2);
}
// set all query tables, which are maybe more than one.
@ -1033,7 +1004,7 @@ int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, char* ms
pSqlNode->pWhere = NULL;
} else {
if (taosArrayGetSize(pSqlNode->from->list) > 1) { // Cross join not allowed yet
return parserSetInvalidOperatorMsg(msg, msgBufLen, "cross join not supported yet");
return buildInvalidOperationMsg(msg, msgBufLen, "cross join not supported yet");
}
}
@ -1041,7 +1012,7 @@ int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, char* ms
int32_t timeWindowQuery =
(TPARSER_HAS_TOKEN(pSqlNode->interval.interval) || TPARSER_HAS_TOKEN(pSqlNode->sessionVal.gap));
if (validateSelectNodeList(pCmd, pQueryInfo, pSqlNode->pSelNodeList, joinQuery, timeWindowQuery, false) !=
if (validateSelectNodeList(pQueryInfo, pSqlNode->pSelNodeList, joinQuery, timeWindowQuery, false, msg, msgBufLen) !=
TSDB_CODE_SUCCESS) {
return TSDB_CODE_TSC_INVALID_OPERATION;
}
@ -1067,11 +1038,11 @@ int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, char* ms
}
if (tscQueryTags(pQueryInfo)) {
SExprInfo* pExpr1 = tscExprGet(pQueryInfo, 0);
SExprInfo* pExpr1 = getExprInfo(pQueryInfo, 0);
if (pExpr1->base.functionId != TSDB_FUNC_TID_TAG) {
if ((pQueryInfo->colCond && taosArrayGetSize(pQueryInfo->colCond) > 0) || IS_TSWINDOW_SPECIFIED(pQueryInfo->window)) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg5);
return buildInvalidOperationMsg(msg, msgBufLen, msg5);
}
}
}
@ -1108,14 +1079,14 @@ int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, char* ms
}
if (!hasTimestampForPointInterpQuery(pQueryInfo)) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg1);
return buildInvalidOperationMsg(msg, msgBufLen, msg1);
}
// in case of join query, time range is required.
if (QUERY_IS_JOIN_QUERY(pQueryInfo->type)) {
uint64_t timeRange = (uint64_t)pQueryInfo->window.ekey - pQueryInfo->window.skey;
if (timeRange == 0 && pQueryInfo->window.skey == 0) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg3);
return buildInvalidOperationMsg(msg, msgBufLen, msg3);
}
}
@ -1160,6 +1131,119 @@ int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, char* ms
return TSDB_CODE_SUCCESS; // Does not build query message here
}
/*
 * Validate every item of the SELECT list and register the matching expression/result field.
 *
 * pQueryInfo      statement being built; its colList is created here when still NULL
 * pSelNodeList    array of tSqlExprItem, must not be NULL
 * joinQuery       true when the statement is a join; DISTINCT is rejected in that case
 * timeWindowQuery forwarded to functionCompatibleCheck()
 * outerQuery      forwarded to addProjectionExprAndResultField()
 * msg/msgBufLen   buffer that receives the error text via buildInvalidOperationMsg()
 *
 * Returns TSDB_CODE_SUCCESS, or an error code (TSDB_CODE_TSC_INVALID_OPERATION, the value
 * returned by buildInvalidOperationMsg(), or the code returned by handleArithmeticExpr()).
 */
int32_t validateSelectNodeList(SQueryStmtInfo* pQueryInfo, SArray* pSelNodeList, bool joinQuery, bool timeWindowQuery, bool outerQuery, char* msg, int32_t msgBufLen) {
assert(pSelNodeList != NULL);
const char* msg1 = "too many items in selection clause";
const char* msg2 = "functions or others can not be mixed up";
const char* msg3 = "not support query expression";
const char* msg4 = "not support distinct mixed with proj/agg func";
const char* msg5 = "invalid function name";
const char* msg6 = "not support distinct mixed with join";
const char* msg7 = "not support distinct mixed with groupby";
const char* msg8 = "not support distinct in nest query";
const char* msg9 = "_block_dist not support subquery, only support stable/table";
// reject selection lists with more than TSDB_MAX_COLUMNS items
if (taosArrayGetSize(pSelNodeList) > TSDB_MAX_COLUMNS) {
return buildInvalidOperationMsg(msg, msgBufLen, msg1);
}
// lazily create the list of referenced source columns
if (pQueryInfo->colList == NULL) {
pQueryInfo->colList = taosArrayInit(4, POINTER_BYTES);
}
bool hasDistinct = false;
bool hasAgg = false;
size_t numOfExpr = taosArrayGetSize(pSelNodeList);
// position of the first item carrying DISTINCT, -1 while none has been seen
int32_t distIdx = -1;
for (int32_t i = 0; i < numOfExpr; ++i) {
int32_t outputIndex = (int32_t)tscNumOfExprs(pQueryInfo);
tSqlExprItem* pItem = taosArrayGet(pSelNodeList, i);
if (hasDistinct == false) {
hasDistinct = (pItem->distinct == true);
distIdx = hasDistinct ? i : -1;
}
int32_t type = pItem->pNode->type;
if (type == SQL_NODE_SQLFUNCTION) {
hasAgg = true;
// DISTINCT together with a function is rejected after the loop (msg4), so stop scanning here
if (hasDistinct) break;
// NOTE(review): the result is stored in pItem->functionId, but every check below reads
// pItem->pNode->functionId -- presumably the node field was meant; confirm.
pItem->functionId = qIsBuiltinFunction(pItem->pNode->Expr.operand.z, pItem->pNode->Expr.operand.n);
if (pItem->pNode->functionId == TSDB_FUNC_BLKINFO && taosArrayGetSize(pQueryInfo->pUpstream) > 0) {
return buildInvalidOperationMsg(msg, msgBufLen, msg9);
}
SUdfInfo* pUdfInfo = NULL;
// a negative function id is treated as "not built-in": look the name up among the UDFs
if (pItem->pNode->functionId < 0) {
pUdfInfo = isValidUdf(pQueryInfo->pUdfInfo, pItem->pNode->Expr.operand.z, pItem->pNode->Expr.operand.n);
if (pUdfInfo == NULL) {
return buildInvalidOperationMsg(msg, msgBufLen, msg5);
}
pItem->pNode->functionId = pUdfInfo->functionId;
}
// sql function in selection clause, append sql function info in pSqlCmd structure sequentially
// NOTE(review): pCmd is neither a parameter nor a local of this function -- looks like a
// leftover of the previous signature; confirm (same for the other pCmd uses below).
if (addExprAndResultField(pCmd, pQueryInfo, outputIndex, pItem, true, pUdfInfo) != TSDB_CODE_SUCCESS) {
return TSDB_CODE_TSC_INVALID_OPERATION;
}
} else if (type == SQL_NODE_TABLE_COLUMN || type == SQL_NODE_VALUE) {
// use the dynamic array list to decide if the function is valid or not
// select table_name1.field_name1, table_name2.field_name2 from table_name1, table_name2
if (addProjectionExprAndResultField(pCmd, pQueryInfo, pItem, outerQuery) != TSDB_CODE_SUCCESS) {
return TSDB_CODE_TSC_INVALID_OPERATION;
}
} else if (type == SQL_NODE_EXPR) {
int32_t code = handleArithmeticExpr(pCmd, pQueryInfo, i, pItem);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
} else {
return buildInvalidOperationMsg(msg, msgBufLen, msg3);
}
// re-check the limit after each item, now against the number of output fields
if (pQueryInfo->fieldsInfo.numOfOutput > TSDB_MAX_COLUMNS) {
return buildInvalidOperationMsg(msg, msgBufLen, msg1);
}
}
//TODO(dengyihao), refactor as function
//handle distinct func mixed with other func
if (hasDistinct == true) {
// DISTINCT must be on the first item and must not be combined with any function
if (distIdx != 0 || hasAgg) {
return buildInvalidOperationMsg(msg, msgBufLen, msg4);
}
if (joinQuery) {
return buildInvalidOperationMsg(msg, msgBufLen, msg6);
}
if (pQueryInfo->groupbyExpr.numOfGroupCols != 0) {
return buildInvalidOperationMsg(msg, msgBufLen, msg7);
}
if (pQueryInfo->pDownstream != NULL) {
return buildInvalidOperationMsg(msg, msgBufLen, msg8);
}
pQueryInfo->distinct = true;
}
// there is only one user-defined column in the final result field, add the timestamp column.
size_t numOfSrcCols = taosArrayGetSize(pQueryInfo->colList);
if ((numOfSrcCols <= 0 || !hasNoneUserDefineExpr(pQueryInfo)) && !tscQueryTags(pQueryInfo) && !tscQueryBlockInfo(pQueryInfo)) {
addPrimaryTsColIntoResult(pQueryInfo, pCmd);
}
if (!functionCompatibleCheck(pQueryInfo, joinQuery, timeWindowQuery)) {
return buildInvalidOperationMsg(msg, msgBufLen, msg2);
}
return TSDB_CODE_SUCCESS;
}
int32_t evaluateSqlNode(SSqlNode* pNode, int32_t tsPrecision, char* msg, int32_t msgBufLen) {
assert(pNode != NULL && msg != NULL && msgBufLen > 0);
if (pNode->pWhere == NULL) {

View File

@ -17,10 +17,17 @@
#include "parserInt.h"
#include "parserUtil.h"
#include "ttoken.h"
#include "executor.h"
#include "function.h"
/*
 * Tell whether a SQL string is an INSERT/IMPORT statement.
 *
 * Leading '(' tokens are skipped; the first token that is not TK_LP decides the result.
 * The previous body began with an unconditional `return false;`, which made the token
 * scan below unreachable.
 *
 * pStr    SQL text handed to tStrGetToken()
 * length  not used by the scan itself
 * return  true when the deciding token is TK_INSERT or TK_IMPORT
 */
bool qIsInsertSql(const char* pStr, size_t length) {
  int32_t index = 0;

  do {
    SToken t0 = tStrGetToken(pStr, &index, false);
    if (t0.type != TK_LP) {
      return t0.type == TK_INSERT || t0.type == TK_IMPORT;
    }
  } while (1);
}
int32_t qParseQuerySql(const char* pStr, size_t length, struct SQueryStmtInfo** pQueryInfo, int64_t id, char* msg, int32_t msgLen) {
@ -110,12 +117,12 @@ int32_t getTableNameFromSqlNode(SSqlNode* pSqlNode, SArray* tableNameList, char*
SToken* t = &item->tableName;
if (t->type == TK_INTEGER || t->type == TK_FLOAT || t->type == TK_STRING) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg1);
return buildInvalidOperationMsg(msg, msgBufLen, msg1);
}
// tscDequoteAndTrimToken(t);
if (parserValidateIdToken(t) != TSDB_CODE_SUCCESS) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, msg1);
return buildInvalidOperationMsg(msg, msgBufLen, msg1);
}
SName name = {0};
@ -144,7 +151,7 @@ int32_t qParserExtractRequestedMetaInfo(const SSqlInfo* pSqlInfo, SMetaReq* pMet
for (int32_t i = 0; i < size; ++i) {
SSqlNode* pSqlNode = taosArrayGetP(pSqlInfo->list, i);
if (pSqlNode->from == NULL) {
return parserSetInvalidOperatorMsg(msg, msgBufLen, "invalid from clause");
return buildInvalidOperationMsg(msg, msgBufLen, "invalid from clause");
}
// load the table meta in the FROM clause
@ -179,7 +186,7 @@ int32_t qParserExtractRequestedMetaInfo(const SSqlInfo* pSqlInfo, SMetaReq* pMet
}
// Let's assume that it is an UDF/UDAF, if it is not a built-in function.
if (!isBuiltinFunction(t->z, t->n)) {
if (!qIsBuiltinFunction(t->z, t->n)) {
char* fname = strndup(t->z, t->n);
taosArrayPush(pMetaInfo->pUdf, &fname);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,370 @@
#include "queryInfoUtil.h"
#include "tmsgtype.h"
#include "astGenerator.h"
#include "os.h"
#include "parser.h"
#include "parserInt.h"
#include "parserUtil.h"
// Schema entry describing the "tbname" pseudo column; its column id is TSDB_TBNAME_COLUMN_INDEX
// and it is typed as a binary string of TSDB_TABLE_NAME_LEN plus the var-string header.
static struct SSchema _s = {
.colId = TSDB_TBNAME_COLUMN_INDEX,
.type = TSDB_DATA_TYPE_BINARY,
.bytes = TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE,
.name = "tbname",
};
// Returns the address of the file-scope "tbname" schema entry above.
// The object is shared by all callers and is not const, so callers must not modify it.
SSchema* getTbnameColumnSchema() {
return &_s;
}
/* Count of entries in the statement's expression list. */
size_t getNumOfExprs(SQueryStmtInfo* pQueryInfo) {
  SArray* pExprs = pQueryInfo->exprList;
  return taosArrayGetSize(pExprs);
}
/*
 * Return the schema entry at position colIndex of the table's schema array.
 *
 * The schema array holds the normal columns followed by the tags (column lookup iterates
 * over numOfColumns + numOfTags entries, and getTableTagSchema() asks for the entry at
 * index numOfColumns). The bound check therefore has to cover the tags as well; limiting
 * it to numOfColumns made every tag lookup trip the assertion.
 *
 * pTableMeta  table meta with a non-NULL schema
 * colIndex    0-based index, 0 <= colIndex < numOfColumns + numOfTags
 * return      pointer into pTableMeta->schema (not a copy)
 */
SSchema* getOneColumnSchema(const STableMeta* pTableMeta, int32_t colIndex) {
  assert(pTableMeta != NULL && pTableMeta->schema != NULL);
  assert(colIndex >= 0 && colIndex < (getNumOfColumns(pTableMeta) + getNumOfTags(pTableMeta)));

  SSchema* pSchema = (SSchema*) pTableMeta->schema;
  return &pSchema[colIndex];
}
/* Return (by value) the common table information stored in the table meta. */
STableComInfo getTableInfo(const STableMeta* pTableMeta) {
  assert(pTableMeta != NULL);

  STableComInfo info = pTableMeta->tableInfo;
  return info;
}
int32_t getNumOfColumns(const STableMeta* pTableMeta) {
assert(pTableMeta != NULL);
// table created according to super table, use data from super table
return getTableInfo(pTableMeta).numOfColumns;
}
int32_t getNumOfTags(const STableMeta* pTableMeta) {
assert(pTableMeta != NULL);
return getTableInfo(pTableMeta).numOfTags;
}
/* Return the start of the table's schema array (the first column's entry). */
SSchema *getTableColumnSchema(const STableMeta *pTableMeta) {
  assert(pTableMeta != NULL);

  SSchema *pFirst = (SSchema*) pTableMeta->schema;
  return pFirst;
}
SSchema* getTableTagSchema(const STableMeta* pTableMeta) {
assert(pTableMeta != NULL && (pTableMeta->tableType == TSDB_SUPER_TABLE || pTableMeta->tableType == TSDB_CHILD_TABLE));
return getOneColumnSchema(pTableMeta, getTableInfo(pTableMeta).numOfColumns);
}
/*
 * Allocate a zeroed SExprInfo and fill its base SSqlExpr from the arguments.
 *
 * The source-column fields (colId/colBytes/colType and, for real table columns, the
 * "alias.column" name) depend on pColIndex->columnIndex:
 *   - TSDB_TBNAME_COLUMN_INDEX: taken from the tbname pseudo schema
 *   - <= TSDB_UD_COLUMN_INDEX:  taken from the type/size arguments
 *   - functionId == 0:          asserts (placeholder branch), then binary defaults
 *   - tag column:               entry columnIndex of the tag schema
 *   - otherwise:                entry columnIndex of the table schema, when a table meta exists
 *
 * Returns the new expression, or NULL when the allocation fails. The caller owns the result.
 */
SExprInfo* createExprInfo(STableMetaInfo* pTableMetaInfo, int16_t functionId, SColumnIndex* pColIndex, int16_t type,
                          int16_t size, int16_t resColId, int16_t interSize, int32_t colType) {
  SExprInfo* pExprInfo = calloc(1, sizeof(SExprInfo));
  if (pExprInfo == NULL) {
    return NULL;
  }

  SSqlExpr* pBase = &pExprInfo->base;
  pBase->functionId = functionId;

  // set the correct columnIndex index
  if (pColIndex->columnIndex == TSDB_TBNAME_COLUMN_INDEX) {
    SSchema* pTbname = getTbnameColumnSchema();

    pBase->colInfo.colId = TSDB_TBNAME_COLUMN_INDEX;
    pBase->colBytes      = pTbname->bytes;
    pBase->colType       = pTbname->type;
  } else if (pColIndex->columnIndex <= TSDB_UD_COLUMN_INDEX) {
    pBase->colInfo.colId = pColIndex->columnIndex;
    pBase->colBytes      = size;
    pBase->colType       = type;
  } else if (functionId == 0/*TSDB_FUNC_BLKINFO*/) {
    assert(0);
    pBase->colInfo.colId = pColIndex->columnIndex;
    pBase->colBytes      = TSDB_MAX_BINARY_LEN;
    pBase->colType       = TSDB_DATA_TYPE_BINARY;
  } else if (TSDB_COL_IS_TAG(colType)) {
    int32_t  nameLen = tListLen(pBase->colInfo.name);
    SSchema* pTags   = getTableTagSchema(pTableMetaInfo->pTableMeta);
    SSchema* pTag    = &pTags[pColIndex->columnIndex];

    pBase->colInfo.colId = pTag->colId;
    pBase->colBytes      = pTag->bytes;
    pBase->colType       = pTag->type;
    snprintf(pBase->colInfo.name, nameLen, "%s.%s", pTableMetaInfo->aliasName, pTag->name);
  } else if (pTableMetaInfo->pTableMeta != NULL) {
    // in handling select database/version/server_status(), the pTableMeta is NULL
    int32_t  nameLen = tListLen(pBase->colInfo.name);
    SSchema* pCol    = getOneColumnSchema(pTableMetaInfo->pTableMeta, pColIndex->columnIndex);

    pBase->colInfo.colId = pCol->colId;
    pBase->colBytes      = pCol->bytes;
    pBase->colType       = pCol->type;
    snprintf(pBase->colInfo.name, nameLen, "%s.%s", pTableMetaInfo->aliasName, pCol->name);
  }

  // fields that do not depend on the kind of source column
  pBase->colInfo.flag     = colType;
  pBase->colInfo.colIndex = pColIndex->columnIndex;
  pBase->resType          = type;
  pBase->resBytes         = size;
  pBase->resColId         = resColId;
  pBase->interBytes       = interSize;

  if (pTableMetaInfo->pTableMeta) {
    pBase->uid = pTableMetaInfo->pTableMeta->uid;
  }

  return pExprInfo;
}
/*
 * Store pExprInfo (the pointer itself) at position index of the statement's expression list:
 * appended when index equals the current size, inserted at index otherwise.
 */
void addExprInfo(SQueryStmtInfo* pQueryInfo, int32_t index, SExprInfo* pExprInfo) {
  assert(pQueryInfo != NULL && pQueryInfo->exprList != NULL);

  int32_t current = (int32_t) taosArrayGetSize(pQueryInfo->exprList);
  if (index != current) {
    taosArrayInsert(pQueryInfo->exprList, index, &pExprInfo);
    return;
  }

  taosArrayPush(pQueryInfo->exprList, &pExprInfo);
}
/*
 * Overwrite the function id, the source column id/index and the result type/size
 * of an existing expression. No other field is touched.
 */
void updateExprInfo(SExprInfo* pExprInfo, int16_t functionId, int32_t colId, int16_t srcColumnIndex, int16_t resType, int16_t resSize) {
  assert(pExprInfo != NULL);

  SSqlExpr* pBase = &pExprInfo->base;

  pBase->functionId       = functionId;
  pBase->colInfo.colIndex = srcColumnIndex;
  pBase->colInfo.colId    = colId;
  pBase->resType          = resType;
  pBase->resBytes         = resSize;
}
/* Fetch the expression pointer stored at position index of the statement's expression list. */
SExprInfo* getExprInfo(SQueryStmtInfo* pQueryInfo, int32_t index) {
  assert(pQueryInfo != NULL && pQueryInfo->exprList && index >= 0);

  SExprInfo* pExprInfo = taosArrayGetP(pQueryInfo->exprList, index);
  return pExprInfo;
}
/* Destroy every expression held in the array via tSqlExprDestroy(), then the array itself. */
void destroyExprInfo(SArray* pExprInfo) {
  size_t  total = taosArrayGetSize(pExprInfo);
  int32_t idx   = 0;

  while (idx < total) {
    SExprInfo* pOne = taosArrayGetP(pExprInfo, idx);
    tSqlExprDestroy(&pOne->base);
    ++idx;
  }

  taosArrayDestroy(pExprInfo);
}
/*
 * Append one parameter to an expression.
 *
 * The raw bytes in argument are converted into a variant of the given type and stored in
 * the next free slot of pExpr->param; numOfParams is then incremented.
 *
 * Two defects of the previous version are fixed:
 *   - the precondition was joined with `||`, so it held as soon as any single operand was
 *     valid and a NULL pExpr could slip through;
 *   - the capacity was only checked after the slot had been written, i.e. after a possible
 *     out-of-bounds write into param[].
 *
 * pExpr     expression receiving the parameter, must not be NULL
 * argument  source bytes, must not be NULL
 * type      data type of the parameter
 * bytes     number of bytes in argument, must not be 0
 */
void addExprParam(SSqlExpr* pExpr, char* argument, int32_t type, int32_t bytes) {
  assert(pExpr != NULL && argument != NULL && bytes != 0);

  // there must be a free slot before anything is written
  assert(pExpr->numOfParams >= 0 && pExpr->numOfParams < (int32_t) tListLen(pExpr->param));

  // set parameter value
  // transfer to tVariant from byte data/no ascii data
  taosVariantCreateFromBinary(&pExpr->param[pExpr->numOfParams], argument, bytes, type);
  pExpr->numOfParams += 1;
}
/*
 * Copy src into dst: a plain struct copy first, then private copies of the column filter
 * array (only when src has filters) and of each parameter variant.
 *
 * The expression tree (pExpr) is not duplicated yet; the assert(0) marks that gap, so the
 * function aborts at that point in builds where assertions are enabled.
 */
void assignExprInfo(SExprInfo* dst, const SExprInfo* src) {
  assert(dst != NULL && src != NULL);

  *dst = *src;

  if (src->base.flist.numOfFilters > 0) {
    size_t nBytes = sizeof(SColumnFilterInfo) * src->base.flist.numOfFilters;

    dst->base.flist.filterInfo = calloc(src->base.flist.numOfFilters, sizeof(SColumnFilterInfo));
    memcpy(dst->base.flist.filterInfo, src->base.flist.filterInfo, nBytes);
  }

  assert(0);
  // dst->pExpr = exprdup(src->pExpr);

  // drop the shallow variant copies before assigning them one by one
  memset(dst->base.param, 0, sizeof(SVariant) * tListLen(dst->base.param));

  int32_t k = 0;
  while (k < src->base.numOfParams) {
    taosVariantAssign(&dst->base.param[k], &src->base.param[k]);
    ++k;
  }
}
/*
 * Append to dst every expression of src whose uid equals the given uid.
 * With deepcopy each match is cloned through assignExprInfo(); otherwise the pointer
 * itself is shared between both arrays. Always returns 0.
 */
int32_t copyOneExprInfo(SArray* dst, const SArray* src, uint64_t uid, bool deepcopy) {
  assert(src != NULL && dst != NULL);

  size_t total = taosArrayGetSize(src);
  for (int32_t k = 0; k < total; ++k) {
    SExprInfo* pSrcExpr = taosArrayGetP(src, k);
    if (pSrcExpr->base.uid != uid) {
      continue;
    }

    if (!deepcopy) {
      taosArrayPush(dst, &pSrcExpr);
      continue;
    }

    SExprInfo* pClone = calloc(1, sizeof(SExprInfo));
    assignExprInfo(pClone, pSrcExpr);
    taosArrayPush(dst, &pClone);
  }

  return 0;
}
/*
 * Append every expression of src to dst.
 * With deepcopy each one is cloned through assignExprInfo(); otherwise the pointer
 * itself is shared between both arrays. Always returns 0.
 */
int32_t copyAllExprInfo(SArray* dst, const SArray* src, bool deepcopy) {
  assert(src != NULL && dst != NULL);

  size_t total = taosArrayGetSize(src);
  for (int32_t k = 0; k < total; ++k) {
    SExprInfo* pSrcExpr = taosArrayGetP(src, k);

    if (!deepcopy) {
      taosArrayPush(dst, &pSrcExpr);
      continue;
    }

    SExprInfo* pClone = calloc(1, sizeof(SExprInfo));
    assignExprInfo(pClone, pSrcExpr);
    taosArrayPush(dst, &pClone);
  }

  return 0;
}
//void* tSqlExprDestroy(SExprInfo* pExpr) {
// if (pExpr == NULL) {
// return NULL;
// }
//
// SSqlExpr* p = &pExpr->base;
// for(int32_t i = 0; i < tListLen(p->param); ++i) {
// taosVariantDestroy(&p->param[i]);
// }
//
// if (p->flist.numOfFilters > 0) {
// tfree(p->flist.filterInfo);
// }
//
// if (pExpr->pExpr != NULL) {
// tExprTreeDestroy(pExpr->pExpr, NULL);
// }
//
// tfree(pExpr);
// return NULL;
//}
/* Sum of resBytes over all expressions in the list; 0 for an empty list. */
int32_t getResRowLength(SArray* pExprList) {
  size_t total = taosArrayGetSize(pExprList);
  if (total == 0) {
    return 0;
  }

  int32_t rowBytes = 0;
  int32_t k = 0;
  while (k < total) {
    SExprInfo* pOne = taosArrayGetP(pExprList, k);
    rowBytes += pOne->base.resBytes;
    ++k;
  }

  return rowBytes;
}
// Release the resources owned by one SQueryStmtInfo (conditions, field info, expression lists,
// column list, group-by columns, fill values, buffer, upstream array) and reset the matching
// members. The pQueryInfo object itself and its table meta info are not freed here; the
// caller (freeQueryInfo) does that.
static void freeQueryInfoImpl(SQueryStmtInfo* pQueryInfo) {
cleanupTagCond(&pQueryInfo->tagCond);
cleanupColumnCond(&pQueryInfo->colCond);
cleanupFieldInfo(&pQueryInfo->fieldsInfo);
// NOTE(review): exprList is destroyed without a NULL guard while exprList1 below is guarded --
// presumably destroyExprInfo()/taosArrayGetSize() tolerate NULL; confirm.
destroyExprInfo(pQueryInfo->exprList);
pQueryInfo->exprList = NULL;
if (pQueryInfo->exprList1 != NULL) {
destroyExprInfo(pQueryInfo->exprList1);
pQueryInfo->exprList1 = NULL;
}
columnListDestroy(pQueryInfo->colList);
pQueryInfo->colList = NULL;
if (pQueryInfo->groupbyExpr.columnInfo != NULL) {
taosArrayDestroy(pQueryInfo->groupbyExpr.columnInfo);
pQueryInfo->groupbyExpr.columnInfo = NULL;
}
pQueryInfo->fillType = 0;
tfree(pQueryInfo->fillVal);
tfree(pQueryInfo->buf);
// only the array is destroyed here; the upstream statements it points to are released
// by freeQueryInfo() before this function runs on the parent
taosArrayDestroy(pQueryInfo->pUpstream);
pQueryInfo->pUpstream = NULL;
pQueryInfo->bufLen = 0;
}
/*
 * Free a chain of statements linked through ->sibling.
 * For each statement the directly attached upstream statements are released first
 * (resources, table meta info, then the object), followed by the statement itself.
 * removeCachedMeta and id are forwarded to clearAllTableMetaInfo().
 */
void freeQueryInfo(SQueryStmtInfo* pQueryInfo, bool removeCachedMeta, uint64_t id) {
  SQueryStmtInfo* pNext = NULL;

  for (; pQueryInfo != NULL; pQueryInfo = pNext) {
    // remember the successor before the current node is released
    pNext = pQueryInfo->sibling;

    size_t  nUpstream = taosArrayGetSize(pQueryInfo->pUpstream);
    int32_t k = 0;
    while (k < nUpstream) {
      SQueryStmtInfo* pUp = taosArrayGetP(pQueryInfo->pUpstream, k);

      freeQueryInfoImpl(pUp);
      clearAllTableMetaInfo(pUp, removeCachedMeta, id);
      tfree(pUp);
      ++k;
    }

    freeQueryInfoImpl(pQueryInfo);
    clearAllTableMetaInfo(pQueryInfo, removeCachedMeta, id);
    tfree(pQueryInfo);
  }
}
/*
 * Build a new array holding the function id (stored as int16_t elements) of every
 * expression in pExprInfoList, in list order. The caller owns the returned array.
 */
SArray* extractFunctionIdList(SArray* pExprInfoList) {
  assert(pExprInfoList != NULL);

  size_t  total = taosArrayGetSize(pExprInfoList);
  SArray* pIds  = taosArrayInit(total, sizeof(int16_t));

  int32_t k = 0;
  while (k < total) {
    SExprInfo* pOne = taosArrayGetP(pExprInfoList, k);
    taosArrayPush(pIds, &pOne->base.functionId);
    ++k;
  }

  return pIds;
}
bool tscIsProjectionQueryOnSTable(SQueryStmtInfo* pQueryInfo, int32_t tableIndex);
/*
 * True when the statement is a projection query on a super table (as decided by
 * tscIsProjectionQueryOnSTable) and no order-by column is set (orderColId < 0).
 */
bool tscNonOrderedProjectionQueryOnSTable(SQueryStmtInfo* pQueryInfo, int32_t tableIndex) {
  bool projOnSTable = tscIsProjectionQueryOnSTable(pQueryInfo, tableIndex);

  // a non-negative order column id means an order-by exists, so the query is ordered
  return projOnSTable && (pQueryInfo->order.orderColId < 0);
}
/*
 * True when the statement is a projection query on a super table (as decided by
 * tscIsProjectionQueryOnSTable) and an order-by column is set (orderColId >= 0).
 */
bool tscOrderedProjectionQueryOnSTable(SQueryStmtInfo* pQueryInfo, int32_t tableIndex) {
  bool projOnSTable = tscIsProjectionQueryOnSTable(pQueryInfo, tableIndex);

  // a non-negative order column id means an order-by exists: an ordered projection query
  return projOnSTable && (pQueryInfo->order.orderColId >= 0);
}
/*
 * True when the query restricts its data: either the time window differs from
 * [INT64_MIN, INT64_MAX], or at least one column in colList carries a filter.
 */
bool tscHasColumnFilter(SQueryStmtInfo* pQueryInfo) {
  // filter on primary timestamp column
  bool fullRange = (pQueryInfo->window.skey == INT64_MIN) && (pQueryInfo->window.ekey == INT64_MAX);
  if (!fullRange) {
    return true;
  }

  size_t  total = taosArrayGetSize(pQueryInfo->colList);
  int32_t k = 0;
  while (k < total) {
    SColumn* pOne = taosArrayGetP(pQueryInfo->colList, k);
    if (pOne->info.flist.numOfFilters > 0) {
      return true;
    }
    ++k;
  }

  return false;
}
//void tscClearInterpInfo(SQueryStmtInfo* pQueryInfo) {
// if (!tscIsPointInterpQuery(pQueryInfo)) {
// return;
// }
//
// pQueryInfo->fillType = TSDB_FILL_NONE;
// tfree(pQueryInfo->fillVal);
//}

View File

@ -0,0 +1,451 @@
#include "tpagedfile.h"
#include "thash.h"
#include "stddef.h"
#include "taoserror.h"
#include "tcompression.h"
// Address of the page payload: the page's pData advanced by POINTER_BYTES, i.e. the first
// POINTER_BYTES bytes of pData are skipped (presumably a back-pointer header -- confirm).
#define GET_DATA_PAYLOAD(_p) ((char *)(_p)->pData + POINTER_BYTES)
// True when the number of entries in the buffer's lruList has reached inMemPages.
#define NO_IN_MEM_AVAILABLE_PAGES(_b) (listNEles((_b)->lruList) >= (_b)->inMemPages)
/*
 * Create a disk-backed result buffer.
 *
 * pResultBuf    receives the new buffer (set to NULL when the allocation fails)
 * pagesize      size of one page in bytes, must be > 0
 * inMemBufSize  in-memory budget in bytes; at least two pages must fit
 * qId           query id stored in the buffer
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_COM_OUT_OF_MEMORY when the buffer object cannot
 * be allocated.
 *
 * The preconditions are now checked before any use of pagesize: previously
 * inMemBufSize/pagesize was evaluated ahead of the assertion, so pagesize == 0 caused a
 * division by zero, and the check itself (pagesize * 2) could overflow.
 *
 * NOTE(review): the results of the list/hash/array/string allocations below are not checked.
 */
int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t pagesize, int32_t inMemBufSize, uint64_t qId) {
  // at least 2 pages must be in memory; written as a division so it cannot overflow
  assert(pagesize > 0);
  assert(inMemBufSize / pagesize >= 2);

  *pResultBuf = calloc(1, sizeof(SDiskbasedResultBuf));

  SDiskbasedResultBuf* pResBuf = *pResultBuf;
  if (pResBuf == NULL) {
    return TSDB_CODE_COM_OUT_OF_MEMORY;
  }

  pResBuf->pageSize     = pagesize;
  pResBuf->numOfPages   = 0;                        // all pages are in buffer in the first place
  pResBuf->totalBufSize = 0;
  pResBuf->inMemPages   = inMemBufSize / pagesize;  // maximum allowed pages, it is a soft limit.
  pResBuf->allocateId   = -1;
  pResBuf->comp         = true;
  pResBuf->file         = NULL;
  pResBuf->qId          = qId;
  pResBuf->fileSize     = 0;

  pResBuf->lruList = tdListNew(POINTER_BYTES);

  // init id hash table
  pResBuf->groupSet  = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);
  pResBuf->assistBuf = malloc(pResBuf->pageSize + 2); // EXTRA BYTES
  pResBuf->all       = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);

  // the temporary file is only named here; it is opened elsewhere
  char path[PATH_MAX] = {0};
  taosGetTmpfilePath("qbuf", path);
  pResBuf->path = strdup(path);

  pResBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t));

//  qDebug("QInfo:0x%"PRIx64" create resBuf for output, page size:%d, inmem buf pages:%d, file:%s", qId, pResBuf->pageSize,
//         pResBuf->inMemPages, pResBuf->path);

  return TSDB_CODE_SUCCESS;
}
/**
 * Open (create/truncate) the backing file at pResultBuf->path in "wb+" mode.
 *
 * @param pResultBuf result buffer; its file member is assigned the fopen() result
 * @return TSDB_CODE_SUCCESS, or TAOS_SYSTEM_ERROR(errno) when the file cannot be opened
 */
static int32_t createDiskFile(SDiskbasedResultBuf* pResultBuf) {
  pResultBuf->file = fopen(pResultBuf->path, "wb+");

  if (pResultBuf->file != NULL) {
    return TSDB_CODE_SUCCESS;
  }

  // qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno));
  return TAOS_SYSTEM_ERROR(errno);
}
/**
 * Compress a page in place when compression is enabled for the buffer.
 *
 * @param data       page payload; overwritten with the compressed bytes when comp is set
 * @param srcSize    number of input bytes
 * @param dst        [out] resulting length: the compressed length, or srcSize when comp is off
 * @param pResultBuf result buffer providing the comp flag and the scratch buffer (assistBuf)
 * @return data
 */
static char* doCompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) {
  if (pResultBuf->comp) {
    // compress into the scratch buffer first, then copy the result back over the input
    int32_t compressedLen = tsCompressString(data, srcSize, 1, pResultBuf->assistBuf, srcSize, ONE_STAGE_COMP, NULL, 0);
    *dst = compressedLen;
    memcpy(data, pResultBuf->assistBuf, compressedLen);
  } else {
    *dst = srcSize;
  }

  return data;
}
/**
 * Decompress a page in place when compression is enabled for the buffer.
 *
 * @param data       buffer holding srcSize input bytes; overwritten with the decompressed bytes
 * @param srcSize    number of input bytes
 * @param dst        [out] value returned by tsDecompressString, or srcSize when comp is off
 * @param pResultBuf result buffer providing the comp flag, the scratch buffer and the page size
 * @return data; it is left untouched when the decompressed length is not positive
 */
static char* doDecompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) {
  if (pResultBuf->comp) {
    int32_t rawLen = tsDecompressString(data, srcSize, 1, pResultBuf->assistBuf, pResultBuf->pageSize, ONE_STAGE_COMP, NULL, 0);
    *dst = rawLen;

    // only copy back when something was actually produced
    if (rawLen > 0) {
      memcpy(data, pResultBuf->assistBuf, rawLen);
    }
  } else {
    *dst = srcSize;
  }

  return data;
}
/**
 * Choose the file offset at which a block of `size` bytes will be written.
 *
 * The first free-list item that is large enough is carved from its front; if there is no
 * free list or no item fits, the current end position (nextPos) is returned. nextPos itself
 * is not modified here.
 *
 * @param pResultBuf result buffer owning the free list and nextPos
 * @param size       number of bytes that need to be placed
 * @return file offset for the block
 */
static int32_t allocatePositionInFile(SDiskbasedResultBuf* pResultBuf, size_t size) {
  if (pResultBuf->pFree != NULL) {
    size_t numOfItems = taosArrayGetSize(pResultBuf->pFree);

    for (int32_t idx = 0; idx < numOfItems; ++idx) {
      SFreeListItem* pItem = taosArrayGet(pResultBuf->pFree, idx);
      if (pItem->len < size) {
        continue;  // too small, try the next recycled area
      }

      // take the space from the front of this item and shrink it accordingly
      int32_t pos = pItem->offset;
      pItem->offset += (int32_t)size;
      pItem->len    -= (int32_t)size;
      return pos;
    }
  }

  // no available recycle space, allocate new area in file
  return pResultBuf->nextPos;
}
/**
 * Write one unreferenced in-memory page to the backing file and detach its memory buffer.
 *
 * The payload is compressed (if enabled) and written at the page's file position. A new
 * position is allocated when the page is flushed for the first time, or when the data no
 * longer fits into the space it occupied before.
 *
 * Seek/write failures are recorded in terrno; the buffer is still detached and returned so
 * that the caller's bookkeeping stays unchanged.
 *
 * @param pResultBuf result buffer; its file must already be open
 * @param pg         page to flush; must not be in use and must have a memory buffer
 * @return the (zeroed) memory buffer formerly attached to pg, for reuse by the caller
 */
static char* doFlushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) {
  assert(!pg->used && pg->pData != NULL);

  int32_t size = -1;
  char* t = doCompressData(GET_DATA_PAYLOAD(pg), pResultBuf->pageSize, &size, pResultBuf);

  if (pg->info.offset == -1) {
    // this page is flushed to disk for the first time
    pg->info.offset = allocatePositionInFile(pResultBuf, size);
    pResultBuf->nextPos += size;
  } else if (pg->info.length < size) {
    // length becomes greater, current space is not enough:
    // 1. add current space to free list
    // NOTE(review): pFree is not created anywhere in the visible part of this file - confirm it is initialized
    taosArrayPush(pResultBuf->pFree, &pg->info);

    // 2. allocate new position, and update the info
    pg->info.offset = allocatePositionInFile(pResultBuf, size);
    pResultBuf->nextPos += size;
  }
  // otherwise the data still fits into its previous location and is overwritten in place

  // write to disk; fwrite returns the number of items written, so use 1-byte items to get a byte count
  if (fseek(pResultBuf->file, pg->info.offset, SEEK_SET) != 0) {
    terrno = TAOS_SYSTEM_ERROR(errno);
  } else if (fwrite(t, 1, size, pResultBuf->file) != (size_t)size) {
    terrno = TAOS_SYSTEM_ERROR(errno);
  }

  // track the file extent with the length that was just written, not the stale pg->info.length
  if (pResultBuf->fileSize < pg->info.offset + size) {
    pResultBuf->fileSize = pg->info.offset + size;
  }

  // hand the cleared buffer back to the caller; the page now lives on disk only
  char* ret = pg->pData;
  memset(ret, 0, pResultBuf->pageSize);

  pg->pData = NULL;
  pg->info.length = size;

  pResultBuf->statis.flushBytes += pg->info.length;
  return ret;
}
/**
 * Flush a page to disk, creating the backing file on first use.
 *
 * @param pResultBuf result buffer
 * @param pg         page to flush
 * @return the memory buffer released by the flush, or NULL (with terrno set) if the backing
 *         file could not be created
 */
static char* flushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) {
  // flushing only happens once the in-memory page budget has been reached
  assert(((int64_t) pResultBuf->numOfPages * pResultBuf->pageSize) == pResultBuf->totalBufSize);
  assert(pResultBuf->numOfPages >= pResultBuf->inMemPages);

  if (pResultBuf->file == NULL) {
    int32_t code = createDiskFile(pResultBuf);
    if (code != TSDB_CODE_SUCCESS) {
      terrno = code;
      return NULL;
    }
  }

  return doFlushPageToDisk(pResultBuf, pg);
}
/**
 * Read a flushed page back from the backing file into its (already attached) memory buffer
 * and decompress it in place.
 *
 * @param pResultBuf result buffer with an open backing file
 * @param pg         page whose info.offset/info.length describe its location in the file and
 *                   whose pData points to a buffer large enough for one page
 * @return pointer to the page payload, or NULL with terrno set when seeking or reading fails
 */
static char* loadPageFromDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) {
  // do not read from an arbitrary position if the seek failed
  if (fseek(pResultBuf->file, pg->info.offset, SEEK_SET) != 0) {
    terrno = TAOS_SYSTEM_ERROR(errno);
    return NULL;
  }

  int32_t ret = (int32_t)fread(GET_DATA_PAYLOAD(pg), 1, pg->info.length, pResultBuf->file);
  if (ret != pg->info.length) {
    // wrap errno so that a short read never leaves terrno at 0 while NULL is returned
    terrno = TAOS_SYSTEM_ERROR(errno);
    return NULL;
  }

  pResultBuf->statis.loadBytes += pg->info.length;

  // NOTE(review): the decompressed length is not validated here - confirm whether a
  // non-positive value should be reported as an error
  int32_t fullSize = 0;
  doDecompressData(GET_DATA_PAYLOAD(pg), pg->info.length, &fullSize, pResultBuf);

  return (char*)GET_DATA_PAYLOAD(pg);
}
/**
 * Create an empty page list for a group id that is not yet known and store it in groupSet.
 *
 * @param pResultBuf result buffer owning the group hash table
 * @param groupId    group id; must not be present in groupSet yet
 * @return the newly created page list
 */
static SIDList addNewGroup(SDiskbasedResultBuf* pResultBuf, int32_t groupId) {
  const char* key = (const char*) &groupId;
  assert(taosHashGet(pResultBuf->groupSet, key, sizeof(int32_t)) == NULL);

  // the hash table stores the pointer to the list, not the list itself
  SArray* pPageList = taosArrayInit(1, POINTER_BYTES);

  int32_t code = taosHashPut(pResultBuf->groupSet, key, sizeof(int32_t), &pPageList, POINTER_BYTES);
  assert(code == 0);

  return pPageList;
}
/**
 * Create the bookkeeping record for a new page and append it to the page list of its group
 * (the group list is created on demand).
 *
 * The new record has no memory buffer yet (pData == NULL), is marked as used and is not
 * linked into the LRU list. numOfPages is increased only when registration succeeds.
 *
 * @param pResultBuf result buffer
 * @param groupId    group the page belongs to
 * @param pageId     id assigned to the page
 * @return the registered page info, or NULL with terrno set to TSDB_CODE_COM_OUT_OF_MEMORY
 */
static SPageInfo* registerPage(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) {
  SPageInfo* ppi = malloc(sizeof(SPageInfo));
  if (ppi == NULL) {
    terrno = TSDB_CODE_COM_OUT_OF_MEMORY;
    return NULL;
  }

  ppi->pageId = pageId;
  ppi->pData  = NULL;
  ppi->info   = PAGE_INFO_INITIALIZER;
  ppi->used   = true;
  ppi->pn     = NULL;

  SIDList list = NULL;
  char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t));
  if (p == NULL) {  // it is a new group id
    list = addNewGroup(pResultBuf, groupId);
  } else {
    list = (SIDList) (*p);
  }

  // the list stores the pointer; taosArrayPush hands back the address of the stored slot
  SPageInfo** pSlot = (list != NULL) ? taosArrayPush(list, &ppi) : NULL;
  if (pSlot == NULL) {
    free(ppi);
    terrno = TSDB_CODE_COM_OUT_OF_MEMORY;
    return NULL;
  }

  pResultBuf->numOfPages += 1;
  return *pSlot;
}
/**
 * Walk the LRU list backward (TD_LIST_BACKWARD) and find the first page that is not in use.
 *
 * @param pResultBuf result buffer owning the LRU list
 * @return the list node of that page, or NULL when every page in the list is in use
 */
static SListNode* getEldestUnrefedPage(SDiskbasedResultBuf* pResultBuf) {
  SListIter iter = {0};
  tdListInitIter(pResultBuf->lruList, &iter, TD_LIST_BACKWARD);

  for (SListNode* pNode = tdListNext(&iter); pNode != NULL; pNode = tdListNext(&iter)) {
    SPageInfo* pInfo = *(SPageInfo**) pNode->data;
    assert(pInfo->pageId >= 0 && pInfo->pn == pNode);

    if (!pInfo->used) {
      return pNode;
    }
  }

  return NULL;
}
/**
 * Try to free one in-memory page buffer by flushing the eldest unreferenced page to disk.
 *
 * The page is removed from the LRU list only after the flush succeeded, so a failed flush
 * leaves the page fully intact in memory. When no page can be evicted (all pages are in use,
 * or the flush failed) the in-memory page limit is raised by 50% so that the caller may
 * allocate a fresh buffer instead.
 *
 * @param pResultBuf result buffer
 * @return a reusable page buffer, or NULL if nothing was evicted
 */
static char* evicOneDataPage(SDiskbasedResultBuf* pResultBuf) {
  SListNode* pn = getEldestUnrefedPage(pResultBuf);

  if (pn != NULL) {
    SPageInfo* d = *(SPageInfo**) pn->data;
    assert(d->pn == pn);

    char* bufPage = flushPageToDisk(pResultBuf, d);
    if (bufPage != NULL) {
      // the page content is on disk now, it is safe to detach the page from the LRU list
      pResultBuf->statis.flushPages += 1;

      tdListPopNode(pResultBuf->lruList, pn);
      d->pn = NULL;
      tfree(pn);

      return bufPage;
    }
    // flush failed (terrno is set by flushPageToDisk): keep the page and fall through
  }

  // no page could be evicted, try to allocate new space: increase by 50% of previous mem pages
  // (the qWarn message about this expansion is currently disabled)
  pResultBuf->inMemPages = (int32_t)(pResultBuf->inMemPages * 1.5f);
  return NULL;
}
/**
 * Insert a page at the head of the LRU list and remember its list node in the page info.
 *
 * @param pList LRU list; its elements are SPageInfo pointers
 * @param pi    page to insert
 */
static void lruListPushFront(SList *pList, SPageInfo* pi) {
  // the list copies the pointer value, so the freshly created head node is the one for pi
  tdListPrepend(pList, &pi);
  pi->pn = tdListGetHead(pList);
}
/**
 * Move a page that is already linked in the LRU list to the head of the list.
 *
 * @param pList LRU list
 * @param pi    page whose list node (pi->pn) is unlinked and re-inserted at the head
 */
static void lruListMoveToFront(SList *pList, SPageInfo* pi) {
  SListNode* pNode = pi->pn;

  tdListPopNode(pList, pNode);
  tdListPrependNode(pList, pNode);
}
/**
 * Number of bytes to allocate for one in-memory page buffer.
 *
 * @param pageSize payload size of a page
 * @return pageSize plus POINTER_BYTES (the back-pointer stored at the start of the buffer),
 *         2 extra bytes and sizeof(SFilePage)
 */
static FORCE_INLINE size_t getAllocPageSize(int32_t pageSize) {
  size_t overhead = POINTER_BYTES + 2 + sizeof(SFilePage);
  return pageSize + overhead;
}
/**
 * Allocate a new page in the given group and return its in-memory payload.
 *
 * When the in-memory page limit has been reached, an unreferenced page is evicted first and
 * its buffer is reused. The memory buffer is obtained before any bookkeeping is modified, so
 * an allocation failure leaves the page list, LRU list and hash map untouched.
 *
 * @param pResultBuf result buffer
 * @param groupId    group the new page belongs to
 * @param pageId     [out] id assigned to the new page
 * @return the page payload (the page is marked as used and must be released by the caller),
 *         or NULL with terrno set to TSDB_CODE_COM_OUT_OF_MEMORY
 */
SFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId) {
  pResultBuf->statis.getPages += 1;

  char* availablePage = NULL;
  if (NO_IN_MEM_AVAILABLE_PAGES(pResultBuf)) {
    availablePage = evicOneDataPage(pResultBuf);
  }

  // nothing was recycled: allocate a fresh buffer, with extra bytes in case the zipped data grows
  if (availablePage == NULL) {
    availablePage = calloc(1, getAllocPageSize(pResultBuf->pageSize));
    if (availablePage == NULL) {
      terrno = TSDB_CODE_COM_OUT_OF_MEMORY;
      return NULL;
    }
  }

  // register new id in this group
  *pageId = (++pResultBuf->allocateId);

  // register page id info
  SPageInfo* pi = registerPage(pResultBuf, groupId, *pageId);
  if (pi == NULL) {  // defensive: do not leak the buffer if the page could not be registered
    free(availablePage);
    terrno = TSDB_CODE_COM_OUT_OF_MEMORY;
    return NULL;
  }

  // add to LRU list
  assert(listNEles(pResultBuf->lruList) < pResultBuf->inMemPages && pResultBuf->inMemPages > 0);
  lruListPushFront(pResultBuf->lruList, pi);

  // add to hash map
  taosHashPut(pResultBuf->all, pageId, sizeof(int32_t), &pi, POINTER_BYTES);

  pi->pData = availablePage;
  pResultBuf->totalBufSize += pResultBuf->pageSize;

  // the buffer starts with a back-pointer to its page info, see releaseResBufPage()
  ((void**)pi->pData)[0] = pi;
  pi->used = true;

  return (void *)(GET_DATA_PAYLOAD(pi));
}
/**
 * Get the payload of an existing page, loading it from disk if it is not in memory.
 *
 * A page that is in memory is moved to the head of the LRU list. A page that was flushed is
 * read back into a recycled or newly allocated buffer; it is linked into the LRU list only
 * after it was loaded successfully, so a failure leaves the page in its on-disk state.
 *
 * @param pResultBuf result buffer
 * @param id         id of a page previously returned through getNewDataBuf()
 * @return the page payload (the page is marked as used and must be released by the caller),
 *         or NULL with terrno set when no buffer could be allocated or the page could not be
 *         read from disk
 */
SFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) {
  assert(pResultBuf != NULL && id >= 0);
  pResultBuf->statis.getPages += 1;

  SPageInfo** pi = taosHashGet(pResultBuf->all, &id, sizeof(int32_t));
  assert(pi != NULL && *pi != NULL);

  SPageInfo* pPage = *pi;

  if (pPage->pData != NULL) {  // it is in memory
    // no need to update the LRU list if only one page exists
    if (pResultBuf->numOfPages != 1) {
      SPageInfo** pInfo = (SPageInfo**) (pPage->pn->data);
      assert(*pInfo == pPage);

      lruListMoveToFront(pResultBuf->lruList, pPage);
    }

    pPage->used = true;
    return (void *)(GET_DATA_PAYLOAD(pPage));
  }

  // not in memory
  assert(pPage->pn == NULL && pPage->info.length >= 0 && pPage->info.offset >= 0);

  char* availablePage = NULL;
  if (NO_IN_MEM_AVAILABLE_PAGES(pResultBuf)) {
    availablePage = evicOneDataPage(pResultBuf);
  }

  if (availablePage == NULL) {
    availablePage = calloc(1, getAllocPageSize(pResultBuf->pageSize));
    if (availablePage == NULL) {
      terrno = TSDB_CODE_COM_OUT_OF_MEMORY;
      return NULL;
    }
  }

  // attach the buffer; it starts with a back-pointer to its page info
  pPage->pData = availablePage;
  ((void**)(pPage->pData))[0] = pPage;

  if (loadPageFromDisk(pResultBuf, pPage) == NULL) {
    // terrno is set by loadPageFromDisk; drop the buffer so the page is still "on disk only"
    tfree(pPage->pData);
    return NULL;
  }

  lruListPushFront(pResultBuf->lruList, pPage);
  pPage->used = true;

  return (void *)(GET_DATA_PAYLOAD(pPage));
}
/**
 * Release a page that was handed out by its payload pointer.
 *
 * @param pResultBuf result buffer
 * @param page       payload pointer; the owning SPageInfo pointer is stored in the
 *                   POINTER_BYTES directly in front of it
 */
void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page) {
  assert(pResultBuf != NULL && page != NULL);

  // step back over the header to reach the back-pointer to the page info
  SPageInfo** pHeader = (SPageInfo**) ((char*) page - POINTER_BYTES);
  releaseResBufPageInfo(pResultBuf, *pHeader);
}
/**
 * Mark a page as no longer in use, which makes it a candidate for eviction.
 *
 * @param pResultBuf result buffer (its release counter is updated)
 * @param pi         page info; the page must be in memory and currently marked as used
 */
void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi) {
  assert(pi->pData != NULL && pi->used);

  pResultBuf->statis.releasePages += 1;
  pi->used = false;
}
/**
 * @param pResultBuf result buffer
 * @return number of entries in the group hash table (groupSet)
 */
size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf) {
  return taosHashGetSize(pResultBuf->groupSet);
}
/**
 * @param pResultBuf result buffer
 * @return totalBufSize of the buffer, converted to size_t
 */
size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf) {
  return (size_t)pResultBuf->totalBufSize;
}
/**
 * Look up the page list of a group.
 *
 * @param pResultBuf result buffer
 * @param groupId    group id to look up
 * @return the page list stored for groupId, or the shared empty dummy list when the group
 *         id is not present in groupSet
 */
SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId) {
  assert(pResultBuf != NULL);

  char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t));
  if (p != NULL) {
    return (SArray*) (*p);
  }

  // it is a new group id
  return pResultBuf->emptyDummyIdList;
}
/**
 * Destroy a result buffer: close and remove the backing file and release every page, page
 * list and helper structure owned by the buffer.
 *
 * @param pResultBuf result buffer to destroy; NULL is accepted and ignored
 */
void destroyResultBuf(SDiskbasedResultBuf* pResultBuf) {
  if (pResultBuf == NULL) {
    return;
  }

  if (pResultBuf->file != NULL) {
    // qDebug("QInfo:0x%"PRIx64" res output buffer closed, total:%.2f Kb, inmem size:%.2f Kb, file size:%.2f Kb",
    // pResultBuf->qId, pResultBuf->totalBufSize/1024.0, listNEles(pResultBuf->lruList) * pResultBuf->pageSize / 1024.0,
    // pResultBuf->fileSize/1024.0);
    fclose(pResultBuf->file);
  } else {
    // qDebug("QInfo:0x%"PRIx64" res output buffer closed, total:%.2f Kb, no file created", pResultBuf->qId,
    // pResultBuf->totalBufSize/1024.0);
  }

  remove(pResultBuf->path);
  tfree(pResultBuf->path);

  // every group owns a list of page infos; release the pages first, then the list itself
  for (SArray** ppList = taosHashIterate(pResultBuf->groupSet, NULL); ppList != NULL;
       ppList = taosHashIterate(pResultBuf->groupSet, ppList)) {
    size_t  numOfPages = taosArrayGetSize(*ppList);
    int32_t k = 0;

    while (k < numOfPages) {
      SPageInfo* pPage = taosArrayGetP(*ppList, k);
      tfree(pPage->pData);
      tfree(pPage);
      k += 1;
    }

    taosArrayDestroy(*ppList);
  }

  tdListFree(pResultBuf->lruList);
  taosArrayDestroy(pResultBuf->emptyDummyIdList);
  taosHashCleanup(pResultBuf->groupSet);
  taosHashCleanup(pResultBuf->all);

  tfree(pResultBuf->assistBuf);
  tfree(pResultBuf);
}
/**
 * Get the page info stored last in a page list.
 *
 * @param pList page list of a group
 * @return the last page info, or NULL when the list is empty (size - 1 would otherwise wrap
 *         around and index far outside the list)
 */
SPageInfo* getLastPageInfo(SIDList pList) {
  size_t size = taosArrayGetSize(pList);
  if (size == 0) {
    return NULL;
  }

  return (SPageInfo*) taosArrayGetP(pList, size - 1);
}