diff --git a/2.0/src/query/inc/qExecutor.h b/2.0/src/query/inc/qExecutor.h index 0c0e3363c8..9c738dad98 100644 --- a/2.0/src/query/inc/qExecutor.h +++ b/2.0/src/query/inc/qExecutor.h @@ -582,9 +582,9 @@ typedef struct SOrderOperatorInfo { void appendUpstream(SOperatorInfo* p, SOperatorInfo* pUpstream); -SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv, int32_t repeatTime, int32_t reverseTime); +SOperatorInfo* createTableScanOperatorInfo(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv, int32_t repeatTime, int32_t reverseTime); SOperatorInfo* createTableScanOperator(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv, int32_t repeatTime); -SOperatorInfo* createTableSeqScanOperator(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv); +SOperatorInfo* createTableSeqScanOperatorInfo(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv); SOperatorInfo* createAggregateOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); SOperatorInfo* createProjectOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); @@ -622,7 +622,7 @@ void doCompactSDataBlock(SSDataBlock* pBlock, int32_t numOfRows, int8_t* p); SSDataBlock* createOutputBuf(SExprInfo* pExpr, int32_t numOfOutput, int32_t numOfRows); -void* destroyOutputBuf(SSDataBlock* pBlock); +void* blockDataDestroy(SSDataBlock* pBlock); void* doDestroyFilterInfo(SSingleColumnFilterInfo* pFilterInfo, int32_t numOfFilterCols); void setInputDataBlock(SOperatorInfo* pOperator, SQLFunctionCtx* pCtx, SSDataBlock* pBlock, int32_t order); diff --git a/2.0/src/query/src/qExecutor.c b/2.0/src/query/src/qExecutor.c index fdadf39d4d..ca656b81ff 100644 --- a/2.0/src/query/src/qExecutor.c +++ b/2.0/src/query/src/qExecutor.c @@ -336,7 +336,7 @@ SSDataBlock* createOutputBuf(SExprInfo* pExpr, int32_t numOfOutput, int32_t numO return res; } -void* destroyOutputBuf(SSDataBlock* pBlock) { +void* blockDataDestroy(SSDataBlock* pBlock) { if (pBlock == NULL) { return NULL; } @@ -4835,11 +4835,11 @@ int32_t doInitQInfo(SQInfo* pQInfo, STSBuf* pTsBuf, void* tsdb, void* sourceOptr break; } case OP_TableSeqScan: { - pRuntimeEnv->proot = createTableSeqScanOperator(pRuntimeEnv->pTsdbReadHandle, pRuntimeEnv); + pRuntimeEnv->proot = createTableSeqScanOperatorInfo(pRuntimeEnv->pTsdbReadHandle, pRuntimeEnv); break; } case OP_DataBlocksOptScan: { - pRuntimeEnv->proot = createDataBlocksOptScanInfo(pRuntimeEnv->pTsdbReadHandle, pRuntimeEnv, getNumOfScanTimes(pQueryAttr), pQueryAttr->needReverseScan? 1:0); + pRuntimeEnv->proot = createTableScanOperatorInfo(pRuntimeEnv->pTsdbReadHandle, pRuntimeEnv, getNumOfScanTimes(pQueryAttr), pQueryAttr->needReverseScan? 1:0); break; } case OP_TableScan: { @@ -5162,7 +5162,7 @@ SOperatorInfo* createTableScanOperator(void* pTsdbQueryHandle, SQueryRuntimeEnv* return pOperator; } -SOperatorInfo* createTableSeqScanOperator(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv) { +SOperatorInfo* createTableSeqScanOperatorInfo(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv) { STableScanInfo* pInfo = calloc(1, sizeof(STableScanInfo)); pInfo->pTsdbReadHandle = pTsdbQueryHandle; @@ -5267,7 +5267,7 @@ void setTableScanFilterOperatorInfo(STableScanInfo* pTableScanInfo, SOperatorInf } } -SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv, int32_t repeatTime, int32_t reverseTime) { +SOperatorInfo* createTableScanOperatorInfo(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv, int32_t repeatTime, int32_t reverseTime) { assert(repeatTime > 0); STableScanInfo* pInfo = calloc(1, sizeof(STableScanInfo)); @@ -5278,7 +5278,7 @@ SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbQueryHandle, SQueryRuntime pInfo->order = pRuntimeEnv->pQueryAttr->order.order; SOperatorInfo* pOptr = calloc(1, sizeof(SOperatorInfo)); - pOptr->name = "DataBlocksOptimizedScanOperator"; + pOptr->name = "TableScanOperator"; pOptr->operatorType = OP_DataBlocksOptScan; pOptr->pRuntimeEnv = pRuntimeEnv; pOptr->blockingOptr = false; @@ -5373,7 +5373,7 @@ static void destroyGlobalAggOperatorInfo(void* param, int32_t numOfOutput) { static void destroySlimitOperatorInfo(void* param, int32_t numOfOutput) { SSLimitOperatorInfo *pInfo = (SSLimitOperatorInfo*) param; taosArrayDestroy(pInfo->orderColumnList); - pInfo->pRes = destroyOutputBuf(pInfo->pRes); + pInfo->pRes = blockDataDestroy(pInfo->pRes); tfree(pInfo->prevRow); } @@ -6566,7 +6566,7 @@ static void doDestroyBasicInfo(SOptrBasicInfo* pInfo, int32_t numOfOutput) { tfree(pInfo->rowCellInfoOffset); cleanupResultRowInfo(&pInfo->resultRowInfo); - pInfo->pRes = destroyOutputBuf(pInfo->pRes); + pInfo->pRes = blockDataDestroy(pInfo->pRes); } static void destroyBasicOperatorInfo(void* param, int32_t numOfOutput) { @@ -6590,7 +6590,7 @@ static void destroySWindowOperatorInfo(void* param, int32_t numOfOutput) { static void destroySFillOperatorInfo(void* param, int32_t numOfOutput) { SFillOperatorInfo* pInfo = (SFillOperatorInfo*) param; pInfo->pFillInfo = taosDestroyFillInfo(pInfo->pFillInfo); - pInfo->pRes = destroyOutputBuf(pInfo->pRes); + pInfo->pRes = blockDataDestroy(pInfo->pRes); tfree(pInfo->p); } @@ -6607,12 +6607,12 @@ static void destroyProjectOperatorInfo(void* param, int32_t numOfOutput) { static void destroyTagScanOperatorInfo(void* param, int32_t numOfOutput) { STagScanInfo* pInfo = (STagScanInfo*) param; - pInfo->pRes = destroyOutputBuf(pInfo->pRes); + pInfo->pRes = blockDataDestroy(pInfo->pRes); } static void destroyOrderOperatorInfo(void* param, int32_t numOfOutput) { SOrderOperatorInfo* pInfo = (SOrderOperatorInfo*) param; - pInfo->pDataBlock = destroyOutputBuf(pInfo->pDataBlock); + pInfo->pDataBlock = blockDataDestroy(pInfo->pDataBlock); } static void destroyConditionOperatorInfo(void* param, int32_t numOfOutput) { @@ -6625,7 +6625,7 @@ static void destroyDistinctOperatorInfo(void* param, int32_t numOfOutput) { taosHashCleanup(pInfo->pSet); tfree(pInfo->buf); taosArrayDestroy(pInfo->pDistinctDataInfo); - pInfo->pRes = destroyOutputBuf(pInfo->pRes); + pInfo->pRes = blockDataDestroy(pInfo->pRes); } SOperatorInfo* createMultiTableAggOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput) { diff --git a/2.0/src/query/tests/resultBufferTest.cpp b/2.0/src/query/tests/resultBufferTest.cpp index 9724b98f7c..b7173ece46 100644 --- a/2.0/src/query/tests/resultBufferTest.cpp +++ b/2.0/src/query/tests/resultBufferTest.cpp @@ -13,7 +13,7 @@ namespace { // simple test void simpleTest() { - SDiskbasedResultBuf* pResultBuf = NULL; + SDiskbasedBuf* pResultBuf = NULL; int32_t ret = createDiskbasedResultBuffer(&pResultBuf, 1024, 4096, 1); int32_t pageId = 0; @@ -22,40 +22,40 @@ void simpleTest() { tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId); ASSERT_TRUE(pBufPage != NULL); - ASSERT_EQ(getResBufSize(pResultBuf), 1024); + ASSERT_EQ(getTotalBufSize(pResultBuf), 1024); SIDList list = getDataBufPagesIdList(pResultBuf, groupId); ASSERT_EQ(taosArrayGetSize(list), 1); ASSERT_EQ(getNumOfResultBufGroupId(pResultBuf), 1); - releaseResBufPage(pResultBuf, pBufPage); + releaseBufPage(pResultBuf, pBufPage); tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t = getResBufPage(pResultBuf, pageId); + tFilePage* t = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t == pBufPage1); tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t1 = getResBufPage(pResultBuf, pageId); + tFilePage* t1 = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t1 == pBufPage2); tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t2 = getResBufPage(pResultBuf, pageId); + tFilePage* t2 = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t2 == pBufPage3); tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t3 = getResBufPage(pResultBuf, pageId); + tFilePage* t3 = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t3 == pBufPage4); tFilePage* pBufPage5 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t4 = getResBufPage(pResultBuf, pageId); + tFilePage* t4 = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t4 == pBufPage5); destroyResultBuf(pResultBuf); } void writeDownTest() { - SDiskbasedResultBuf* pResultBuf = NULL; + SDiskbasedBuf* pResultBuf = NULL; int32_t ret = createDiskbasedResultBuffer(&pResultBuf, 1024, 4*1024, 1); int32_t pageId = 0; @@ -68,31 +68,31 @@ void writeDownTest() { *(int32_t*)(pBufPage->data) = nx; writePageId = pageId; - releaseResBufPage(pResultBuf, pBufPage); + releaseBufPage(pResultBuf, pBufPage); tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t1 = getResBufPage(pResultBuf, pageId); + tFilePage* t1 = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t1 == pBufPage1); ASSERT_TRUE(pageId == 1); tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t2 = getResBufPage(pResultBuf, pageId); + tFilePage* t2 = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t2 == pBufPage2); ASSERT_TRUE(pageId == 2); tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t3 = getResBufPage(pResultBuf, pageId); + tFilePage* t3 = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t3 == pBufPage3); ASSERT_TRUE(pageId == 3); tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t4 = getResBufPage(pResultBuf, pageId); + tFilePage* t4 = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t4 == pBufPage4); ASSERT_TRUE(pageId == 4); - releaseResBufPage(pResultBuf, t4); + releaseBufPage(pResultBuf, t4); // flush the written page to disk, and read it out again - tFilePage* pBufPagex = getResBufPage(pResultBuf, writePageId); + tFilePage* pBufPagex = getBufPage(pResultBuf, writePageId); ASSERT_EQ(*(int32_t*)pBufPagex->data, nx); SArray* pa = getDataBufPagesIdList(pResultBuf, groupId); @@ -102,7 +102,7 @@ void writeDownTest() { } void recyclePageTest() { - SDiskbasedResultBuf* pResultBuf = NULL; + SDiskbasedBuf* pResultBuf = NULL; int32_t ret = createDiskbasedResultBuffer(&pResultBuf, 1024, 4*1024, 1); int32_t pageId = 0; @@ -112,41 +112,41 @@ void recyclePageTest() { tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId); ASSERT_TRUE(pBufPage != NULL); - releaseResBufPage(pResultBuf, pBufPage); + releaseBufPage(pResultBuf, pBufPage); tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t1 = getResBufPage(pResultBuf, pageId); + tFilePage* t1 = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t1 == pBufPage1); ASSERT_TRUE(pageId == 1); tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t2 = getResBufPage(pResultBuf, pageId); + tFilePage* t2 = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t2 == pBufPage2); ASSERT_TRUE(pageId == 2); tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t3 = getResBufPage(pResultBuf, pageId); + tFilePage* t3 = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t3 == pBufPage3); ASSERT_TRUE(pageId == 3); tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t4 = getResBufPage(pResultBuf, pageId); + tFilePage* t4 = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t4 == pBufPage4); ASSERT_TRUE(pageId == 4); - releaseResBufPage(pResultBuf, t4); + releaseBufPage(pResultBuf, t4); tFilePage* pBufPage5 = getNewDataBuf(pResultBuf, groupId, &pageId); - tFilePage* t5 = getResBufPage(pResultBuf, pageId); + tFilePage* t5 = getBufPage(pResultBuf, pageId); ASSERT_TRUE(t5 == pBufPage5); ASSERT_TRUE(pageId == 5); // flush the written page to disk, and read it out again - tFilePage* pBufPagex = getResBufPage(pResultBuf, writePageId); + tFilePage* pBufPagex = getBufPage(pResultBuf, writePageId); *(int32_t*)(pBufPagex->data) = nx; writePageId = pageId; // update the data - releaseResBufPage(pResultBuf, pBufPagex); + releaseBufPage(pResultBuf, pBufPagex); - tFilePage* pBufPagex1 = getResBufPage(pResultBuf, 1); + tFilePage* pBufPagex1 = getBufPage(pResultBuf, 1); SArray* pa = getDataBufPagesIdList(pResultBuf, groupId); ASSERT_EQ(taosArrayGetSize(pa), 6); diff --git a/include/common/common.h b/include/common/common.h index f2518e5945..fd5b6717ab 100644 --- a/include/common/common.h +++ b/include/common/common.h @@ -16,6 +16,11 @@ #ifndef TDENGINE_COMMON_H #define TDENGINE_COMMON_H + +#ifdef __cplusplus +extern "C" { +#endif + #include "taosdef.h" #include "tarray.h" #include "tmsg.h" @@ -44,8 +49,8 @@ typedef struct { uint32_t numOfTables; - SArray* pGroupList; - SHashObj* map; // speedup acquire the tableQueryInfo by table uid + SArray *pGroupList; + SHashObj *map; // speedup acquire the tableQueryInfo by table uid } STableGroupInfo; typedef struct SColumnDataAgg { @@ -74,18 +79,28 @@ typedef struct SConstantItem { // info.numOfCols = taosArrayGetSize(pDataBlock) + taosArrayGetSize(pConstantList); typedef struct SSDataBlock { - SColumnDataAgg* pBlockAgg; - SArray* pDataBlock; // SArray - SArray* pConstantList; // SArray, it is a constant/tags value of the corresponding result value. - SDataBlockInfo info; + SColumnDataAgg *pBlockAgg; + SArray *pDataBlock; // SArray + SArray *pConstantList; // SArray, it is a constant/tags value of the corresponding result value. + SDataBlockInfo info; } SSDataBlock; +typedef struct SVarColAttr { + int32_t *offset; // start position for each entry in the list + uint32_t length; // used buffer size that contain the valid data + uint32_t allocLen; // allocated buffer size +} SVarColAttr; + // pBlockAgg->numOfNull == info.rows, all data are null // pBlockAgg->numOfNull == 0, no data are null. typedef struct SColumnInfoData { - SColumnInfo info; // TODO filter info needs to be removed - char* nullbitmap; // - char* pData; // the corresponding block data in memory + SColumnInfo info; // TODO filter info needs to be removed + bool hasNull;// if current column data has null value. + char *pData; // the corresponding block data in memory + union { + char *nullbitmap; // bitmap, one bit for each item in the list + SVarColAttr varmeta; + }; } SColumnInfoData; static FORCE_INLINE int32_t tEncodeDataBlock(void** buf, const SSDataBlock* pBlock) { @@ -235,11 +250,11 @@ typedef struct SSqlExpr { char token[TSDB_COL_NAME_LEN]; // original token SSchema resSchema; - int32_t numOfCols; - SColumn* pColumns; // data columns that are required by query - int32_t interBytes; // inter result buffer size - int16_t numOfParams; // argument value of each function - SVariant param[3]; // parameters are not more than 3 + int32_t numOfCols; + SColumn* pColumns; // data columns that are required by query + int32_t interBytes; // inter result buffer size + int16_t numOfParams; // argument value of each function + SVariant param[3]; // parameters are not more than 3 } SSqlExpr; typedef struct SExprInfo { @@ -261,4 +276,8 @@ typedef struct SSessionWindow { #define GET_FORWARD_DIRECTION_FACTOR(ord) (((ord) == TSDB_ORDER_ASC) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP) +#ifdef __cplusplus +} +#endif + #endif // TDENGINE_COMMON_H diff --git a/include/common/tep.h b/include/common/tep.h index 69dd385a37..584b8a5a71 100644 --- a/include/common/tep.h +++ b/include/common/tep.h @@ -7,12 +7,22 @@ extern "C" { #include "os.h" #include "tmsg.h" +#include "common.h" typedef struct SCorEpSet { int32_t version; SEpSet epSet; } SCorEpSet; +typedef struct SBlockOrderInfo { + int32_t order; + int32_t colIndex; + SColumnInfoData *pColData; +// int32_t type; +// int32_t bytes; +// bool hasNull; +} SBlockOrderInfo; + int taosGetFqdnPortFromEp(const char *ep, SEp *pEp); void addEpIntoEpSet(SEpSet *pEpSet, const char *fqdn, uint16_t port); @@ -21,6 +31,77 @@ bool isEpsetEqual(const SEpSet *s1, const SEpSet *s2); void updateEpSet_s(SCorEpSet *pEpSet, SEpSet *pNewEpSet); SEpSet getEpSet_s(SCorEpSet *pEpSet); +#define NBIT (3u) +#define BitPos(_n) ((_n) & ((1 << NBIT) - 1)) +#define BMCharPos(bm_, r_) ((bm_)[(r_) >> NBIT]) +#define colDataIsNull_f(bm_, r_) ((BMCharPos(bm_, r_) & (1u << (7u - BitPos(r_)))) == (1u << (7u - BitPos(r_)))) + +#define colDataSetNull_f(bm_, r_) \ + do { \ + BMCharPos(bm_, r_) |= (1u << (7u - BitPos(r_))); \ + } while (0) + +static FORCE_INLINE bool colDataIsNull(const SColumnInfoData* pColumnInfoData, uint32_t totalRows, uint32_t row, SColumnDataAgg* pColAgg) { + if (!pColumnInfoData->hasNull) { + return false; + } + + if (pColAgg != NULL) { + if (pColAgg->numOfNull == totalRows) { + ASSERT(pColumnInfoData->nullbitmap == NULL); + return true; + } else if (pColAgg->numOfNull == 0) { + ASSERT(pColumnInfoData->nullbitmap == NULL); + return false; + } + } + + if (IS_VAR_DATA_TYPE(pColumnInfoData->info.type)) { + return pColumnInfoData->varmeta.offset[row] == -1; + } else { + if (pColumnInfoData->nullbitmap == NULL) { + return false; + } + + return colDataIsNull_f(pColumnInfoData->nullbitmap, row); + } +} + +#define colDataGet(p1_, r_) \ + ((IS_VAR_DATA_TYPE((p1_)->info.type)) ? (p1_)->pData + (p1_)->varmeta.offset[(r_)] \ + : (p1_)->pData + ((r_) * (p1_)->info.bytes)); + +int32_t colDataAppend(SColumnInfoData* pColumnInfoData, uint32_t currentRow, const char* pData, bool isNull); +int32_t colDataMergeCol(SColumnInfoData* pColumnInfoData, uint32_t numOfRow1, const SColumnInfoData* pSource, uint32_t numOfRow2); +int32_t blockDataUpdateTsWindow(SSDataBlock* pDataBlock); + +int32_t colDataGetSize(const SColumnInfoData* pColumnInfoData, int32_t numOfRows); +void colDataTrim(SColumnInfoData* pColumnInfoData); + +size_t colDataGetNumOfCols(const SSDataBlock* pBlock); +size_t colDataGetNumOfRows(const SSDataBlock* pBlock); + +int32_t blockDataMerge(SSDataBlock* pDest, const SSDataBlock* pSrc); +int32_t blockDataSplitRows(SSDataBlock* pBlock, bool hasVarCol, int32_t startIndex, int32_t* stopIndex, int32_t pageSize); +SSDataBlock* blockDataExtractBlock(SSDataBlock* pBlock, int32_t startIndex, int32_t rowCount); + +int32_t blockDataToBuf(char* buf, const SSDataBlock* pBlock); +int32_t blockDataFromBuf(SSDataBlock* pBlock, const char* buf); + +size_t blockDataGetSize(const SSDataBlock* pBlock); +size_t blockDataGetRowSize(const SSDataBlock* pBlock); +double blockDataGetSerialRowSize(const SSDataBlock* pBlock); +size_t blockDataGetSerialMetaSize(const SSDataBlock* pBlock); + +size_t blockDataNumOfRowsForSerialize(const SSDataBlock* pBlock, int32_t blockSize); + +int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst); +int32_t blockDataSort_rv(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst); + +int32_t blockDataEnsureCapacity(SSDataBlock* pDataBlock, uint32_t numOfRows); +void blockDataClearup(SSDataBlock* pDataBlock, bool hasVarCol); +void *blockDataDestroy(SSDataBlock *pBlock); + #ifdef __cplusplus } #endif diff --git a/include/common/trow.h b/include/common/trow.h index 4bb44e31f0..fad74a359d 100644 --- a/include/common/trow.h +++ b/include/common/trow.h @@ -473,7 +473,7 @@ static int32_t tdSRowResetBuf(SRowBuilder *pBuilder, void *pBuf) { terrno = TSDB_CODE_INVALID_PARA; return terrno; } - + TD_ROW_SET_TYPE(pBuilder->pBuf, pBuilder->rowType); uint32_t len = 0; diff --git a/include/libs/planner/plannerOp.h b/include/libs/planner/plannerOp.h index 31f5457c90..9030ffc946 100644 --- a/include/libs/planner/plannerOp.h +++ b/include/libs/planner/plannerOp.h @@ -24,7 +24,7 @@ #endif OP_ENUM_MACRO(StreamScan) -OP_ENUM_MACRO(DataBlocksOptScan) +OP_ENUM_MACRO(TableScan) OP_ENUM_MACRO(TableSeqScan) OP_ENUM_MACRO(TagScan) OP_ENUM_MACRO(SystemTableScan) diff --git a/include/util/tlosertree.h b/include/util/tlosertree.h index d6ffde82ca..241647ba1e 100644 --- a/include/util/tlosertree.h +++ b/include/util/tlosertree.h @@ -22,28 +22,31 @@ extern "C" { typedef int (*__merge_compare_fn_t)(const void *, const void *, void *param); -typedef struct SLoserTreeNode { +typedef struct STreeNode { int32_t index; - void *pData; -} SLoserTreeNode; + void *pData; // TODO remove it? +} STreeNode; -typedef struct SLoserTreeInfo { - int32_t numOfEntries; - int32_t totalEntries; +typedef struct SMultiwayMergeTreeInfo { + int32_t numOfSources; + int32_t totalSources; __merge_compare_fn_t comparFn; void * param; - SLoserTreeNode *pNode; -} SLoserTreeInfo; + struct STreeNode *pNode; +} SMultiwayMergeTreeInfo; -uint32_t tLoserTreeCreate(SLoserTreeInfo **pTree, int32_t numOfEntries, void *param, __merge_compare_fn_t compareFn); +#define tMergeTreeGetChosenIndex(t_) ((t_)->pNode[0].index) +#define tMergeTreeGetAdjustIndex(t_) (tMergeTreeGetChosenIndex(t_) + (t_)->numOfSources) -void tLoserTreeInit(SLoserTreeInfo *pTree); +int32_t tMergeTreeCreate(SMultiwayMergeTreeInfo **pTree, uint32_t numOfEntries, void *param, __merge_compare_fn_t compareFn); -void tLoserTreeAdjust(SLoserTreeInfo *pTree, int32_t idx); +void tMergeTreeDestroy(SMultiwayMergeTreeInfo* pTree); -void tLoserTreeRebuild(SLoserTreeInfo *pTree); +void tMergeTreeAdjust(SMultiwayMergeTreeInfo *pTree, int32_t idx); -void tLoserTreeDisplay(SLoserTreeInfo *pTree); +void tMergeTreeRebuild(SMultiwayMergeTreeInfo *pTree); + +void tMergeTreePrint(const SMultiwayMergeTreeInfo *pTree); #ifdef __cplusplus } diff --git a/include/util/tpagedbuf.h b/include/util/tpagedbuf.h new file mode 100644 index 0000000000..e989c31cd6 --- /dev/null +++ b/include/util/tpagedbuf.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TDENGINE_TPAGEDBUF_H +#define TDENGINE_TPAGEDBUF_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "tlist.h" +#include "thash.h" +#include "os.h" +#include "tlockfree.h" + +typedef struct SArray* SIDList; +typedef struct SPageInfo SPageInfo; +typedef struct SDiskbasedBuf SDiskbasedBuf; + +#define DEFAULT_INTERN_BUF_PAGE_SIZE (1024L) // in bytes +#define DEFAULT_PAGE_SIZE (16384L) + +typedef struct SFilePage { + int64_t num; + char data[]; +} SFilePage; + +typedef struct SDiskbasedBufStatis { + int64_t flushBytes; + int64_t loadBytes; + int32_t loadPages; + int32_t getPages; + int32_t releasePages; + int32_t flushPages; +} SDiskbasedBufStatis; + +/** + * create disk-based result buffer + * @param pBuf + * @param rowSize + * @param pagesize + * @param inMemPages + * @param handle + * @return + */ +int32_t createDiskbasedBuffer(SDiskbasedBuf** pBuf, int32_t pagesize, int32_t inMemBufSize, uint64_t qId, const char* dir); + +/** + * + * @param pBuf + * @param groupId + * @param pageId + * @return + */ +SFilePage* getNewDataBuf(SDiskbasedBuf* pBuf, int32_t groupId, int32_t* pageId); + +/** + * + * @param pBuf + * @param groupId + * @return + */ +SIDList getDataBufPagesIdList(SDiskbasedBuf* pBuf, int32_t groupId); + +/** + * get the specified buffer page by id + * @param pBuf + * @param id + * @return + */ +SFilePage* getBufPage(SDiskbasedBuf* pBuf, int32_t id); + +/** + * release the referenced buf pages + * @param pBuf + * @param page + */ +void releaseBufPage(SDiskbasedBuf* pBuf, void* page); + +/** + * + * @param pBuf + * @param pi + */ +void releaseBufPageInfo(SDiskbasedBuf* pBuf, struct SPageInfo* pi); + +/** + * get the total buffer size in the format of disk file + * @param pBuf + * @return + */ +size_t getTotalBufSize(const SDiskbasedBuf* pBuf); + +/** + * get the number of groups in the result buffer + * @param pBuf + * @return + */ +size_t getNumOfResultBufGroupId(const SDiskbasedBuf* pBuf); + +/** + * destroy result buffer + * @param pBuf + */ +void destroyResultBuf(SDiskbasedBuf* pBuf); + +/** + * + * @param pList + * @return + */ +SPageInfo* getLastPageInfo(SIDList pList); + +/** + * + * @param pPgInfo + * @return + */ +int32_t getPageId(const SPageInfo* pPgInfo); + +/** + * Return the buffer page size. + * @param pBuf + * @return + */ +int32_t getBufPageSize(const SDiskbasedBuf* pBuf); + +int32_t getNumOfInMemBufPages(const SDiskbasedBuf* pBuf); + +/** + * + * @param pBuf + * @return + */ +bool isAllDataInMemBuf(const SDiskbasedBuf* pBuf); + +/** + * Set the buffer page is dirty, and needs to be flushed to disk when swap out. + * @param pPageInfo + * @param dirty + */ +void setBufPageDirty(SFilePage* pPageInfo, bool dirty); + +/** + * Print the statistics when closing this buffer + * @param pBuf + */ +void printStatisBeforeClose(SDiskbasedBuf* pBuf); + +/** + * return buf statistics. + */ +SDiskbasedBufStatis getDBufStatis(const SDiskbasedBuf* pBuf); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_TPAGEDBUF_H diff --git a/include/util/tpagedfile.h b/include/util/tpagedfile.h deleted file mode 100644 index 5bc4dc92a0..0000000000 --- a/include/util/tpagedfile.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TDENGINE_TPAGEDFILE_H -#define TDENGINE_TPAGEDFILE_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "tlist.h" -#include "thash.h" -#include "os.h" -#include "tlockfree.h" - -typedef struct SArray* SIDList; - -typedef struct SPageDiskInfo { - int32_t offset; - int32_t length; -} SPageDiskInfo; - -typedef struct SPageInfo { - SListNode* pn; // point to list node - int32_t pageId; - SPageDiskInfo info; - void* pData; - bool used; // set current page is in used -} SPageInfo; - -typedef struct SFreeListItem { - int32_t offset; - int32_t len; -} SFreeListItem; - -typedef struct SResultBufStatis { - int32_t flushBytes; - int32_t loadBytes; - int32_t getPages; - int32_t releasePages; - int32_t flushPages; -} SResultBufStatis; - -typedef struct SDiskbasedResultBuf { - int32_t numOfPages; - int64_t totalBufSize; - int64_t fileSize; // disk file size - FILE* file; - int32_t allocateId; // allocated page id - char* path; // file path - int32_t pageSize; // current used page size - int32_t inMemPages; // numOfPages that are allocated in memory - SHashObj* groupSet; // id hash table - SHashObj* all; - SList* lruList; - void* emptyDummyIdList; // dummy id list - void* assistBuf; // assistant buffer for compress/decompress data - SArray* pFree; // free area in file - bool comp; // compressed before flushed to disk - int32_t nextPos; // next page flush position - - uint64_t qId; // for debug purpose - SResultBufStatis statis; -} SDiskbasedResultBuf; - -#define DEFAULT_INTERN_BUF_PAGE_SIZE (1024L) // in bytes -#define PAGE_INFO_INITIALIZER (SPageDiskInfo){-1, -1} -#define DEFAULT_PAGE_SIZE (16384L) - -typedef struct SFilePage { - int64_t num; - char data[]; -} SFilePage; - -/** - * create disk-based result buffer - * @param pResultBuf - * @param rowSize - * @param pagesize - * @param inMemPages - * @param handle - * @return - */ -int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t pagesize, int32_t inMemBufSize, uint64_t qId, const char* dir); - -/** - * - * @param pResultBuf - * @param groupId - * @param pageId - * @return - */ -SFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId); - -/** - * - * @param pResultBuf - * @param groupId - * @return - */ -SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId); - -/** - * get the specified buffer page by id - * @param pResultBuf - * @param id - * @return - */ -SFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id); - -/** - * release the referenced buf pages - * @param pResultBuf - * @param page - */ -void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page); - -/** - * - * @param pResultBuf - * @param pi - */ -void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi); - - -/** - * get the total buffer size in the format of disk file - * @param pResultBuf - * @return - */ -size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf); - -/** - * get the number of groups in the result buffer - * @param pResultBuf - * @return - */ -size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf); - -/** - * destroy result buffer - * @param pResultBuf - */ -void destroyResultBuf(SDiskbasedResultBuf* pResultBuf); - -/** - * - * @param pList - * @return - */ -SPageInfo* getLastPageInfo(SIDList pList); - -#ifdef __cplusplus -} -#endif - -#endif // TDENGINE_TPAGEDFILE_H diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index dfe7b12ce4..88752f6dfc 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -9,7 +9,7 @@ #include "tglobal.h" #include "tmsgtype.h" #include "tnote.h" -#include "tpagedfile.h" +#include "tpagedbuf.h" #include "tref.h" static int32_t initEpSetFromCfg(const char *firstEp, const char *secondEp, SCorEpSet *pEpSet); diff --git a/source/client/src/tmq.c b/source/client/src/tmq.c index 315a632180..87b7ac991c 100644 --- a/source/client/src/tmq.c +++ b/source/client/src/tmq.c @@ -25,7 +25,7 @@ #include "tglobal.h" #include "tmsgtype.h" #include "tnote.h" -#include "tpagedfile.h" +#include "tpagedbuf.h" #include "tref.h" struct tmq_list_t { diff --git a/source/common/src/tep.c b/source/common/src/tep.c index 45587a8856..970b6d954f 100644 --- a/source/common/src/tep.c +++ b/source/common/src/tep.c @@ -1,4 +1,5 @@ #include "tep.h" +#include #include "common.h" #include "tglobal.h" #include "tlockfree.h" @@ -60,68 +61,168 @@ SEpSet getEpSet_s(SCorEpSet *pEpSet) { return ep; } -bool colDataIsNull(const SColumnInfoData* pColumnInfoData, uint32_t totalRows, uint32_t row, SColumnDataAgg* pColAgg) { - if (pColAgg != NULL) { - if (pColAgg->numOfNull == totalRows) { - ASSERT(pColumnInfoData->nullbitmap == NULL); - return true; - } else if (pColAgg->numOfNull == 0) { - ASSERT(pColumnInfoData->nullbitmap == NULL); - return false; - } - } +#define BitmapLen(_n) (((_n) + ((1<> NBIT) - if (pColumnInfoData->nullbitmap == NULL) { - return false; - } - - uint8_t v = (pColumnInfoData->nullbitmap[row>>3] & (1<<(8 - (row&0x07)))); - return (v == 1); -} - -bool colDataIsNull_f(const char* bitmap, uint32_t row) { - return (bitmap[row>>3] & (1<<(8 - (row&0x07)))); -} - -void colDataSetNull_f(char* bitmap, uint32_t row) { // TODO - return; -} - -void* colDataGet(const SColumnInfoData* pColumnInfoData, uint32_t row) { +int32_t colDataGetSize(const SColumnInfoData* pColumnInfoData, int32_t numOfRows) { + ASSERT(pColumnInfoData != NULL); if (IS_VAR_DATA_TYPE(pColumnInfoData->info.type)) { - uint32_t offset = ((uint32_t*)pColumnInfoData->pData)[row]; - return (char*)(pColumnInfoData->pData) + offset; // the first part is the pointer to the true binary data + return pColumnInfoData->varmeta.length; } else { - return (char*)(pColumnInfoData->pData) + (row * pColumnInfoData->info.bytes); + return pColumnInfoData->info.bytes * numOfRows; } } +void colDataTrim(SColumnInfoData* pColumnInfoData) { + // TODO +} + int32_t colDataAppend(SColumnInfoData* pColumnInfoData, uint32_t currentRow, const char* pData, bool isNull) { ASSERT(pColumnInfoData != NULL); if (isNull) { - // TODO set null value in the nullbitmap + // There is a placehold for each NULL value of binary or nchar type. + if (IS_VAR_DATA_TYPE(pColumnInfoData->info.type)) { + pColumnInfoData->varmeta.offset[currentRow] = -1; // it is a null value of VAR type. + } else { + colDataSetNull_f(pColumnInfoData->nullbitmap, currentRow); + } + + pColumnInfoData->hasNull = true; return 0; } int32_t type = pColumnInfoData->info.type; if (IS_VAR_DATA_TYPE(type)) { - // TODO continue append var_type + SVarColAttr* pAttr = &pColumnInfoData->varmeta; + if (pAttr->allocLen < pAttr->length + varDataTLen(pData)) { + uint32_t newSize = pAttr->allocLen; + if (newSize == 0) { + newSize = 8; + } + + while(newSize < pAttr->length + varDataTLen(pData)) { + newSize = newSize * 1.5; + } + + char* buf = realloc(pColumnInfoData->pData, newSize); + if (buf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + pColumnInfoData->pData = buf; + pAttr->allocLen = newSize; + } + + uint32_t len = pColumnInfoData->varmeta.length; + pColumnInfoData->varmeta.offset[currentRow] = len; + + memcpy(pColumnInfoData->pData + len, pData, varDataTLen(pData)); + pColumnInfoData->varmeta.length += varDataTLen(pData); } else { char* p = pColumnInfoData->pData + pColumnInfoData->info.bytes * currentRow; switch(type) { case TSDB_DATA_TYPE_TINYINT: case TSDB_DATA_TYPE_UTINYINT: {*(int8_t*) p = *(int8_t*) pData;break;} + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_USMALLINT: {*(int16_t*) p = *(int16_t*) pData;break;} + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_UINT: {*(int32_t*) p = *(int32_t*) pData;break;} + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_UBIGINT: {*(int64_t*) p = *(int64_t*) pData;break;} default: assert(0); } - } return 0; } -size_t colDataGetCols(const SSDataBlock* pBlock) { +static void doBitmapMerge(SColumnInfoData* pColumnInfoData, int32_t numOfRow1, const SColumnInfoData* pSource, int32_t numOfRow2) { + uint32_t total = numOfRow1 + numOfRow2; + + if (BitmapLen(numOfRow1) < BitmapLen(total)) { + char* tmp = realloc(pColumnInfoData->nullbitmap, BitmapLen(total)); + uint32_t extend = BitmapLen(total) - BitmapLen(numOfRow1); + memset(tmp + BitmapLen(numOfRow1), 0, extend); + pColumnInfoData->nullbitmap = tmp; + } + + uint32_t remindBits = BitPos(numOfRow1); + uint32_t shiftBits = 8 - remindBits; + + if (remindBits == 0) { // no need to shift bits of bitmap + memcpy(pColumnInfoData->nullbitmap + BitmapLen(numOfRow1), pSource->nullbitmap, BitmapLen(numOfRow2)); + } else { + int32_t len = BitmapLen(numOfRow2); + int32_t i = 0; + + uint8_t* p = (uint8_t*)pSource->nullbitmap; + pColumnInfoData->nullbitmap[BitmapLen(numOfRow1) - 1] |= (p[0] >> remindBits); + + uint8_t* start = (uint8_t*)&pColumnInfoData->nullbitmap[BitmapLen(numOfRow1)]; + while (i < len) { + start[i] |= (p[i] << shiftBits); + i += 1; + + if (i > 1) { + start[i - 1] |= (p[i] >> remindBits); + } + } + } +} + +int32_t colDataMergeCol(SColumnInfoData* pColumnInfoData, uint32_t numOfRow1, const SColumnInfoData* pSource, uint32_t numOfRow2) { + ASSERT(pColumnInfoData != NULL && pSource != NULL && pColumnInfoData->info.type == pSource->info.type); + + if (numOfRow2 == 0) { + return numOfRow1; + } + + if (IS_VAR_DATA_TYPE(pColumnInfoData->info.type)) { + // Handle the bitmap + char* p = realloc(pColumnInfoData->varmeta.offset, sizeof(int32_t) * (numOfRow1 + numOfRow2)); + if (p == NULL) { + // TODO + } + + pColumnInfoData->varmeta.offset = (int32_t*) p; + for(int32_t i = 0; i < numOfRow2; ++i) { + pColumnInfoData->varmeta.offset[i + numOfRow1] = pSource->varmeta.offset[i] + pColumnInfoData->varmeta.length; + } + + // copy data + uint32_t len = pSource->varmeta.length; + uint32_t oldLen = pColumnInfoData->varmeta.length; + if (pColumnInfoData->varmeta.allocLen < len + oldLen) { + char* tmp = realloc(pColumnInfoData->pData, len + oldLen); + if (tmp == NULL) { + return TSDB_CODE_VND_OUT_OF_MEMORY; + } + + pColumnInfoData->pData = tmp; + pColumnInfoData->varmeta.allocLen = len + oldLen; + } + + memcpy(pColumnInfoData->pData + oldLen, pSource->pData, len); + pColumnInfoData->varmeta.length = len + oldLen; + } else { + doBitmapMerge(pColumnInfoData, numOfRow1, pSource, numOfRow2); + + int32_t newSize = (numOfRow1 + numOfRow2) * pColumnInfoData->info.bytes; + char* tmp = realloc(pColumnInfoData->pData, newSize); + if (tmp == NULL) { + return TSDB_CODE_VND_OUT_OF_MEMORY; + } + + pColumnInfoData->pData = tmp; + int32_t offset = pColumnInfoData->info.bytes * numOfRow1; + memcpy(pColumnInfoData->pData + offset, pSource->pData, pSource->info.bytes * numOfRow2); + } + + return numOfRow1 + numOfRow2; +} + +size_t colDataGetNumOfCols(const SSDataBlock* pBlock) { ASSERT(pBlock); size_t constantCols = (pBlock->pConstantList != NULL)? taosArrayGetSize(pBlock->pConstantList):0; @@ -129,11 +230,11 @@ size_t colDataGetCols(const SSDataBlock* pBlock) { return pBlock->info.numOfCols; } -size_t colDataGetRows(const SSDataBlock* pBlock) { +size_t colDataGetNumOfRows(const SSDataBlock* pBlock) { return pBlock->info.rows; } -int32_t colDataUpdateTsWindow(SSDataBlock* pDataBlock) { +int32_t blockDataUpdateTsWindow(SSDataBlock* pDataBlock) { if (pDataBlock == NULL || pDataBlock->info.rows <= 0) { return 0; } @@ -153,6 +254,848 @@ int32_t colDataUpdateTsWindow(SSDataBlock* pDataBlock) { return 0; } +int32_t blockDataMerge(SSDataBlock* pDest, const SSDataBlock* pSrc) { + assert(pSrc != NULL && pDest != NULL && pDest->info.numOfCols == pSrc->info.numOfCols); + + int32_t numOfCols = pSrc->info.numOfCols; + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pCol2 = taosArrayGet(pDest->pDataBlock, i); + SColumnInfoData* pCol1 = taosArrayGet(pSrc->pDataBlock, i); + + uint32_t oldLen = colDataGetSize(pCol2, pDest->info.rows); + uint32_t newLen = colDataGetSize(pCol1, pSrc->info.rows); + + int32_t newSize = oldLen + newLen; + char* tmp = realloc(pCol2->pData, newSize); + if (tmp != NULL) { + pCol2->pData = tmp; + colDataMergeCol(pCol2, pDest->info.rows, pCol1, pSrc->info.rows); + } else { + return TSDB_CODE_VND_OUT_OF_MEMORY; + } + } + + pDest->info.rows += pSrc->info.rows; + return TSDB_CODE_SUCCESS; +} + +size_t blockDataGetSize(const SSDataBlock* pBlock) { + assert(pBlock != NULL); + + size_t total = 0; + int32_t numOfCols = pBlock->info.numOfCols; + + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, i); + total += colDataGetSize(pColInfoData, pBlock->info.rows); + + if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) { + total += sizeof(int32_t) * pBlock->info.rows; + } else { + total += BitmapLen(pBlock->info.rows); + } + } + + return total; +} + +// the number of tuples can be fit in one page. +// Actual data rows pluses the corresponding meta data must fit in one memory buffer of the given page size. +int32_t blockDataSplitRows(SSDataBlock* pBlock, bool hasVarCol, int32_t startIndex, int32_t* stopIndex, int32_t pageSize) { + ASSERT(pBlock != NULL && stopIndex != NULL); + + int32_t numOfCols = pBlock->info.numOfCols; + int32_t numOfRows = pBlock->info.rows; + + int32_t bitmapChar = 1; + + size_t headerSize = sizeof(int32_t); + size_t colHeaderSize = sizeof(int32_t) * numOfCols; + size_t payloadSize = pageSize - (headerSize + colHeaderSize); + + // TODO speedup by checking if the whole page can fit in firstly. + if (!hasVarCol) { + size_t rowSize = blockDataGetRowSize(pBlock); + int32_t capacity = (payloadSize / (rowSize * 8 + bitmapChar * numOfCols)) * 8; + + *stopIndex = startIndex + capacity; + if (*stopIndex >= numOfRows) { + *stopIndex = numOfRows - 1; + } + + return TSDB_CODE_SUCCESS; + } else { + // iterate the rows that can be fit in this buffer page + int32_t size = (headerSize + colHeaderSize); + + for(int32_t j = startIndex; j < numOfRows; ++j) { + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pColInfoData = TARRAY_GET_ELEM(pBlock->pDataBlock, i); + if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) { + bool isNull = colDataIsNull(pColInfoData, numOfRows, j, NULL); + if (isNull) { + // do nothing + } else { + char* p = colDataGet(pColInfoData, j); + size += varDataTLen(p); + } + + size += sizeof(pColInfoData->varmeta.offset[0]); + } else { + size += pColInfoData->info.bytes; + + if (((j - startIndex) & 0x07) == 0) { + size += 1; // the space for null bitmap + } + } + } + + if (size > pageSize) { + *stopIndex = j - 1; + ASSERT(*stopIndex > startIndex); + + return TSDB_CODE_SUCCESS; + } + } + + // all fit in + *stopIndex = numOfRows - 1; + return TSDB_CODE_SUCCESS; + } +} + +SSDataBlock* blockDataExtractBlock(SSDataBlock* pBlock, int32_t startIndex, int32_t rowCount) { + if (pBlock == NULL || startIndex < 0 || rowCount > pBlock->info.rows || rowCount + startIndex > pBlock->info.rows) { + return NULL; + } + + SSDataBlock* pDst = calloc(1, sizeof(SSDataBlock)); + if (pDst == NULL) { + return NULL; + } + + pDst->info = pBlock->info; + + pDst->info.rows = 0; + pDst->pDataBlock = taosArrayInit(pBlock->info.numOfCols, sizeof(SColumnInfoData)); + + for(int32_t i = 0; i < pBlock->info.numOfCols; ++i) { + SColumnInfoData colInfo = {0}; + SColumnInfoData* pSrcCol = taosArrayGet(pBlock->pDataBlock, i); + colInfo.info = pSrcCol->info; + + if (IS_VAR_DATA_TYPE(pSrcCol->info.type)) { + SVarColAttr* pAttr = &colInfo.varmeta; + pAttr->offset = calloc(rowCount, sizeof(int32_t)); + } else { + colInfo.nullbitmap = calloc(1, BitmapLen(rowCount)); + colInfo.pData = calloc(rowCount, colInfo.info.bytes); + } + + taosArrayPush(pDst->pDataBlock, &colInfo); + } + + for (int32_t i = 0; i < pBlock->info.numOfCols; ++i) { + SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, i); + SColumnInfoData* pDstCol = taosArrayGet(pDst->pDataBlock, i); + + for (int32_t j = startIndex; j < (startIndex + rowCount); ++j) { + bool isNull = colDataIsNull(pColData, pBlock->info.rows, j, pBlock->pBlockAgg); + char* p = colDataGet(pColData, j); + + colDataAppend(pDstCol, j - startIndex, p, isNull); + } + } + + pDst->info.rows = rowCount; + return pDst; +} +/** + * + * +------------------+---------------+--------------------+ + * |the number of rows| column length | column #1 | + * | (4 bytes) | (4 bytes) |--------------------+ + * | | | null bitmap| values| + * +------------------+---------------+--------------------+ + * @param buf + * @param pBlock + * @return + */ +int32_t blockDataToBuf(char* buf, const SSDataBlock* pBlock) { + ASSERT(pBlock != NULL); + // write the number of rows + *(uint32_t*) buf = pBlock->info.rows; + + int32_t numOfCols = pBlock->info.numOfCols; + int32_t numOfRows = pBlock->info.rows; + + char* pStart = buf + sizeof(uint32_t); + + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, i); + if (IS_VAR_DATA_TYPE(pCol->info.type)) { + memcpy(pStart, pCol->varmeta.offset, numOfRows * sizeof(int32_t)); + pStart += numOfRows * sizeof(int32_t); + } else { + memcpy(pStart, pCol->nullbitmap, BitmapLen(numOfRows)); + pStart += BitmapLen(pBlock->info.rows); + } + + uint32_t dataSize = colDataGetSize(pCol, numOfRows); + + *(int32_t*) pStart = dataSize; + pStart += sizeof(int32_t); + + memcpy(pStart, pCol->pData, dataSize); + pStart += dataSize; + } + + return 0; +} + +int32_t blockDataFromBuf(SSDataBlock* pBlock, const char* buf) { + pBlock->info.rows = *(int32_t*) buf; + + int32_t numOfCols = pBlock->info.numOfCols; + const char* pStart = buf + sizeof(uint32_t); + + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, i); + + size_t metaSize = pBlock->info.rows * sizeof(int32_t); + if (IS_VAR_DATA_TYPE(pCol->info.type)) { + memcpy(pCol->varmeta.offset, pStart, metaSize); + pStart += metaSize; + } else { + memcpy(pCol->nullbitmap, pStart, BitmapLen(pBlock->info.rows)); + pStart += BitmapLen(pBlock->info.rows); + } + + int32_t colLength = *(int32_t*) pStart; + pStart += sizeof(int32_t); + + if (IS_VAR_DATA_TYPE(pCol->info.type)) { + if (pCol->varmeta.allocLen < colLength) { + char* tmp = realloc(pCol->pData, colLength); + if (tmp == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + pCol->pData = tmp; + pCol->varmeta.allocLen = colLength; + } + + pCol->varmeta.length = colLength; + ASSERT(pCol->varmeta.length <= pCol->varmeta.allocLen); + } + + memcpy(pCol->pData, pStart, colLength); + pStart += colLength; + } + + return TSDB_CODE_SUCCESS; +} + +size_t blockDataGetRowSize(const SSDataBlock* pBlock) { + ASSERT(pBlock != NULL); + size_t rowSize = 0; + + size_t numOfCols = pBlock->info.numOfCols; + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, i); + rowSize += pColInfo->info.bytes; + } + + return rowSize; +} + +/** + * @refitem blockDataToBuf for the meta size + * + * @param pBlock + * @return + */ +size_t blockDataGetSerialMetaSize(const SSDataBlock* pBlock) { + return sizeof(int32_t) + pBlock->info.numOfCols * sizeof(int32_t); +} + +double blockDataGetSerialRowSize(const SSDataBlock* pBlock) { + ASSERT(pBlock != NULL); + double rowSize = 0; + + size_t numOfCols = pBlock->info.numOfCols; + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, i); + rowSize += pColInfo->info.bytes; + + if (IS_VAR_DATA_TYPE(pColInfo->info.type)) { + rowSize += sizeof(int32_t); + } else { + rowSize += 1/8.0; + } + } + + return rowSize; +} + +typedef struct SSDataBlockSortHelper { + SArray *orderInfo; // SArray + SSDataBlock *pDataBlock; + bool nullFirst; +} SSDataBlockSortHelper; + +int32_t dataBlockCompar(const void* p1, const void* p2, const void* param) { + const SSDataBlockSortHelper* pHelper = (const SSDataBlockSortHelper*) param; + + SSDataBlock* pDataBlock = pHelper->pDataBlock; + + int32_t left = *(int32_t*) p1; + int32_t right = *(int32_t*) p2; + + SArray* pInfo = pHelper->orderInfo; + + for(int32_t i = 0; i < pInfo->size; ++i) { + SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pInfo, i); + SColumnInfoData* pColInfoData = pOrder->pColData;//TARRAY_GET_ELEM(pDataBlock->pDataBlock, pOrder->colIndex); + + if (pColInfoData->hasNull) { + bool leftNull = colDataIsNull(pColInfoData, pDataBlock->info.rows, left, pDataBlock->pBlockAgg); + bool rightNull = colDataIsNull(pColInfoData, pDataBlock->info.rows, right, pDataBlock->pBlockAgg); + if (leftNull && rightNull) { + continue; // continue to next slot + } + + if (rightNull) { + return pHelper->nullFirst? 1:-1; + } + + if (leftNull) { + return pHelper->nullFirst? -1:1; + } + } + + void* left1 = colDataGet(pColInfoData, left); + void* right1 = colDataGet(pColInfoData, right); + + switch(pColInfoData->info.type) { + case TSDB_DATA_TYPE_INT: { + int32_t leftx = *(int32_t*) left1; + int32_t rightx = *(int32_t*) right1; + + if (leftx == rightx) { + break; + } else { + if (pOrder->order == TSDB_ORDER_ASC) { + return (leftx < rightx)? -1:1; + } else { + return (leftx < rightx)? 1:-1; + } + } + } + default: + assert(0); + } + } + + return 0; +} + +static int32_t doAssignOneTuple(SColumnInfoData* pDstCols, int32_t numOfRows, const SSDataBlock* pSrcBlock, int32_t tupleIndex) { + int32_t code = 0; + int32_t numOfCols = pSrcBlock->info.numOfCols; + + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pDst = &pDstCols[i]; + SColumnInfoData* pSrc = taosArrayGet(pSrcBlock->pDataBlock, i); + + if (pSrc->hasNull && colDataIsNull(pSrc, pSrcBlock->info.rows, tupleIndex, pSrcBlock->pBlockAgg)) { + code = colDataAppend(pDst, numOfRows, NULL, true); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } else { + char* p = colDataGet(pSrc, tupleIndex); + code = colDataAppend(pDst, numOfRows, p, false); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t blockDataAssign(SColumnInfoData* pCols, const SSDataBlock* pDataBlock, int32_t* index) { +#if 0 + for (int32_t i = 0; i < pDataBlock->info.rows; ++i) { + int32_t code = doAssignOneTuple(pCols, i, pDataBlock, index[i]); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } +#else + for(int32_t i = 0; i < pDataBlock->info.numOfCols; ++i) { + SColumnInfoData* pDst = &pCols[i]; + SColumnInfoData* pSrc = taosArrayGet(pDataBlock->pDataBlock, i); + + if (IS_VAR_DATA_TYPE(pSrc->info.type)) { + memcpy(pDst->pData, pSrc->pData, pSrc->varmeta.length); + pDst->varmeta.length = pSrc->varmeta.length; + + for(int32_t j = 0; j < pDataBlock->info.rows; ++j) { + pDst->varmeta.offset[j] = pSrc->varmeta.offset[index[j]]; + } + } else { + switch (pSrc->info.type) { + case TSDB_DATA_TYPE_UINT: + case TSDB_DATA_TYPE_INT: { + for (int32_t j = 0; j < pDataBlock->info.rows; ++j) { + int32_t* p = (int32_t*)pDst->pData; + int32_t* srclist = (int32_t*)pSrc->pData; + + p[j] = srclist[index[j]]; + if (colDataIsNull_f(pSrc->nullbitmap, index[j])) { + colDataSetNull_f(pDst->nullbitmap, j); + } + } + break; + } + case TSDB_DATA_TYPE_UTINYINT: + case TSDB_DATA_TYPE_TINYINT: { + for (int32_t j = 0; j < pDataBlock->info.rows; ++j) { + int32_t* p = (int32_t*)pDst->pData; + int32_t* srclist = (int32_t*)pSrc->pData; + + p[j] = srclist[index[j]]; + if (colDataIsNull_f(pSrc->nullbitmap, index[j])) { + colDataSetNull_f(pDst->nullbitmap, j); + } + } + break; + } + case TSDB_DATA_TYPE_USMALLINT: + case TSDB_DATA_TYPE_SMALLINT: { + for (int32_t j = 0; j < pDataBlock->info.rows; ++j) { + int32_t* p = (int32_t*)pDst->pData; + int32_t* srclist = (int32_t*)pSrc->pData; + + p[j] = srclist[index[j]]; + if (colDataIsNull_f(pSrc->nullbitmap, index[j])) { + colDataSetNull_f(pDst->nullbitmap, j); + } + } + break; + } + case TSDB_DATA_TYPE_UBIGINT: + case TSDB_DATA_TYPE_BIGINT: { + for (int32_t j = 0; j < pDataBlock->info.rows; ++j) { + int32_t* p = (int32_t*)pDst->pData; + int32_t* srclist = (int32_t*)pSrc->pData; + + p[j] = srclist[index[j]]; + if (colDataIsNull_f(pSrc->nullbitmap, index[j])) { + colDataSetNull_f(pDst->nullbitmap, j); + } + } + break; + } + default: + assert(0); + } + } + } +#endif + return TSDB_CODE_SUCCESS; +} + +static SColumnInfoData* createHelpColInfoData(const SSDataBlock* pDataBlock) { + int32_t rows = pDataBlock->info.rows; + int32_t numOfCols = pDataBlock->info.numOfCols; + + SColumnInfoData* pCols = calloc(numOfCols, sizeof(SColumnInfoData)); + if (pCols == NULL) { + return NULL; + } + + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pColInfoData = taosArrayGet(pDataBlock->pDataBlock, i); + pCols[i].info = pColInfoData->info; + + if (IS_VAR_DATA_TYPE(pCols[i].info.type)) { + pCols[i].varmeta.offset = calloc(rows, sizeof(int32_t)); + pCols[i].pData = calloc(1, pColInfoData->varmeta.length); + + pCols[i].varmeta.length = pColInfoData->varmeta.length; + pCols[i].varmeta.allocLen = pCols[i].varmeta.length; + } else { + pCols[i].nullbitmap = calloc(1, BitmapLen(rows)); + pCols[i].pData = calloc(rows, pCols[i].info.bytes); + } + } + + return pCols; +} + +static void copyBackToBlock(SSDataBlock* pDataBlock, SColumnInfoData* pCols) { + int32_t numOfCols = pDataBlock->info.numOfCols; + + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pColInfoData = taosArrayGet(pDataBlock->pDataBlock, i); + pColInfoData->info = pCols[i].info; + + if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) { + tfree(pColInfoData->varmeta.offset); + pColInfoData->varmeta = pCols[i].varmeta; + } else { + tfree(pColInfoData->nullbitmap); + pColInfoData->nullbitmap = pCols[i].nullbitmap; + } + + tfree(pColInfoData->pData); + pColInfoData->pData = pCols[i].pData; + } + + tfree(pCols); +} + +static int32_t* createTupleIndex(size_t rows) { + int32_t* index = calloc(rows, sizeof(int32_t)); + if (index == NULL) { + return NULL; + } + + for(int32_t i = 0; i < rows; ++i) { + index[i] = i; + } + + return index; +} + +static void destroyTupleIndex(int32_t* index) { + tfree(index); +} + +static __compar_fn_t getComparFn(int32_t type, int32_t order) { + switch(type) { + case TSDB_DATA_TYPE_TINYINT: return order == TSDB_ORDER_ASC? compareInt8Val:compareInt8ValDesc; + case TSDB_DATA_TYPE_SMALLINT: return order == TSDB_ORDER_ASC? compareInt16Val:compareInt16ValDesc; + case TSDB_DATA_TYPE_INT: return order == TSDB_ORDER_ASC? compareInt32Val:compareInt32ValDesc; + case TSDB_DATA_TYPE_BIGINT: return order == TSDB_ORDER_ASC? compareInt64Val:compareInt64ValDesc; + case TSDB_DATA_TYPE_FLOAT: return order == TSDB_ORDER_ASC? compareFloatVal:compareFloatValDesc; + case TSDB_DATA_TYPE_DOUBLE: return order == TSDB_ORDER_ASC? compareDoubleVal:compareDoubleValDesc; + case TSDB_DATA_TYPE_UTINYINT: return order == TSDB_ORDER_ASC? compareUint8Val:compareUint8ValDesc; + case TSDB_DATA_TYPE_USMALLINT:return order == TSDB_ORDER_ASC? compareUint16Val:compareUint16ValDesc; + case TSDB_DATA_TYPE_UINT: return order == TSDB_ORDER_ASC? compareUint32Val:compareUint32ValDesc; + case TSDB_DATA_TYPE_UBIGINT: return order == TSDB_ORDER_ASC? compareUint64Val:compareUint64ValDesc; + default: + return order == TSDB_ORDER_ASC? compareInt32Val:compareInt32ValDesc; + } +} + +int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst) { + ASSERT(pDataBlock != NULL && pOrderInfo != NULL); + if (pDataBlock->info.rows <= 1) { + return TSDB_CODE_SUCCESS; + } + + // Allocate the additional buffer. + uint32_t rows = pDataBlock->info.rows; + + bool sortColumnHasNull = false; + bool varTypeSort = false; + + for (int32_t i = 0; i < taosArrayGetSize(pOrderInfo); ++i) { + SBlockOrderInfo* pInfo = taosArrayGet(pOrderInfo, i); + + SColumnInfoData* pColInfoData = taosArrayGet(pDataBlock->pDataBlock, pInfo->colIndex); + if (pColInfoData->hasNull) { + sortColumnHasNull = true; + } + + if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) { + varTypeSort = true; + } + } + + if (taosArrayGetSize(pOrderInfo) == 1 && (!sortColumnHasNull)) { + if (pDataBlock->info.numOfCols == 1) { + if (!varTypeSort) { + SColumnInfoData* pColInfoData = taosArrayGet(pDataBlock->pDataBlock, 0); + SBlockOrderInfo* pOrder = taosArrayGet(pOrderInfo, 0); + + int64_t p0 = taosGetTimestampUs(); + + __compar_fn_t fn = getComparFn(pColInfoData->info.type, pOrder->order); + qsort(pColInfoData->pData, pDataBlock->info.rows, pColInfoData->info.bytes, fn); + + int64_t p1 = taosGetTimestampUs(); + printf("sort:%ld, rows:%d\n", p1 - p0, pDataBlock->info.rows); + + return TSDB_CODE_SUCCESS; + } else { // var data type + + } + } else if (pDataBlock->info.numOfCols == 2) { + + } + } + + int32_t* index = createTupleIndex(rows); + if (index == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return terrno; + } + + int64_t p0 = taosGetTimestampUs(); + + SSDataBlockSortHelper helper = {.nullFirst = nullFirst, .pDataBlock = pDataBlock, .orderInfo = pOrderInfo}; + for(int32_t i = 0; i < taosArrayGetSize(helper.orderInfo); ++i) { + struct SBlockOrderInfo* pInfo = taosArrayGet(helper.orderInfo, i); + pInfo->pColData = taosArrayGet(pDataBlock->pDataBlock, pInfo->colIndex); + } + + taosqsort(index, rows, sizeof(int32_t), &helper, dataBlockCompar); + + int64_t p1 = taosGetTimestampUs(); + + SColumnInfoData* pCols = createHelpColInfoData(pDataBlock); + if (pCols == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return terrno; + } + + int64_t p2 = taosGetTimestampUs(); + + int32_t code = blockDataAssign(pCols, pDataBlock, index); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + return code; + } + + int64_t p3 = taosGetTimestampUs(); + + copyBackToBlock(pDataBlock, pCols); + int64_t p4 = taosGetTimestampUs(); + + printf("sort:%ld, create:%ld, assign:%ld, copyback:%ld, rows:%d\n", p1-p0, p2 - p1, p3 - p2, p4-p3, rows); + destroyTupleIndex(index); + + return TSDB_CODE_SUCCESS; +} + +typedef struct SHelper { + int32_t index; + union {char *pData; int64_t i64; double d64;}; +} SHelper; + +SHelper* createTupleIndex_rv(int32_t numOfRows, SArray* pOrderInfo, SSDataBlock* pBlock) { + int32_t sortValLengthPerRow = 0; + int32_t numOfCols = taosArrayGetSize(pOrderInfo); + + for(int32_t i = 0; i < numOfCols; ++i) { + SBlockOrderInfo* pInfo = taosArrayGet(pOrderInfo, i); + SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, pInfo->colIndex); + pInfo->pColData = pColInfo; + sortValLengthPerRow += pColInfo->info.bytes; + } + + size_t len = sortValLengthPerRow * pBlock->info.rows; + + char* buf = calloc(1, len); + SHelper* phelper = calloc(numOfRows, sizeof(SHelper)); + for(int32_t i = 0; i < numOfRows; ++i) { + phelper[i].index = i; + phelper[i].pData = buf + sortValLengthPerRow * i; + } + + int32_t offset = 0; + for(int32_t i = 0; i < numOfCols; ++i) { + SBlockOrderInfo* pInfo = taosArrayGet(pOrderInfo, i); + for(int32_t j = 0; j < numOfRows; ++j) { + phelper[j].i64 = *(int32_t*) pInfo->pColData->pData + pInfo->pColData->info.bytes * j; +// memcpy(phelper[j].pData + offset, pInfo->pColData->pData + pInfo->pColData->info.bytes * j, pInfo->pColData->info.bytes); + } + + offset += pInfo->pColData->info.bytes; + } + + return phelper; +} + +int32_t dataBlockCompar_rv(const void* p1, const void* p2, const void* param) { + const SSDataBlockSortHelper* pHelper = (const SSDataBlockSortHelper*) param; + +// SSDataBlock* pDataBlock = pHelper->pDataBlock; + + SHelper* left = (SHelper*) p1; + SHelper* right = (SHelper*) p2; + + SArray* pInfo = pHelper->orderInfo; + + int32_t offset = 0; +// for(int32_t i = 0; i < pInfo->size; ++i) { +// SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pInfo, 0); +// SColumnInfoData* pColInfoData = pOrder->pColData;//TARRAY_GET_ELEM(pDataBlock->pDataBlock, pOrder->colIndex); + +// if (pColInfoData->hasNull) { +// bool leftNull = colDataIsNull(pColInfoData, pDataBlock->info.rows, left, pDataBlock->pBlockAgg); +// bool rightNull = colDataIsNull(pColInfoData, pDataBlock->info.rows, right, pDataBlock->pBlockAgg); +// if (leftNull && rightNull) { +// continue; // continue to next slot +// } +// +// if (rightNull) { +// return pHelper->nullFirst? 1:-1; +// } +// +// if (leftNull) { +// return pHelper->nullFirst? -1:1; +// } +// } + +// void* left1 = colDataGet(pColInfoData, left); +// void* right1 = colDataGet(pColInfoData, right); + +// switch(pColInfoData->info.type) { +// case TSDB_DATA_TYPE_INT: { + int32_t leftx = *(int32_t*)left->pData;//*(int32_t*)(left->pData + offset); + int32_t rightx = *(int32_t*)right->pData;//*(int32_t*)(right->pData + offset); + +// offset += pColInfoData->info.bytes; + if (leftx == rightx) { +// break; + return 0; + } else { +// if (pOrder->order == TSDB_ORDER_ASC) { + return (leftx < rightx)? -1:1; +// } else { +// return (leftx < rightx)? 1:-1; +// } + } +// } +// default: +// assert(0); +// } +// } + + return 0; +} + +int32_t varColSort(SColumnInfoData* pColumnInfoData, SBlockOrderInfo* pOrder) { + +} + +int32_t blockDataSort_rv(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst) { +// Allocate the additional buffer. + int64_t p0 = taosGetTimestampUs(); + + SSDataBlockSortHelper helper = {.nullFirst = nullFirst, .pDataBlock = pDataBlock, .orderInfo = pOrderInfo}; + + uint32_t rows = pDataBlock->info.rows; + SHelper* index = createTupleIndex_rv(rows, helper.orderInfo, pDataBlock); + if (index == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return terrno; + } + + taosqsort(index, rows, sizeof(SHelper), &helper, dataBlockCompar_rv); + + int64_t p1 = taosGetTimestampUs(); + SColumnInfoData* pCols = createHelpColInfoData(pDataBlock); + if (pCols == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return terrno; + } + + int64_t p2 = taosGetTimestampUs(); + + // int32_t code = blockDataAssign(pCols, pDataBlock, index); + // if (code != TSDB_CODE_SUCCESS) { + // terrno = code; + // return code; + // } + + int64_t p3 = taosGetTimestampUs(); + + copyBackToBlock(pDataBlock, pCols); + int64_t p4 = taosGetTimestampUs(); + + printf("sort:%ld, create:%ld, assign:%ld, copyback:%ld, rows:%d\n", p1 - p0, p2 - p1, p3 - p2, p4 - p3, rows); + // destroyTupleIndex(index); +} + +void blockDataClearup(SSDataBlock* pDataBlock, bool hasVarCol) { + pDataBlock->info.rows = 0; + + if (hasVarCol) { + for (int32_t i = 0; i < pDataBlock->info.numOfCols; ++i) { + SColumnInfoData* p = taosArrayGet(pDataBlock->pDataBlock, i); + + if (IS_VAR_DATA_TYPE(p->info.type)) { + p->varmeta.length = 0; + } + } + } +} + +int32_t blockDataEnsureCapacity(SSDataBlock* pDataBlock, uint32_t numOfRows) { + for(int32_t i = 0; i < pDataBlock->info.numOfCols; ++i) { + SColumnInfoData* p = taosArrayGet(pDataBlock->pDataBlock, i); + if (IS_VAR_DATA_TYPE(p->info.type)) { + char* tmp = realloc(p->varmeta.offset, sizeof(int32_t) * numOfRows); + if (tmp == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + p->varmeta.offset = (int32_t*)tmp; + memset(p->varmeta.offset, 0, sizeof(int32_t) * numOfRows); + + p->varmeta.length = 0; + p->varmeta.allocLen = 0; + tfree(p->pData); + } else { + char* tmp = realloc(p->nullbitmap, BitmapLen(numOfRows)); + if (tmp == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + p->nullbitmap = tmp; + memset(p->nullbitmap, 0, BitmapLen(numOfRows)); + + tmp = realloc(p->pData, numOfRows * p->info.bytes); + if (tmp == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + p->pData = tmp; + } + } + + return TSDB_CODE_SUCCESS; +} + +void* blockDataDestroy(SSDataBlock* pBlock) { + if (pBlock == NULL) { + return NULL; + } + + int32_t numOfOutput = pBlock->info.numOfCols; + for(int32_t i = 0; i < numOfOutput; ++i) { + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, i); + if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) { + tfree(pColInfoData->varmeta.offset); + } else { + tfree(pColInfoData->nullbitmap); + } + + tfree(pColInfoData->pData); + } + + taosArrayDestroy(pBlock->pDataBlock); + tfree(pBlock->pBlockAgg); + tfree(pBlock); + return NULL; +} \ No newline at end of file diff --git a/source/common/src/tname.c b/source/common/src/tname.c index f8ef9f0979..f3deb84ccf 100644 --- a/source/common/src/tname.c +++ b/source/common/src/tname.c @@ -1,3 +1,4 @@ +#include #include "os.h" #include "tutil.h" @@ -268,4 +269,5 @@ SSchema createSchema(uint8_t type, int32_t bytes, int32_t colId, const char* nam tstrncpy(s.name, name, tListLen(s.name)); return s; -} \ No newline at end of file +} + diff --git a/source/common/src/ttszip.c b/source/common/src/ttszip.c index 41eebc5da4..6d57992c35 100644 --- a/source/common/src/ttszip.c +++ b/source/common/src/ttszip.c @@ -344,8 +344,8 @@ STSBlock* readDataFromDisk(STSBuf* pTSBuf, int32_t order, bool decomp) { UNUSED(ret); } - fread(&pBlock->tag.nType, sizeof(pBlock->tag.nType), 1, pTSBuf->f); - fread(&pBlock->tag.nLen, sizeof(pBlock->tag.nLen), 1, pTSBuf->f); + int32_t ret = fread(&pBlock->tag.nType, sizeof(pBlock->tag.nType), 1, pTSBuf->f); + ret = fread(&pBlock->tag.nLen, sizeof(pBlock->tag.nLen), 1, pTSBuf->f); // NOTE: mix types tags are not supported size_t sz = 0; diff --git a/source/common/test/commonTests.cpp b/source/common/test/commonTests.cpp index b91b6b06f2..e9e8d086b3 100644 --- a/source/common/test/commonTests.cpp +++ b/source/common/test/commonTests.cpp @@ -1,11 +1,12 @@ +#include #include +#include #include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wwrite-strings" #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wunused-variable" -#pragma GCC diagnostic ignored "-Wunused-but-set-variable" #pragma GCC diagnostic ignored "-Wsign-compare" #include "os.h" @@ -96,4 +97,199 @@ TEST(testCase, toInteger_test) { ASSERT_EQ(ret, -1); } +TEST(testCase, Datablock_test) { + SSDataBlock* b = static_cast(calloc(1, sizeof(SSDataBlock))); + b->info.numOfCols = 2; + b->pDataBlock = taosArrayInit(4, sizeof(SColumnInfoData)); + + SColumnInfoData infoData = {0}; + infoData.info.bytes = 4; + infoData.info.type = TSDB_DATA_TYPE_INT; + infoData.info.colId = 1; + + infoData.pData = (char*) calloc(40, infoData.info.bytes); + infoData.nullbitmap = (char*) calloc(1, sizeof(char) * (40/8)); + taosArrayPush(b->pDataBlock, &infoData); + + SColumnInfoData infoData1 = {0}; + infoData1.info.bytes = 40; + infoData1.info.type = TSDB_DATA_TYPE_BINARY; + infoData1.info.colId = 2; + + infoData1.varmeta.offset = (int32_t*) calloc(40, sizeof(uint32_t)); + taosArrayPush(b->pDataBlock, &infoData1); + + char* str = "the value of: %d"; + char buf[128] = {0}; + char varbuf[128] = {0}; + + for(int32_t i = 0; i < 40; ++i) { + SColumnInfoData* p0 = (SColumnInfoData *) taosArrayGet(b->pDataBlock, 0); + SColumnInfoData* p1 = (SColumnInfoData *) taosArrayGet(b->pDataBlock, 1); + + if (i&0x01) { + int32_t len = sprintf(buf, str, i); + STR_TO_VARSTR(varbuf, buf) + colDataAppend(p0, i, (const char*) &i, false); + colDataAppend(p1, i, (const char*) varbuf, false); + + memset(varbuf, 0, sizeof(varbuf)); + memset(buf, 0, sizeof(buf)); + } else { + colDataAppend(p0, i, (const char*) &i, true); + colDataAppend(p1, i, (const char*) varbuf, true); + } + + b->info.rows++; + } + + SColumnInfoData* p0 = (SColumnInfoData *) taosArrayGet(b->pDataBlock, 0); + SColumnInfoData* p1 = (SColumnInfoData *) taosArrayGet(b->pDataBlock, 1); + for(int32_t i = 0; i < 40; ++i) { + if (i & 0x01) { + ASSERT_EQ(colDataIsNull_f(p0->nullbitmap, i), false); + ASSERT_EQ(colDataIsNull(p1, b->info.rows, i, nullptr), false); + } else { + ASSERT_EQ(colDataIsNull_f(p0->nullbitmap, i), true); + + ASSERT_EQ(colDataIsNull(p0, b->info.rows, i, nullptr), true); + ASSERT_EQ(colDataIsNull(p1, b->info.rows, i, nullptr), true); + } + } + + printf("binary column length:%d\n", *(int32_t*) p1->pData); + + ASSERT_EQ(colDataGetNumOfCols(b), 2); + ASSERT_EQ(colDataGetNumOfRows(b), 40); + + char* pData = colDataGet(p1, 3); + printf("the second row of binary:%s, length:%d\n", (char*)varDataVal(pData), varDataLen(pData)); + + SArray* pOrderInfo = taosArrayInit(3, sizeof(SBlockOrderInfo)); + SBlockOrderInfo order = {.order = TSDB_ORDER_ASC, .colIndex = 0}; + taosArrayPush(pOrderInfo, &order); + + blockDataSort(b, pOrderInfo, true); + blockDataDestroy(b); + + taosArrayDestroy(pOrderInfo); +} + +#if 0 +TEST(testCase, non_var_dataBlock_split_test) { + SSDataBlock* b = static_cast(calloc(1, sizeof(SSDataBlock))); + b->info.numOfCols = 2; + b->pDataBlock = taosArrayInit(4, sizeof(SColumnInfoData)); + + SColumnInfoData infoData = {0}; + infoData.info.bytes = 4; + infoData.info.type = TSDB_DATA_TYPE_INT; + infoData.info.colId = 1; + + int32_t numOfRows = 1000000; + + infoData.pData = (char*) calloc(numOfRows, infoData.info.bytes); + infoData.nullbitmap = (char*) calloc(1, sizeof(char) * (numOfRows/8)); + taosArrayPush(b->pDataBlock, &infoData); + + SColumnInfoData infoData1 = {0}; + infoData1.info.bytes = 1; + infoData1.info.type = TSDB_DATA_TYPE_TINYINT; + infoData1.info.colId = 2; + + infoData1.pData = (char*) calloc(numOfRows, infoData.info.bytes); + infoData1.nullbitmap = (char*) calloc(1, sizeof(char) * (numOfRows/8)); + taosArrayPush(b->pDataBlock, &infoData1); + + for(int32_t i = 0; i < numOfRows; ++i) { + SColumnInfoData* p0 = (SColumnInfoData*)taosArrayGet(b->pDataBlock, 0); + SColumnInfoData* p1 = (SColumnInfoData*)taosArrayGet(b->pDataBlock, 1); + + int8_t v = i; + colDataAppend(p0, i, (const char*)&i, false); + colDataAppend(p1, i, (const char*)&v, false); + b->info.rows++; + } + + int32_t pageSize = 64 * 1024; + + int32_t startIndex= 0; + int32_t stopIndex = 0; + int32_t count = 1; + while(1) { + blockDataSplitRows(b, false, startIndex, &stopIndex, pageSize); + printf("the %d split, from: %d to %d\n", count++, startIndex, stopIndex); + + if (stopIndex == numOfRows - 1) { + break; + } + + startIndex = stopIndex + 1; + } + +} + +#endif + +TEST(testCase, var_dataBlock_split_test) { + SSDataBlock* b = static_cast(calloc(1, sizeof(SSDataBlock))); + b->info.numOfCols = 2; + b->pDataBlock = taosArrayInit(4, sizeof(SColumnInfoData)); + + int32_t numOfRows = 1000000; + + SColumnInfoData infoData = {0}; + infoData.info.bytes = 4; + infoData.info.type = TSDB_DATA_TYPE_INT; + infoData.info.colId = 1; + + infoData.pData = (char*) calloc(numOfRows, infoData.info.bytes); + infoData.nullbitmap = (char*) calloc(1, sizeof(char) * (numOfRows/8)); + taosArrayPush(b->pDataBlock, &infoData); + + SColumnInfoData infoData1 = {0}; + infoData1.info.bytes = 40; + infoData1.info.type = TSDB_DATA_TYPE_BINARY; + infoData1.info.colId = 2; + + infoData1.varmeta.offset = (int32_t*) calloc(numOfRows, sizeof(uint32_t)); + taosArrayPush(b->pDataBlock, &infoData1); + + char buf[41] = {0}; + char buf1[100] = {0}; + + for(int32_t i = 0; i < numOfRows; ++i) { + SColumnInfoData* p0 = (SColumnInfoData*)taosArrayGet(b->pDataBlock, 0); + SColumnInfoData* p1 = (SColumnInfoData*)taosArrayGet(b->pDataBlock, 1); + + int8_t v = i; + colDataAppend(p0, i, (const char*)&i, false); + + sprintf(buf, "the number of row:%d", i); + int32_t len = sprintf(buf1, buf, i); + STR_TO_VARSTR(buf1, buf) + colDataAppend(p1, i, buf1, false); + b->info.rows++; + + memset(buf, 0, sizeof(buf)); + memset(buf1, 0, sizeof(buf1)); + } + + int32_t pageSize = 64 * 1024; + + int32_t startIndex= 0; + int32_t stopIndex = 0; + int32_t count = 1; + while(1) { + blockDataSplitRows(b, true, startIndex, &stopIndex, pageSize); + printf("the %d split, from: %d to %d\n", count++, startIndex, stopIndex); + + if (stopIndex == numOfRows - 1) { + break; + } + + startIndex = stopIndex + 1; + } +} + #pragma GCC diagnostic pop \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 5bbc309661..bed2c0cd41 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -2174,8 +2174,8 @@ static int32_t createDataBlocksInfo(STsdbReadHandle* pTsdbReadHandle, int32_t nu assert(cnt <= numOfBlocks && numOfQualTables <= numOfTables); // the pTableQueryInfo[j]->numOfBlocks may be 0 sup.numOfTables = numOfQualTables; - SLoserTreeInfo* pTree = NULL; - uint8_t ret = tLoserTreeCreate(&pTree, sup.numOfTables, &sup, dataBlockOrderCompar); + SMultiwayMergeTreeInfo* pTree = NULL; + uint8_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, dataBlockOrderCompar); if (ret != TSDB_CODE_SUCCESS) { cleanBlockOrderSupporter(&sup, numOfTables); return TSDB_CODE_TDB_OUT_OF_MEMORY; @@ -2184,7 +2184,7 @@ static int32_t createDataBlocksInfo(STsdbReadHandle* pTsdbReadHandle, int32_t nu int32_t numOfTotal = 0; while (numOfTotal < cnt) { - int32_t pos = pTree->pNode[0].index; + int32_t pos = tMergeTreeGetChosenIndex(pTree); int32_t index = sup.blockIndexArray[pos]++; STableBlockInfo* pBlocksInfo = sup.pDataBlockInfo[pos]; @@ -2195,7 +2195,7 @@ static int32_t createDataBlocksInfo(STsdbReadHandle* pTsdbReadHandle, int32_t nu sup.blockIndexArray[pos] = sup.numOfBlocksPerTable[pos] + 1; } - tLoserTreeAdjust(pTree, pos + sup.numOfTables); + tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree)); } /* @@ -3643,13 +3643,13 @@ int32_t tsdbQuerySTableByTagCond(void* pMeta, uint64_t uid, TSKEY skey, const ch SColIndex* pColIndex, int32_t numOfCols, uint64_t reqId, uint64_t taskId) { STbCfg* pTbCfg = metaGetTbInfoByUid(pMeta, uid); if (pTbCfg == NULL) { -// tsdbError("%p failed to get stable, uid:%"PRIu64", TID:0x%"PRIx64" QID:0x%"PRIx64, tsdb, uid, taskId, reqId); + tsdbError("%p failed to get stable, uid:%"PRIu64", TID:0x%"PRIx64" QID:0x%"PRIx64, pMeta, uid, taskId, reqId); terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; goto _error; } if (pTbCfg->type != META_SUPER_TABLE) { -// tsdbError("%p query normal tag not allowed, uid:%" PRIu64 ", TID:0x%"PRIx64" QID:0x%"PRIx64, tsdb, uid, taskId, reqId); + tsdbError("%p query normal tag not allowed, uid:%" PRIu64 ", TID:0x%"PRIx64" QID:0x%"PRIx64, pMeta, uid, taskId, reqId); terrno = TSDB_CODE_OPS_NOT_SUPPORT; //basically, this error is caused by invalid sql issued by client goto _error; } @@ -3668,8 +3668,8 @@ int32_t tsdbQuerySTableByTagCond(void* pMeta, uint64_t uid, TSKEY skey, const ch pGroupInfo->numOfTables = (uint32_t) taosArrayGetSize(res); pGroupInfo->pGroupList = createTableGroup(res, pTagSchema, pColIndex, numOfCols, skey); -// tsdbDebug("%p no table name/tag condition, all tables qualified, numOfTables:%u, group:%zu, TID:0x%"PRIx64" QID:0x%"PRIx64, tsdb, -// pGroupInfo->numOfTables, taosArrayGetSize(pGroupInfo->pGroupList), taskId, reqId); + tsdbDebug("%p no table name/tag condition, all tables qualified, numOfTables:%u, group:%zu, TID:0x%"PRIx64" QID:0x%"PRIx64, pMeta, + pGroupInfo->numOfTables, taosArrayGetSize(pGroupInfo->pGroupList), taskId, reqId); taosArrayDestroy(res); return ret; diff --git a/source/libs/executor/inc/executil.h b/source/libs/executor/inc/executil.h index 130e46fc4c..10d884cb3f 100644 --- a/source/libs/executor/inc/executil.h +++ b/source/libs/executor/inc/executil.h @@ -16,8 +16,8 @@ #define TDENGINE_QUERYUTIL_H #include "common.h" -#include "tpagedfile.h" #include "tbuffer.h" +#include "tpagedbuf.h" #define SET_RES_WINDOW_KEY(_k, _ori, _len, _uid) \ do { \ @@ -126,6 +126,13 @@ static FORCE_INLINE char* getPosInResultPage(struct STaskAttr* pQueryAttr, SFile // return ((char *)page->data) + rowOffset + offset * numOfRows; } +static FORCE_INLINE char* getPosInResultPage_rv(SFilePage* page, int32_t rowOffset, int32_t offset) { + assert(rowOffset >= 0); + + int32_t numOfRows = 1;//(int32_t)getRowNumForMultioutput(pQueryAttr, pQueryAttr->topBotQuery, pQueryAttr->stableQuery); + return ((char *)page->data) + rowOffset + offset * numOfRows; +} + //bool isNullOperator(SColumnFilterElem *pFilter, const char* minval, const char* maxval, int16_t type); //bool notNullOperator(SColumnFilterElem *pFilter, const char* minval, const char* maxval, int16_t type); diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 41ed1739b9..232b54554f 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -15,35 +15,40 @@ #ifndef TDENGINE_EXECUTORIMPL_H #define TDENGINE_EXECUTORIMPL_H +#ifdef __cplusplus +extern "C" { +#endif + #include "os.h" #include "common.h" +#include "tlosertree.h" #include "ttszip.h" #include "tvariant.h" #include "dataSinkMgt.h" #include "executil.h" +#include "executor.h" #include "planner.h" #include "taosdef.h" #include "tarray.h" #include "tfilter.h" #include "thash.h" #include "tlockfree.h" -#include "tpagedfile.h" -#include "executor.h" +#include "tpagedbuf.h" struct SColumnFilterElem; typedef int32_t (*__block_search_fn_t)(char* data, int32_t num, int64_t key, int32_t order); #define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED) -#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0u) +#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0u) #define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP) -#define GET_TABLEGROUP(q, _index) ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index))) +#define GET_TABLEGROUP(q, _index) ((SArray*)taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index))) -#define GET_NUM_OF_RESULTS(_r) (((_r)->outputBuf) == NULL? 0:((_r)->outputBuf)->info.rows) +#define GET_NUM_OF_RESULTS(_r) (((_r)->outputBuf) == NULL ? 0 : ((_r)->outputBuf)->info.rows) -#define NEEDTO_COMPRESS_QUERY(size) ((size) > tsCompressColData? 1 : 0) +#define NEEDTO_COMPRESS_QUERY(size) ((size) > tsCompressColData ? 1 : 0) enum { // when this task starts to execute, this status will set @@ -62,8 +67,8 @@ enum { }; typedef struct SResultRowCell { - uint64_t groupId; - SResultRow *pRow; + uint64_t groupId; + SResultRow* pRow; } SResultRowCell; /** @@ -80,25 +85,23 @@ typedef struct SColumnFilterElem { int16_t bytes; // column length __filter_func_t fp; SColumnFilterInfo filterInfo; - void *q; + void* q; } SColumnFilterElem; typedef struct SSingleColumnFilterInfo { void* pData; - void* pData2; //used for nchar column + void* pData2; // used for nchar column int32_t numOfFilters; SColumnInfo info; SColumnFilterElem* pFilters; } SSingleColumnFilterInfo; typedef struct STableQueryInfo { - TSKEY lastKey; - int32_t groupIndex; // group id in table list - SVariant tag; - STimeWindow win; // todo remove it later - STSCursor cur; - void* pTable; // for retrieve the page id list - SResultRowInfo resInfo; + TSKEY lastKey; // last check ts + uint64_t uid; // table uid + int32_t groupIndex; // group id in table list +// SVariant tag; + SResultRowInfo resInfo; // result info } STableQueryInfo; typedef enum { @@ -109,11 +112,11 @@ typedef enum { typedef struct { EQueryProfEventType eventType; - int64_t eventTime; + int64_t eventTime; union { - uint8_t operatorType; //for operator event - int32_t abortCode; //for query abort event + uint8_t operatorType; // for operator event + int32_t abortCode; // for query abort event }; } SQueryProfEvent; @@ -124,33 +127,33 @@ typedef struct { } SOperatorProfResult; typedef struct STaskCostInfo { - int64_t created; - int64_t start; - int64_t end; + int64_t created; + int64_t start; + int64_t end; - uint64_t loadStatisTime; - uint64_t loadFileBlockTime; - uint64_t loadDataInCacheTime; - uint64_t loadStatisSize; - uint64_t loadFileBlockSize; - uint64_t loadDataInCacheSize; + uint64_t loadStatisTime; + uint64_t loadFileBlockTime; + uint64_t loadDataInCacheTime; + uint64_t loadStatisSize; + uint64_t loadFileBlockSize; + uint64_t loadDataInCacheSize; - uint64_t loadDataTime; - uint64_t totalRows; - uint64_t totalCheckedRows; - uint32_t totalBlocks; - uint32_t loadBlocks; - uint32_t loadBlockStatis; - uint32_t discardBlocks; - uint64_t elapsedTime; - uint64_t firstStageMergeTime; - uint64_t winInfoSize; - uint64_t tableInfoSize; - uint64_t hashSize; - uint64_t numOfTimeWindows; + uint64_t loadDataTime; + uint64_t totalRows; + uint64_t totalCheckedRows; + uint32_t totalBlocks; + uint32_t loadBlocks; + uint32_t loadBlockStatis; + uint32_t discardBlocks; + uint64_t elapsedTime; + uint64_t firstStageMergeTime; + uint64_t winInfoSize; + uint64_t tableInfoSize; + uint64_t hashSize; + uint64_t numOfTimeWindows; - SArray *queryProfEvents; //SArray - SHashObj *operatorProfResults; //map + SArray* queryProfEvents; // SArray + SHashObj* operatorProfResults; // map } STaskCostInfo; typedef struct { @@ -166,67 +169,67 @@ typedef struct { // The basic query information extracted from the SQueryInfo tree to support the // execution of query in a data node. typedef struct STaskAttr { - SLimit limit; - SLimit slimit; + SLimit limit; + SLimit slimit; // todo comment it - bool stableQuery; // super table query or not - bool topBotQuery; // TODO used bitwise flag - bool groupbyColumn; // denote if this is a groupby normal column query - bool hasTagResults; // if there are tag values in final result or not - bool timeWindowInterpo;// if the time window start/end required interpolation - bool queryBlockDist; // if query data block distribution - bool stabledev; // super table stddev query - bool tsCompQuery; // is tscomp query - bool diffQuery; // is diff query - bool simpleAgg; - bool pointInterpQuery; // point interpolation query - bool needReverseScan; // need reverse scan - bool distinct; // distinct query or not - bool stateWindow; // window State on sub/normal table - bool createFilterOperator; // if filter operator is needed - bool multigroupResult; // multigroup result can exist in one SSDataBlock - int32_t interBufSize; // intermediate buffer sizse + bool stableQuery; // super table query or not + bool topBotQuery; // TODO used bitwise flag + bool groupbyColumn; // denote if this is a groupby normal column query + bool hasTagResults; // if there are tag values in final result or not + bool timeWindowInterpo; // if the time window start/end required interpolation + bool queryBlockDist; // if query data block distribution + bool stabledev; // super table stddev query + bool tsCompQuery; // is tscomp query + bool diffQuery; // is diff query + bool simpleAgg; + bool pointInterpQuery; // point interpolation query + bool needReverseScan; // need reverse scan + bool distinct; // distinct query or not + bool stateWindow; // window State on sub/normal table + bool createFilterOperator; // if filter operator is needed + bool multigroupResult; // multigroup result can exist in one SSDataBlock + int32_t interBufSize; // intermediate buffer sizse - int32_t havingNum; // having expr number + int32_t havingNum; // having expr number - SOrder order; - int16_t numOfCols; - int16_t numOfTags; + SOrder order; + int16_t numOfCols; + int16_t numOfTags; - STimeWindow window; - SInterval interval; - SSessionWindow sw; - int16_t precision; - int16_t numOfOutput; - int16_t fillType; + STimeWindow window; + SInterval interval; + SSessionWindow sw; + int16_t precision; + int16_t numOfOutput; + int16_t fillType; - int32_t srcRowSize; // todo extract struct - int32_t resultRowSize; - int32_t intermediateResultRowSize; // intermediate result row size, in case of top-k query. - int32_t maxTableColumnWidth; - int32_t tagLen; // tag value length of current query - SGroupbyExpr *pGroupbyExpr; + int32_t srcRowSize; // todo extract struct + int32_t resultRowSize; + int32_t intermediateResultRowSize; // intermediate result row size, in case of top-k query. + int32_t maxTableColumnWidth; + int32_t tagLen; // tag value length of current query + SGroupbyExpr* pGroupbyExpr; - SExprInfo* pExpr1; - SExprInfo* pExpr2; - int32_t numOfExpr2; - SExprInfo* pExpr3; - int32_t numOfExpr3; + SExprInfo* pExpr1; + SExprInfo* pExpr2; + int32_t numOfExpr2; + SExprInfo* pExpr3; + int32_t numOfExpr3; - SColumnInfo* tableCols; - SColumnInfo* tagColList; - int32_t numOfFilterCols; - int64_t* fillVal; - SOrderedPrjQueryInfo prjInfo; // limit value for each vgroup, only available in global order projection query. + SColumnInfo* tableCols; + SColumnInfo* tagColList; + int32_t numOfFilterCols; + int64_t* fillVal; + SOrderedPrjQueryInfo prjInfo; // limit value for each vgroup, only available in global order projection query. SSingleColumnFilterInfo* pFilterInfo; -// SFilterInfo *pFilters; - - void* tsdb; - STableGroupInfo tableGroupInfo; // table list SArray - int32_t vgId; - SArray *pUdfInfo; // no need to free + // SFilterInfo *pFilters; + + void* tsdb; + STableGroupInfo tableGroupInfo; // table list SArray + int32_t vgId; + SArray* pUdfInfo; // no need to free } STaskAttr; typedef int32_t (*__optr_prepare_fn_t)(void* param); @@ -236,176 +239,185 @@ typedef void (*__optr_cleanup_fn_t)(void* param, int32_t num); struct SOperatorInfo; typedef struct STaskIdInfo { - uint64_t queryId; // this is also a request id - uint64_t subplanId; - uint64_t templateId; - char *str; + uint64_t queryId; // this is also a request id + uint64_t subplanId; + uint64_t templateId; + char* str; } STaskIdInfo; typedef struct SExecTaskInfo { STaskIdInfo id; - char *content; + char* content; uint32_t status; STimeWindow window; STaskCostInfo cost; - int64_t owner; // if it is in execution + int64_t owner; // if it is in execution int32_t code; - uint64_t totalRows; // total number of rows + uint64_t totalRows; // total number of rows STableGroupInfo tableqinfoGroupInfo; // this is a group array list, including SArray structure - char *sql; // query sql string - jmp_buf env; // - struct SOperatorInfo *pRoot; + char* sql; // query sql string + jmp_buf env; // + struct SOperatorInfo* pRoot; } SExecTaskInfo; typedef struct STaskRuntimeEnv { - jmp_buf env; - STaskAttr* pQueryAttr; - uint32_t status; // query status - void* qinfo; - uint8_t scanFlag; // denotes reversed scan of data or not - void* pTsdbReadHandle; + jmp_buf env; + STaskAttr* pQueryAttr; + uint32_t status; // query status + void* qinfo; + uint8_t scanFlag; // denotes reversed scan of data or not + void* pTsdbReadHandle; - int32_t prevGroupId; // previous executed group id - bool enableGroupData; - SDiskbasedResultBuf* pResultBuf; // query result buffer based on blocked-wised disk file - SHashObj* pResultRowHashTable; // quick locate the window object for each result - SHashObj* pResultRowListSet; // used to check if current ResultRowInfo has ResultRow object or not - SArray* pResultRowArrayList; // The array list that contains the Result rows - char* keyBuf; // window key buffer - SResultRowPool* pool; // The window result objects pool, all the resultRow Objects are allocated and managed by this object. - char** prevRow; + int32_t prevGroupId; // previous executed group id + bool enableGroupData; + SDiskbasedBuf* pResultBuf; // query result buffer based on blocked-wised disk file + SHashObj* pResultRowHashTable; // quick locate the window object for each result + SHashObj* pResultRowListSet; // used to check if current ResultRowInfo has ResultRow object or not + SArray* pResultRowArrayList; // The array list that contains the Result rows + char* keyBuf; // window key buffer + // The window result objects pool, all the resultRow Objects are allocated and managed by this object. + char** prevRow; + SResultRowPool* pool; - SArray* prevResult; // intermediate result, SArray - STSBuf* pTsBuf; // timestamp filter list - STSCursor cur; + SArray* prevResult; // intermediate result, SArray + STSBuf* pTsBuf; // timestamp filter list + STSCursor cur; - char* tagVal; // tag value of current data block - struct SScalarFunctionSupport * scalarSup; + char* tagVal; // tag value of current data block + struct SScalarFunctionSupport* scalarSup; - SSDataBlock *outputBuf; - STableGroupInfo tableqinfoGroupInfo; // this is a group array list, including SArray structure - struct SOperatorInfo *proot; + SSDataBlock* outputBuf; + STableGroupInfo tableqinfoGroupInfo; // this is a group array list, including SArray structure + struct SOperatorInfo* proot; SGroupResInfo groupResInfo; - int64_t currentOffset; // dynamic offset value + int64_t currentOffset; // dynamic offset value - STableQueryInfo *current; - SRspResultInfo resultInfo; - SHashObj *pTableRetrieveTsMap; - struct SUdfInfo *pUdfInfo; + STableQueryInfo* current; + SRspResultInfo resultInfo; + SHashObj* pTableRetrieveTsMap; + struct SUdfInfo* pUdfInfo; } STaskRuntimeEnv; enum { - OP_IN_EXECUTING = 1, - OP_RES_TO_RETURN = 2, - OP_EXEC_DONE = 3, + OP_IN_EXECUTING = 1, + OP_RES_TO_RETURN = 2, + OP_EXEC_DONE = 3, }; typedef struct SOperatorInfo { - uint8_t operatorType; - bool blockingOptr; // block operator or not - uint8_t status; // denote if current operator is completed - int32_t numOfOutput; // number of columns of the current operator results - char *name; // name, used to show the query execution plan - void *info; // extension attribution - SExprInfo *pExpr; - STaskRuntimeEnv *pRuntimeEnv; // todo remove it - SExecTaskInfo *pTaskInfo; + uint8_t operatorType; + bool blockingOptr; // block operator or not + uint8_t status; // denote if current operator is completed + int32_t numOfOutput; // number of columns of the current operator results + char* name; // name, used to show the query execution plan + void* info; // extension attribution + SExprInfo* pExpr; + STaskRuntimeEnv* pRuntimeEnv; // todo remove it + SExecTaskInfo* pTaskInfo; - struct SOperatorInfo **pDownstream; // downstram pointer list - int32_t numOfDownstream; // number of downstream. The value is always ONE expect for join operator - __optr_prepare_fn_t prepareFn; - __operator_fn_t exec; - __optr_cleanup_fn_t cleanupFn; + struct SOperatorInfo** pDownstream; // downstram pointer list + int32_t numOfDownstream; // number of downstream. The value is always ONE expect for join operator + __optr_prepare_fn_t prepareFn; + __operator_fn_t exec; + __optr_cleanup_fn_t cleanupFn; } SOperatorInfo; -enum { - QUERY_RESULT_NOT_READY = 1, - QUERY_RESULT_READY = 2, -}; - typedef struct { int32_t numOfTags; int32_t numOfCols; - SColumnInfo *colList; + SColumnInfo* colList; } SQueriedTableInfo; typedef struct SQInfo { - void* signature; - uint64_t qId; - int32_t code; // error code to returned to client - int64_t owner; // if it is in execution + void* signature; + uint64_t qId; + int32_t code; // error code to returned to client + int64_t owner; // if it is in execution STaskRuntimeEnv runtimeEnv; STaskAttr query; - void* pBuf; // allocated buffer for STableQueryInfo, sizeof(STableQueryInfo)*numOfTables; + void* pBuf; // allocated buffer for STableQueryInfo, sizeof(STableQueryInfo)*numOfTables; - pthread_mutex_t lock; // used to synchronize the rsp/query threads - tsem_t ready; - int32_t dataReady; // denote if query result is ready or not - void* rspContext; // response context - int64_t startExecTs; // start to exec timestamp - char* sql; // query sql string - STaskCostInfo summary; + pthread_mutex_t lock; // used to synchronize the rsp/query threads + tsem_t ready; + int32_t dataReady; // denote if query result is ready or not + void* rspContext; // response context + int64_t startExecTs; // start to exec timestamp + char* sql; // query sql string + STaskCostInfo summary; } SQInfo; typedef struct STaskParam { - char *sql; - char *tagCond; - char *colCond; - char *tbnameCond; - char *prevResult; - SArray *pTableIdList; - SSqlExpr **pExpr; - SSqlExpr **pSecExpr; - SExprInfo *pExprs; - SExprInfo *pSecExprs; + char* sql; + char* tagCond; + char* colCond; + char* tbnameCond; + char* prevResult; + SArray* pTableIdList; + SSqlExpr** pExpr; + SSqlExpr** pSecExpr; + SExprInfo* pExprs; + SExprInfo* pSecExprs; - SFilterInfo *pFilters; + SFilterInfo* pFilters; - SColIndex *pGroupColIndex; - SColumnInfo *pTagColumnInfo; - SGroupbyExpr *pGroupbyExpr; + SColIndex* pGroupColIndex; + SColumnInfo* pTagColumnInfo; + SGroupbyExpr* pGroupbyExpr; int32_t tableScanOperator; - SArray *pOperator; - struct SUdfInfo *pUdfInfo; + SArray* pOperator; + struct SUdfInfo* pUdfInfo; } STaskParam; -typedef struct SExchangeInfo { - SArray *pSources; - tsem_t ready; - void *pTransporter; - SRetrieveTableRsp *pRsp; - SSDataBlock *pResult; - int32_t current; - uint64_t rowsOfCurrentSource; +enum { + DATA_NOT_READY = 0x1, + DATA_READY = 0x2, + DATA_EXHAUSTED = 0x3, +}; - uint64_t totalSize; // total load bytes from remote - uint64_t totalRows; // total number of rows - uint64_t totalElapsed;// total elapsed time +typedef struct SSourceDataInfo { + struct SExchangeInfo *pEx; + int32_t index; + SRetrieveTableRsp *pRsp; + uint64_t totalRows; + int32_t status; +} SSourceDataInfo; + +typedef struct SExchangeInfo { + SArray* pSources; + SArray* pSourceDataInfo; + tsem_t ready; + void* pTransporter; + SSDataBlock* pResult; + bool seqLoadData; // sequential load data or not, false by default + int32_t current; + uint64_t totalSize; // total load bytes from remote + uint64_t totalRows; // total number of rows + uint64_t totalElapsed; // total elapsed time } SExchangeInfo; typedef struct STableScanInfo { - void *pTsdbReadHandle; - int32_t numOfBlocks; // extract basic running information. - int32_t numOfSkipped; - int32_t numOfBlockStatis; - int64_t numOfRows; - - int32_t order; // scan order - int32_t times; // repeat counts - int32_t current; - int32_t reverseTimes; // 0 by default + void* pTsdbReadHandle; + int32_t numOfBlocks; // extract basic running information. + int32_t numOfSkipped; + int32_t numOfBlockStatis; + int64_t numOfRows; - SqlFunctionCtx *pCtx; // next operator query context - SResultRowInfo *pResultRowInfo; - int32_t *rowCellInfoOffset; - SExprInfo *pExpr; + int32_t order; // scan order + int32_t times; // repeat counts + int32_t current; + int32_t reverseTimes; // 0 by default + + SqlFunctionCtx* pCtx; // next operator query context + SResultRowInfo* pResultRowInfo; + int32_t* rowCellInfoOffset; + SExprInfo* pExpr; SSDataBlock block; int32_t numOfOutput; int64_t elapsedTime; int32_t prevGroupId; // previous table group id - int32_t scanFlag; // table scan flag to denote if it is a repeat/reverse/main scan + int32_t scanFlag; // table scan flag to denote if it is a repeat/reverse/main scan } STableScanInfo; typedef struct STagScanInfo { @@ -416,32 +428,36 @@ typedef struct STagScanInfo { } STagScanInfo; typedef struct SStreamBlockScanInfo { - SSDataBlock *pRes; // result SSDataBlock - SColumnInfo *pCols; // the output column info - uint64_t numOfRows; // total scanned rows - uint64_t numOfExec; // execution times - void *readerHandle;// stream block reader handle + SSDataBlock* pRes; // result SSDataBlock + SColumnInfo* pCols; // the output column info + uint64_t numOfRows; // total scanned rows + uint64_t numOfExec; // execution times + void* readerHandle; // stream block reader handle } SStreamBlockScanInfo; typedef struct SOptrBasicInfo { - SResultRowInfo resultRowInfo; - int32_t *rowCellInfoOffset; // offset value for each row result cell info - SqlFunctionCtx *pCtx; - SSDataBlock *pRes; + SResultRowInfo resultRowInfo; + int32_t* rowCellInfoOffset; // offset value for each row result cell info + SqlFunctionCtx* pCtx; + SSDataBlock* pRes; + uint32_t resRowSize; + int32_t capacity; } SOptrBasicInfo; typedef struct SOptrBasicInfo STableIntervalOperatorInfo; typedef struct SAggOperatorInfo { - SOptrBasicInfo binfo; - uint32_t seed; - SDiskbasedResultBuf *pResultBuf; // query result buffer based on blocked-wised disk file - SHashObj* pResultRowHashTable; // quick locate the window object for each result - SHashObj* pResultRowListSet; // used to check if current ResultRowInfo has ResultRow object or not - SArray* pResultRowArrayList; // The array list that contains the Result rows - char* keyBuf; // window key buffer - SResultRowPool* pool; // The window result objects pool, all the resultRow Objects are allocated and managed by this object. - STableQueryInfo *current; + SOptrBasicInfo binfo; + SDiskbasedBuf *pResultBuf; // query result buffer based on blocked-wised disk file + SHashObj* pResultRowHashTable; // quick locate the window object for each result + SHashObj* pResultRowListSet; // used to check if current ResultRowInfo has ResultRow object or not + SArray* pResultRowArrayList; // The array list that contains the Result rows + char* keyBuf; // window key buffer + SResultRowPool *pool; // The window result objects pool, all the resultRow Objects are allocated and managed by this object. + STableQueryInfo *current; + uint32_t groupId; + SGroupResInfo groupResInfo; + STableQueryInfo *pTableQueryInfo; } SAggOperatorInfo; typedef struct SProjectOperatorInfo { @@ -449,52 +465,52 @@ typedef struct SProjectOperatorInfo { int32_t bufCapacity; uint32_t seed; - SSDataBlock *existDataBlock; + SSDataBlock* existDataBlock; } SProjectOperatorInfo; typedef struct SLimitOperatorInfo { - int64_t limit; - int64_t total; + int64_t limit; + int64_t total; } SLimitOperatorInfo; typedef struct SSLimitOperatorInfo { - int64_t groupTotal; - int64_t currentGroupOffset; + int64_t groupTotal; + int64_t currentGroupOffset; - int64_t rowsTotal; - int64_t currentOffset; - SLimit limit; - SLimit slimit; + int64_t rowsTotal; + int64_t currentOffset; + SLimit limit; + SLimit slimit; - char **prevRow; - SArray *orderColumnList; + char** prevRow; + SArray* orderColumnList; bool hasPrev; bool ignoreCurrentGroup; bool multigroupResult; - SSDataBlock *pRes; // result buffer - SSDataBlock *pPrevBlock; + SSDataBlock* pRes; // result buffer + SSDataBlock* pPrevBlock; int64_t capacity; int64_t threshold; } SSLimitOperatorInfo; typedef struct SFilterOperatorInfo { - SSingleColumnFilterInfo *pFilterInfo; - int32_t numOfFilterCols; + SSingleColumnFilterInfo* pFilterInfo; + int32_t numOfFilterCols; } SFilterOperatorInfo; typedef struct SFillOperatorInfo { - struct SFillInfo *pFillInfo; - SSDataBlock *pRes; - int64_t totalInputRows; - void **p; - SSDataBlock *existNewGroupBlock; - bool multigroupResult; + struct SFillInfo* pFillInfo; + SSDataBlock* pRes; + int64_t totalInputRows; + void** p; + SSDataBlock* existNewGroupBlock; + bool multigroupResult; } SFillOperatorInfo; typedef struct SGroupbyOperatorInfo { SOptrBasicInfo binfo; int32_t colIndex; - char *prevData; // previous group by value + char* prevData; // previous group by value } SGroupbyOperatorInfo; typedef struct SSWindowOperatorInfo { @@ -503,16 +519,16 @@ typedef struct SSWindowOperatorInfo { TSKEY prevTs; // previous timestamp int32_t numOfRows; // number of rows int32_t start; // start row index - bool reptScan; // next round scan + bool reptScan; // next round scan } SSWindowOperatorInfo; typedef struct SStateWindowOperatorInfo { SOptrBasicInfo binfo; STimeWindow curWindow; // current time window int32_t numOfRows; // number of rows - int32_t colIndex; // start row index + int32_t colIndex; // start row index int32_t start; - char* prevData; // previous data + char* prevData; // previous data bool reptScan; } SStateWindowOperatorInfo; @@ -520,147 +536,186 @@ typedef struct SDistinctDataInfo { int32_t index; int32_t type; int32_t bytes; -} SDistinctDataInfo; +} SDistinctDataInfo; typedef struct SDistinctOperatorInfo { - SHashObj *pSet; - SSDataBlock *pRes; - bool recordNullVal; //has already record the null value, no need to try again - int64_t threshold; - int64_t outputCapacity; - int32_t totalBytes; - char* buf; - SArray* pDistinctDataInfo; + SHashObj* pSet; + SSDataBlock* pRes; + bool recordNullVal; // has already record the null value, no need to try again + int64_t threshold; + int64_t outputCapacity; + int32_t totalBytes; + char* buf; + SArray* pDistinctDataInfo; } SDistinctOperatorInfo; struct SGlobalMerger; typedef struct SMultiwayMergeInfo { - struct SGlobalMerger *pMerge; - SOptrBasicInfo binfo; - int32_t bufCapacity; - int64_t seed; - char **prevRow; - SArray *orderColumnList; - int32_t resultRowFactor; + struct SGlobalMerger* pMerge; + SOptrBasicInfo binfo; + int32_t bufCapacity; + int64_t seed; + char** prevRow; + SArray* orderColumnList; + int32_t resultRowFactor; - bool hasGroupColData; - char **currentGroupColData; - SArray *groupColumnList; - bool hasDataBlockForNewGroup; - SSDataBlock *pExistBlock; + bool hasGroupColData; + char** currentGroupColData; + SArray* groupColumnList; + bool hasDataBlockForNewGroup; + SSDataBlock* pExistBlock; - SArray *udfInfo; - bool hasPrev; - bool multiGroupResults; + SArray* udfInfo; + bool hasPrev; + bool multiGroupResults; } SMultiwayMergeInfo; -// todo support the disk-based sort +typedef struct SMsortComparParam { + struct SExternalMemSource **pSources; + int32_t numOfSources; + SArray *orderInfo; // SArray + bool nullFirst; +} SMsortComparParam; + typedef struct SOrderOperatorInfo { - int32_t colIndex; - int32_t order; - SSDataBlock *pDataBlock; + int32_t sourceId; + uint32_t sortBufSize; // max buffer size for in-memory sort + SSDataBlock *pDataBlock; + bool hasVarCol; // has variable length column, such as binary/varchar/nchar + int32_t numOfCompleted; + SDiskbasedBuf *pSortInternalBuf; + SMultiwayMergeTreeInfo *pMergeTree; + SArray *pSources; // SArray + int32_t bufPageSize; + int32_t numOfRowsInRes; + + SMsortComparParam cmpParam; + + int64_t startTs; // sort start time + uint64_t sortElapsed; // sort elapsed time, time to flush to disk not included. + uint64_t totalSize; // total load bytes from remote + uint64_t totalRows; // total number of rows + uint64_t totalElapsed; // total elapsed time } SOrderOperatorInfo; SOperatorInfo* createExchangeOperatorInfo(const SArray* pSources, const SArray* pSchema, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbReadHandle, int32_t order, int32_t numOfOutput, int32_t repeatTime, int32_t reverseTime, SExecTaskInfo* pTaskInfo); -SOperatorInfo* createTableSeqScanOperator(void* pTsdbReadHandle, STaskRuntimeEnv* pRuntimeEnv); -SOperatorInfo* createSubmitBlockScanOperatorInfo(void *pSubmitBlockReadHandle, int32_t numOfOutput, SExecTaskInfo* pTaskInfo); - -SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SArray* pExprInfo, SExecTaskInfo* pTaskInfo); -SOperatorInfo* createProjectOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createTableScanOperatorInfo(void* pTsdbReadHandle, int32_t order, int32_t numOfOutput, + int32_t repeatTime, int32_t reverseTime, SExecTaskInfo* pTaskInfo); +SOperatorInfo* createTableSeqScanOperatorInfo(void* pTsdbReadHandle, STaskRuntimeEnv* pRuntimeEnv); +SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SArray* pExprInfo, SExecTaskInfo* pTaskInfo, const STableGroupInfo* pTableGroupInfo); +SOperatorInfo* createMultiTableAggOperatorInfo(SOperatorInfo* downstream, SArray* pExprInfo, SExecTaskInfo* pTaskInfo, const STableGroupInfo* pTableGroupInfo); +SOperatorInfo* createProjectOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, + int32_t numOfOutput); SOperatorInfo* createLimitOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream); -SOperatorInfo* createTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createAllTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createSWindowOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createFillOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput, bool multigroupResult); -SOperatorInfo* createGroupbyOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createMultiTableAggOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createMultiTableTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createAllMultiTableTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, + int32_t numOfOutput); +SOperatorInfo* createAllTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, + SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createSWindowOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, + int32_t numOfOutput); +SOperatorInfo* createFillOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, + int32_t numOfOutput, bool multigroupResult); +SOperatorInfo* createGroupbyOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, + int32_t numOfOutput); + +SOperatorInfo* createMultiTableTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, + SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createAllMultiTableTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, + SExprInfo* pExpr, int32_t numOfOutput); SOperatorInfo* createTagScanOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createDistinctOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createDistinctOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, + int32_t numOfOutput); SOperatorInfo* createTableBlockInfoScanOperator(void* pTsdbReadHandle, STaskRuntimeEnv* pRuntimeEnv); SOperatorInfo* createMultiwaySortOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput, int32_t numOfRows, void* merger); -SOperatorInfo* createGlobalAggregateOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput, void* param, SArray* pUdfInfo, bool groupResultMixedUp); -SOperatorInfo* createStatewindowOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createSLimitOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput, void* merger, bool multigroupResult); +SOperatorInfo* createGlobalAggregateOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, + SExprInfo* pExpr, int32_t numOfOutput, void* param, SArray* pUdfInfo, + bool groupResultMixedUp); +SOperatorInfo* createStatewindowOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, + int32_t numOfOutput); +SOperatorInfo* createSLimitOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, + int32_t numOfOutput, void* merger, bool multigroupResult); SOperatorInfo* createFilterOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput, SColumnInfo* pCols, int32_t numOfFilter); -SOperatorInfo* createJoinOperatorInfo(SOperatorInfo** pdownstream, int32_t numOfDownstream, SSchema* pSchema, int32_t numOfOutput); -SOperatorInfo* createOrderOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput, SOrder* pOrderVal); +SOperatorInfo* createJoinOperatorInfo(SOperatorInfo** pdownstream, int32_t numOfDownstream, SSchema* pSchema, + int32_t numOfOutput); +SOperatorInfo* createOrderOperatorInfo(SOperatorInfo* downstream, SArray* pExprInfo, SArray* pOrderVal); +SOperatorInfo* createMergeSortOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput, + SOrder* pOrderVal); -//SSDataBlock* doGlobalAggregate(void* param, bool* newgroup); -//SSDataBlock* doMultiwayMergeSort(void* param, bool* newgroup); -//SSDataBlock* doSLimit(void* param, bool* newgroup); +// SSDataBlock* doGlobalAggregate(void* param, bool* newgroup); +// SSDataBlock* doMultiwayMergeSort(void* param, bool* newgroup); +// SSDataBlock* doSLimit(void* param, bool* newgroup); -//int32_t doCreateFilterInfo(SColumnInfo* pCols, int32_t numOfCols, int32_t numOfFilterCols, SSingleColumnFilterInfo** pFilterInfo, uint64_t qId); +// int32_t doCreateFilterInfo(SColumnInfo* pCols, int32_t numOfCols, int32_t numOfFilterCols, SSingleColumnFilterInfo** pFilterInfo, uint64_t qId); void doSetFilterColumnInfo(SSingleColumnFilterInfo* pFilterInfo, int32_t numOfFilterCols, SSDataBlock* pBlock); bool doFilterDataBlock(SSingleColumnFilterInfo* pFilterInfo, int32_t numOfFilterCols, int32_t numOfRows, int8_t* p); void doCompactSDataBlock(SSDataBlock* pBlock, int32_t numOfRows, int8_t* p); SSDataBlock* createOutputBuf(SExprInfo* pExpr, int32_t numOfOutput, int32_t numOfRows); -void* destroyOutputBuf(SSDataBlock* pBlock); void* doDestroyFilterInfo(SSingleColumnFilterInfo* pFilterInfo, int32_t numOfFilterCols); void setInputDataBlock(SOperatorInfo* pOperator, SqlFunctionCtx* pCtx, SSDataBlock* pBlock, int32_t order); -void finalizeQueryResult(SOperatorInfo* pOperator, SqlFunctionCtx* pCtx, SResultRowInfo* pResultRowInfo, int32_t* rowCellInfoOffset); -void updateOutputBuf(SOptrBasicInfo* pBInfo, int32_t *bufCapacity, int32_t numOfInputRows); -void clearOutputBuf(SOptrBasicInfo* pBInfo, int32_t *bufCapacity); +void finalizeQueryResult(SOperatorInfo* pOperator, SqlFunctionCtx* pCtx, SResultRowInfo* pResultRowInfo, + int32_t* rowCellInfoOffset); +void updateOutputBuf(SOptrBasicInfo* pBInfo, int32_t* bufCapacity, int32_t numOfInputRows); +void clearOutputBuf(SOptrBasicInfo* pBInfo, int32_t* bufCapacity); void copyTsColoum(SSDataBlock* pRes, SqlFunctionCtx* pCtx, int32_t numOfOutput); -void freeParam(STaskParam *param); -int32_t createQueryFunc(SQueriedTableInfo* pTableInfo, int32_t numOfOutput, SExprInfo** pExprInfo, - SSqlExpr** pExprMsg, SColumnInfo* pTagCols, int32_t queryType, void* pMsg, struct SUdfInfo* pUdfInfo); +int32_t createQueryFunc(SQueriedTableInfo* pTableInfo, int32_t numOfOutput, SExprInfo** pExprInfo, SSqlExpr** pExprMsg, + SColumnInfo* pTagCols, int32_t queryType, void* pMsg, struct SUdfInfo* pUdfInfo); -int32_t createIndirectQueryFuncExprFromMsg(SQueryTableReq *pQueryMsg, int32_t numOfOutput, SExprInfo **pExprInfo, - SSqlExpr **pExpr, SExprInfo *prevExpr, struct SUdfInfo *pUdfInfo); +int32_t createIndirectQueryFuncExprFromMsg(SQueryTableReq* pQueryMsg, int32_t numOfOutput, SExprInfo** pExprInfo, + SSqlExpr** pExpr, SExprInfo* prevExpr, struct SUdfInfo* pUdfInfo); -int32_t createQueryFilter(char *data, uint16_t len, SFilterInfo** pFilters); +int32_t createQueryFilter(char* data, uint16_t len, SFilterInfo** pFilters); -SGroupbyExpr *createGroupbyExprFromMsg(SQueryTableReq *pQueryMsg, SColIndex *pColIndex, int32_t *code); +SGroupbyExpr* createGroupbyExprFromMsg(SQueryTableReq* pQueryMsg, SColIndex* pColIndex, int32_t* code); int32_t initQInfo(STsBufInfo* pTsBufInfo, void* tsdb, void* sourceOptr, SQInfo* pQInfo, STaskParam* param, char* start, int32_t prevResultLen, void* merger); int32_t createFilterInfo(STaskAttr* pQueryAttr, uint64_t qId); -void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters); +void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters); -STableQueryInfo *createTableQueryInfo(STaskAttr* pQueryAttr, void* pTable, bool groupbyColumn, STimeWindow win, void* buf); +STableQueryInfo* createTableQueryInfo(void* buf, bool groupbyColumn, STimeWindow win); STableQueryInfo* createTmpTableQueryInfo(STimeWindow win); -int32_t buildArithmeticExprFromMsg(SExprInfo *pArithExprInfo, void *pQueryMsg); +int32_t buildArithmeticExprFromMsg(SExprInfo* pArithExprInfo, void* pQueryMsg); -bool isTaskKilled(SExecTaskInfo *pTaskInfo); +bool isTaskKilled(SExecTaskInfo* pTaskInfo); int32_t checkForQueryBuf(size_t numOfTables); -bool checkNeedToCompressQueryCol(SQInfo *pQInfo); -void setQueryStatus(STaskRuntimeEnv *pRuntimeEnv, int8_t status); +bool checkNeedToCompressQueryCol(SQInfo* pQInfo); +void setQueryStatus(STaskRuntimeEnv* pRuntimeEnv, int8_t status); bool onlyQueryTags(STaskAttr* pQueryAttr); -//void destroyUdfInfo(struct SUdfInfo* pUdfInfo); +// void destroyUdfInfo(struct SUdfInfo* pUdfInfo); -int32_t doDumpQueryResult(SQInfo *pQInfo, char *data, int8_t compressed, int32_t *compLen); +int32_t doDumpQueryResult(SQInfo* pQInfo, char* data, int8_t compressed, int32_t* compLen); -size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows); -void setTaskKilled(SExecTaskInfo *pTaskInfo); +size_t getResultSize(SQInfo* pQInfo, int64_t* numOfRows); +void setTaskKilled(SExecTaskInfo* pTaskInfo); void publishOperatorProfEvent(SOperatorInfo* operatorInfo, EQueryProfEventType eventType); -void publishQueryAbortEvent(SExecTaskInfo * pTaskInfo, int32_t code); +void publishQueryAbortEvent(SExecTaskInfo* pTaskInfo, int32_t code); void calculateOperatorProfResults(SQInfo* pQInfo); -void queryCostStatis(SExecTaskInfo *pTaskInfo); +void queryCostStatis(SExecTaskInfo* pTaskInfo); -void doDestroyTask(SExecTaskInfo *pTaskInfo); -void freeQueryAttr(STaskAttr *pQuery); +void doDestroyTask(SExecTaskInfo* pTaskInfo); +void freeQueryAttr(STaskAttr* pQuery); int32_t getMaximumIdleDurationSec(); -void doInvokeUdf(struct SUdfInfo* pUdfInfo, SqlFunctionCtx *pCtx, int32_t idx, int32_t type); -void setTaskStatus(SExecTaskInfo *pTaskInfo, int8_t status); +void doInvokeUdf(struct SUdfInfo* pUdfInfo, SqlFunctionCtx* pCtx, int32_t idx, int32_t type); +void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status); int32_t createExecTaskInfoImpl(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SReadHandle* pHandle, uint64_t taskId); +#ifdef __cplusplus +} +#endif + #endif // TDENGINE_EXECUTORIMPL_H diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 78093ce080..52ab8493f1 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -141,9 +141,9 @@ void clearResultRow(STaskRuntimeEnv *pRuntimeEnv, SResultRow *pResultRow, int16_ return; } - // the result does not put into the SDiskbasedResultBuf, ignore it. + // the result does not put into the SDiskbasedBuf, ignore it. if (pResultRow->pageId >= 0) { - SFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResultRow->pageId); + SFilePage *page = getBufPage(pRuntimeEnv->pResultBuf, pResultRow->pageId); int16_t offset = 0; for (int32_t i = 0; i < pRuntimeEnv->pQueryAttr->numOfOutput; ++i) { @@ -358,7 +358,6 @@ void initGroupResInfo(SGroupResInfo* pGroupResInfo, SResultRowInfo* pResultInfo) pGroupResInfo->pRows = taosArrayFromList(pResultInfo->pResult, pResultInfo->size, POINTER_BYTES); pGroupResInfo->index = 0; - assert(pGroupResInfo->index <= getNumOfTotalRes(pGroupResInfo)); } @@ -533,7 +532,7 @@ static UNUSED_FUNC int32_t mergeIntoGroupResultImpl(STaskRuntimeEnv *pRuntimeEnv int32_t code = TSDB_CODE_SUCCESS; int32_t *posList = NULL; - SLoserTreeInfo *pTree = NULL; + SMultiwayMergeTreeInfo *pTree = NULL; STableQueryInfo **pTableQueryInfoList = NULL; size_t size = taosArrayGetSize(pTableList); @@ -566,7 +565,7 @@ static UNUSED_FUNC int32_t mergeIntoGroupResultImpl(STaskRuntimeEnv *pRuntimeEnv SCompSupporter cs = {pTableQueryInfoList, posList, pRuntimeEnv->pQueryAttr->order.order}; - int32_t ret = tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn); + int32_t ret = tMergeTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn); if (ret != TSDB_CODE_SUCCESS) { code = TSDB_CODE_QRY_OUT_OF_MEMORY; goto _end; @@ -576,7 +575,7 @@ static UNUSED_FUNC int32_t mergeIntoGroupResultImpl(STaskRuntimeEnv *pRuntimeEnv int64_t startt = taosGetTimestampMs(); while (1) { - int32_t tableIndex = pTree->pNode[0].index; + int32_t tableIndex = tMergeTreeGetChosenIndex(pTree); SResultRowInfo *pWindowResInfo = &pTableQueryInfoList[tableIndex]->resInfo; SResultRow *pWindowRes = getResultRow(pWindowResInfo, cs.rowIndex[tableIndex]); @@ -612,7 +611,7 @@ static UNUSED_FUNC int32_t mergeIntoGroupResultImpl(STaskRuntimeEnv *pRuntimeEnv } } - tLoserTreeAdjust(pTree, tableIndex + pTree->numOfEntries); + tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree)); } int64_t endt = taosGetTimestampMs(); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 2d3085c82b..f5dc7a82b1 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -12,12 +12,13 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ -#include "parser.h" -#include "tq.h" +#include #include "exception.h" #include "os.h" +#include "parser.h" #include "tglobal.h" #include "tmsg.h" +#include "tq.h" #include "ttime.h" #include "executorimpl.h" @@ -214,7 +215,7 @@ static void doSetOperatorCompleted(SOperatorInfo* pOperator) { } } -static int32_t doCopyToSDataBlock(STaskRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo, int32_t orderType, SSDataBlock* pBlock); +static int32_t doCopyToSDataBlock(SDiskbasedBuf *pBuf, SGroupResInfo* pGroupResInfo, int32_t orderType, SSDataBlock* pBlock, int32_t rowCapacity); static int32_t getGroupbyColumnIndex(SGroupbyExpr *pGroupbyExpr, SSDataBlock* pDataBlock); static int32_t setGroupResultOutputBuf(STaskRuntimeEnv *pRuntimeEnv, SOptrBasicInfo *binf, int32_t numOfCols, char *pData, int16_t type, int16_t bytes, int32_t groupIndex); @@ -225,12 +226,10 @@ static void setResultBufSize(STaskAttr* pQueryAttr, SRspResultInfo* pResultInfo) static void setCtxTagForJoin(STaskRuntimeEnv* pRuntimeEnv, SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, void* pTable); static void setParamForStableStddev(STaskRuntimeEnv* pRuntimeEnv, SqlFunctionCtx* pCtx, int32_t numOfOutput, SExprInfo* pExpr); static void setParamForStableStddevByColData(STaskRuntimeEnv* pRuntimeEnv, SqlFunctionCtx* pCtx, int32_t numOfOutput, SExprInfo* pExpr, char* val, int16_t bytes); -static void doSetTableGroupOutputBuf(STaskRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo, - SqlFunctionCtx* pCtx, int32_t* rowCellInfoOffset, int32_t numOfOutput, int32_t tableGroupId); +static void doSetTableGroupOutputBuf(SAggOperatorInfo* pAggInfo, int32_t numOfOutput, int32_t tableGroupId, SExecTaskInfo* pTaskInfo); SArray* getOrderCheckColumns(STaskAttr* pQuery); - typedef struct SRowCompSupporter { STaskRuntimeEnv *pRuntimeEnv; int16_t dataOffset; @@ -244,8 +243,8 @@ static int compareRowData(const void *a, const void *b, const void *userData) { SRowCompSupporter *supporter = (SRowCompSupporter *)userData; STaskRuntimeEnv* pRuntimeEnv = supporter->pRuntimeEnv; - SFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pRow1->pageId); - SFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pRow2->pageId); + SFilePage *page1 = getBufPage(pRuntimeEnv->pResultBuf, pRow1->pageId); + SFilePage *page2 = getBufPage(pRuntimeEnv->pResultBuf, pRow2->pageId); int16_t offset = supporter->dataOffset; char *in1 = getPosInResultPage(pRuntimeEnv->pQueryAttr, page1, pRow1->offset, offset); @@ -337,23 +336,6 @@ SSDataBlock* createOutputBuf_rv(SArray* pExprInfo, int32_t numOfRows) { return res; } -void* destroyOutputBuf(SSDataBlock* pBlock) { - if (pBlock == NULL) { - return NULL; - } - - int32_t numOfOutput = pBlock->info.numOfCols; - for(int32_t i = 0; i < numOfOutput; ++i) { - SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, i); - tfree(pColInfoData->pData); - } - - taosArrayDestroy(pBlock->pDataBlock); - tfree(pBlock->pBlockAgg); - tfree(pBlock); - return NULL; -} - static bool isSelectivityWithTagsQuery(SqlFunctionCtx *pCtx, int32_t numOfOutput) { return true; // bool hasTags = false; @@ -709,7 +691,7 @@ static STimeWindow getCurrentActiveTimeWindow(SResultRowInfo * pResultRowInfo, i } // a new buffer page for each table. Needs to opt this design -static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t tid, uint32_t size) { +static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedBuf *pResultBuf, int32_t tid, uint32_t size) { if (pWindowRes->pageId != -1) { return 0; } @@ -724,12 +706,12 @@ static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf pData = getNewDataBuf(pResultBuf, tid, &pageId); } else { SPageInfo* pi = getLastPageInfo(list); - pData = getResBufPage(pResultBuf, pi->pageId); - pageId = pi->pageId; + pData = getBufPage(pResultBuf, getPageId(pi)); + pageId = getPageId(pi); - if (pData->num + size > pResultBuf->pageSize) { + if (pData->num + size > getBufPageSize(pResultBuf)) { // release current page first, and prepare the next one - releaseResBufPageInfo(pResultBuf, pi); + releaseBufPageInfo(pResultBuf, pi); pData = getNewDataBuf(pResultBuf, tid, &pageId); if (pData != NULL) { assert(pData->num == 0); // number of elements must be 0 for new allocated buffer @@ -764,7 +746,7 @@ static int32_t setResultOutputBufByKey(STaskRuntimeEnv *pRuntimeEnv, SResultRowI bool masterscan, SResultRow **pResult, int64_t tableGroupId, SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowCellInfoOffset) { assert(win->skey <= win->ekey); - SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; + SDiskbasedBuf *pResultBuf = pRuntimeEnv->pResultBuf; SResultRow *pResultRow = doSetResultOutBufByKey(pRuntimeEnv, pResultRowInfo, tid, (char *)&win->skey, TSDB_KEYSIZE, masterscan, tableGroupId); if (pResultRow == NULL) { @@ -1771,7 +1753,7 @@ static void setResultRowKey(SResultRow* pResultRow, char* pData, int16_t type) { } static int32_t setGroupResultOutputBuf(STaskRuntimeEnv *pRuntimeEnv, SOptrBasicInfo *binfo, int32_t numOfCols, char *pData, int16_t type, int16_t bytes, int32_t groupIndex) { - SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; + SDiskbasedBuf *pResultBuf = pRuntimeEnv->pResultBuf; int32_t *rowCellInfoOffset = binfo->rowCellInfoOffset; SResultRowInfo *pResultRowInfo = &binfo->resultRowInfo; @@ -2024,7 +2006,7 @@ static SqlFunctionCtx* createSqlFunctionCtx(STaskRuntimeEnv* pRuntimeEnv, SExprI return pFuncCtx; } -static SqlFunctionCtx* createSqlFunctionCtx_rv(SArray* pExprInfo, int32_t** rowCellInfoOffset) { +static SqlFunctionCtx* createSqlFunctionCtx_rv(SArray* pExprInfo, int32_t** rowCellInfoOffset, uint32_t* pRowSize) { size_t numOfOutput = taosArrayGetSize(pExprInfo); SqlFunctionCtx * pFuncCtx = (SqlFunctionCtx *)calloc(numOfOutput, sizeof(SqlFunctionCtx)); @@ -2122,6 +2104,7 @@ static SqlFunctionCtx* createSqlFunctionCtx_rv(SArray* pExprInfo, int32_t** rowC for(int32_t i = 1; i < numOfOutput; ++i) { SExprInfo* pExpr = taosArrayGetP(pExprInfo, i - 1); (*rowCellInfoOffset)[i] = (int32_t)((*rowCellInfoOffset)[i - 1] + sizeof(SResultRowEntryInfo) + pExpr->base.interBytes); + *pRowSize += pExpr->base.resSchema.bytes; } setCtxTagColumnInfo(pFuncCtx, numOfOutput); @@ -2188,174 +2171,6 @@ static int32_t setupQueryRuntimeEnv(STaskRuntimeEnv *pRuntimeEnv, int32_t numOfT // group by normal column, sliding window query, interval query are handled by interval query processor // interval (down sampling operation) - int32_t numOfOperator = (int32_t) taosArrayGetSize(pOperator); - for(int32_t i = 0; i < numOfOperator; ++i) { - int32_t* op = taosArrayGet(pOperator, i); - - switch (*op) { -// case OP_TagScan: { -// pRuntimeEnv->proot = createTagScanOperatorInfo(pRuntimeEnv, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// break; -// } -// case OP_MultiTableTimeInterval: { -// pRuntimeEnv->proot = -// createMultiTableTimeIntervalOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); -// break; -// } -// case OP_AllMultiTableTimeInterval: { -// pRuntimeEnv->proot = -// createAllMultiTableTimeIntervalOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); -// break; -// } -// case OP_TimeWindow: { -// pRuntimeEnv->proot = -// createTimeIntervalOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// int32_t opType = pRuntimeEnv->proot->downstream[0]->operatorType; -// if (opType != OP_DummyInput && opType != OP_Join) { -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); -// } -// break; -// } -// case OP_AllTimeWindow: { -// pRuntimeEnv->proot = -// createAllTimeIntervalOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// int32_t opType = pRuntimeEnv->proot->downstream[0]->operatorType; -// if (opType != OP_DummyInput && opType != OP_Join) { -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); -// } -// break; -// } -// case OP_Groupby: { -// pRuntimeEnv->proot = -// createGroupbyOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// -// int32_t opType = pRuntimeEnv->proot->downstream[0]->operatorType; -// if (opType != OP_DummyInput) { -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); -// } -// break; -// } -// case OP_SessionWindow: { -// pRuntimeEnv->proot = -// createSWindowOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// int32_t opType = pRuntimeEnv->proot->downstream[0]->operatorType; -// if (opType != OP_DummyInput) { -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); -// } -// break; -// } -// case OP_MultiTableAggregate: { -// pRuntimeEnv->proot = -// createMultiTableAggOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); -// break; -// } -// case OP_Aggregate: { -// pRuntimeEnv->proot = -// createAggregateOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// -// int32_t opType = pRuntimeEnv->proot->downstream[0]->operatorType; -// if (opType != OP_DummyInput && opType != OP_Join) { -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); -// } -// break; -// } -// -// case OP_Project: { // TODO refactor to remove arith operator. -// SOperatorInfo* prev = pRuntimeEnv->proot; -// if (i == 0) { -// pRuntimeEnv->proot = createProjectOperatorInfo(pRuntimeEnv, prev, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// if (pRuntimeEnv->proot != NULL && prev->operatorType != OP_DummyInput && prev->operatorType != OP_Join) { // TODO refactor -// setTableScanFilterOperatorInfo(prev->info, pRuntimeEnv->proot); -// } -// } else { -// prev = pRuntimeEnv->proot; -// assert(pQueryAttr->pExpr2 != NULL); -// pRuntimeEnv->proot = createProjectOperatorInfo(pRuntimeEnv, prev, pQueryAttr->pExpr2, pQueryAttr->numOfExpr2); -// } -// break; -// } -// -// case OP_StateWindow: { -// pRuntimeEnv->proot = createStatewindowOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// int32_t opType = pRuntimeEnv->proot->downstream[0]->operatorType; -// if (opType != OP_DummyInput) { -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); -// } -// break; -// } -// -// case OP_Limit: { -// pRuntimeEnv->proot = createLimitOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot); -// break; -// } -// -// case OP_Filter: { // todo refactor -// int32_t numOfFilterCols = 0; -// if (pQueryAttr->stableQuery) { -// SColumnInfo* pColInfo = -// extractColumnFilterInfo(pQueryAttr->pExpr3, pQueryAttr->numOfExpr3, &numOfFilterCols); -// pRuntimeEnv->proot = createFilterOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr3, -// pQueryAttr->numOfExpr3, pColInfo, numOfFilterCols); -// freeColumnInfo(pColInfo, pQueryAttr->numOfExpr3); -// } else { -// SColumnInfo* pColInfo = -// extractColumnFilterInfo(pQueryAttr->pExpr1, pQueryAttr->numOfOutput, &numOfFilterCols); -// pRuntimeEnv->proot = createFilterOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, -// pQueryAttr->numOfOutput, pColInfo, numOfFilterCols); -// freeColumnInfo(pColInfo, pQueryAttr->numOfOutput); -// } -// -// break; -// } -// -// case OP_Fill: { -// SOperatorInfo* pInfo = pRuntimeEnv->proot; -// pRuntimeEnv->proot = createFillOperatorInfo(pRuntimeEnv, pInfo, pInfo->pExpr, pInfo->numOfOutput, pQueryAttr->multigroupResult); -// break; -// } -// -// case OP_MultiwayMergeSort: { -// pRuntimeEnv->proot = createMultiwaySortOperatorInfo(pRuntimeEnv, pQueryAttr->pExpr1, pQueryAttr->numOfOutput, 4096, merger); -// break; -// } -// -// case OP_GlobalAggregate: { // If fill operator exists, the result rows of different group can not be in the same SSDataBlock. -// bool multigroupResult = pQueryAttr->multigroupResult; -// if (pQueryAttr->multigroupResult) { -// multigroupResult = (pQueryAttr->fillType == TSDB_FILL_NONE); -// } -// -// pRuntimeEnv->proot = createGlobalAggregateOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr3, -// pQueryAttr->numOfExpr3, merger, pQueryAttr->pUdfInfo, multigroupResult); -// break; -// } -// -// case OP_SLimit: { -// int32_t num = pRuntimeEnv->proot->numOfOutput; -// SExprInfo* pExpr = pRuntimeEnv->proot->pExpr; -// pRuntimeEnv->proot = createSLimitOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pExpr, num, merger, pQueryAttr->multigroupResult); -// break; -// } -// -// case OP_Distinct: { -// pRuntimeEnv->proot = createDistinctOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// break; -// } -// -// case OP_Order: { -// pRuntimeEnv->proot = createOrderOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput, &pQueryAttr->order); -// break; -// } - - default: { - assert(0); - } - } - } - return TSDB_CODE_SUCCESS; _clean: @@ -2964,7 +2779,7 @@ void filterRowsInDataBlock(STaskRuntimeEnv* pRuntimeEnv, SSingleColumnFilterInfo int8_t *p = calloc(numOfRows, sizeof(int8_t)); bool all = true; - +#if 0 if (pRuntimeEnv->pTsBuf != NULL) { SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, 0); @@ -2992,6 +2807,7 @@ void filterRowsInDataBlock(STaskRuntimeEnv* pRuntimeEnv, SSingleColumnFilterInfo } else { all = doFilterDataBlock(pFilterInfo, numOfFilterCols, numOfRows, p); } +#endif if (!all) { doCompactSDataBlock(pBlock, numOfRows, p); @@ -3030,7 +2846,7 @@ void filterColRowsInDataBlock(STaskRuntimeEnv* pRuntimeEnv, SSDataBlock* pBlock, } // save the cursor status - pRuntimeEnv->current->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf); +// pRuntimeEnv->current->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf); } else { // all = filterExecute(pRuntimeEnv->pQueryAttr->pFilters, numOfRows, &p, pBlock->pBlockAgg, pRuntimeEnv->pQueryAttr->numOfCols); } @@ -3434,8 +3250,7 @@ void setTagValue(SOperatorInfo* pOperatorInfo, void *pTable, SqlFunctionCtx* pCt } } -void copyToSDataBlock(STaskRuntimeEnv* pRuntimeEnv, int32_t threshold, SSDataBlock* pBlock, int32_t* offset) { - SGroupResInfo* pGroupResInfo = &pRuntimeEnv->groupResInfo; +void copyToSDataBlock(SSDataBlock* pBlock, int32_t* offset, SGroupResInfo* pGroupResInfo, SDiskbasedBuf* pResBuf) { pBlock->info.rows = 0; int32_t code = TSDB_CODE_SUCCESS; @@ -3443,12 +3258,12 @@ void copyToSDataBlock(STaskRuntimeEnv* pRuntimeEnv, int32_t threshold, SSDataBlo // all results in current group have been returned to client, try next group if ((pGroupResInfo->pRows == NULL) || taosArrayGetSize(pGroupResInfo->pRows) == 0) { assert(pGroupResInfo->index == 0); - if ((code = mergeIntoGroupResult(&pRuntimeEnv->groupResInfo, pRuntimeEnv, offset)) != TSDB_CODE_SUCCESS) { +// if ((code = mergeIntoGroupResult(&pGroupResInfo, pRuntimeEnv, offset)) != TSDB_CODE_SUCCESS) { return; - } +// } } - doCopyToSDataBlock(pRuntimeEnv, pGroupResInfo, TSDB_ORDER_ASC, pBlock); +// doCopyToSDataBlock(pResBuf, pGroupResInfo, TSDB_ORDER_ASC, pBlock, ); // current data are all dumped to result buffer, clear it if (!hasRemainDataInCurrentGroup(pGroupResInfo)) { @@ -3459,9 +3274,9 @@ void copyToSDataBlock(STaskRuntimeEnv* pRuntimeEnv, int32_t threshold, SSDataBlo } // enough results in data buffer, return - if (pBlock->info.rows >= threshold) { - break; - } +// if (pBlock->info.rows >= threshold) { +// break; +// } } } @@ -3470,11 +3285,11 @@ static void updateTableQueryInfoForReverseScan(STableQueryInfo *pTableQueryInfo) return; } - TSWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY); - pTableQueryInfo->lastKey = pTableQueryInfo->win.skey; +// TSWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY); +// pTableQueryInfo->lastKey = pTableQueryInfo->win.skey; - SWITCH_ORDER(pTableQueryInfo->cur.order); - pTableQueryInfo->cur.vgroupIndex = -1; +// SWITCH_ORDER(pTableQueryInfo->cur.order); +// pTableQueryInfo->cur.vgroupIndex = -1; // set the index to be the end slot of result rows array SResultRowInfo* pResultRowInfo = &pTableQueryInfo->resInfo; @@ -3565,7 +3380,7 @@ void setDefaultOutputBuf(STaskRuntimeEnv *pRuntimeEnv, SOptrBasicInfo *pInfo, in initCtxOutputBuffer(pCtx, pDataBlock->info.numOfCols); } -void setDefaultOutputBuf_rv(SAggOperatorInfo* pAggInfo, int64_t uid, int32_t stage, SExecTaskInfo* pTaskInfo) { +void setDefaultOutputBuf_rv(SAggOperatorInfo* pAggInfo, int32_t stage, SExecTaskInfo* pTaskInfo) { SOptrBasicInfo *pInfo = &pAggInfo->binfo; SqlFunctionCtx* pCtx = pInfo->pCtx; @@ -3574,8 +3389,10 @@ void setDefaultOutputBuf_rv(SAggOperatorInfo* pAggInfo, int64_t uid, int32_t sta SResultRowInfo* pResultRowInfo = &pInfo->resultRowInfo; int64_t tid = 0; + int64_t groupId = 0; + pAggInfo->keyBuf = realloc(pAggInfo->keyBuf, sizeof(tid) + sizeof(int64_t) + POINTER_BYTES); - SResultRow* pRow = doSetResultOutBufByKey_rv(pResultRowInfo, tid, (char *)&tid, sizeof(tid), true, uid, pTaskInfo, false, pAggInfo); + SResultRow* pRow = doSetResultOutBufByKey_rv(pResultRowInfo, tid, (char *)&tid, sizeof(tid), true, groupId, pTaskInfo, false, pAggInfo); for (int32_t i = 0; i < pDataBlock->info.numOfCols; ++i) { SColumnInfoData* pData = taosArrayGet(pDataBlock->pDataBlock, i); @@ -3781,24 +3598,19 @@ static bool hasMainOutput(STaskAttr *pQueryAttr) { return false; } -STableQueryInfo *createTableQueryInfo(STaskAttr* pQueryAttr, void* pTable, bool groupbyColumn, STimeWindow win, void* buf) { +STableQueryInfo *createTableQueryInfo(void* buf, bool groupbyColumn, STimeWindow win) { STableQueryInfo *pTableQueryInfo = buf; - - pTableQueryInfo->win = win; pTableQueryInfo->lastKey = win.skey; - pTableQueryInfo->pTable = pTable; - pTableQueryInfo->cur.vgroupIndex = -1; - // set more initial size of interval/groupby query - if (QUERY_IS_INTERVAL_QUERY(pQueryAttr) || groupbyColumn) { +// if (/*QUERY_IS_INTERVAL_QUERY(pQueryAttr) || */groupbyColumn) { int32_t initialSize = 128; int32_t code = initResultRowInfo(&pTableQueryInfo->resInfo, initialSize, TSDB_DATA_TYPE_INT); if (code != TSDB_CODE_SUCCESS) { return NULL; } - } else { // in other aggregate query, do not initialize the windowResInfo - } +// } else { // in other aggregate query, do not initialize the windowResInfo +// } return pTableQueryInfo; } @@ -3806,12 +3618,9 @@ STableQueryInfo *createTableQueryInfo(STaskAttr* pQueryAttr, void* pTable, bool STableQueryInfo* createTmpTableQueryInfo(STimeWindow win) { STableQueryInfo* pTableQueryInfo = calloc(1, sizeof(STableQueryInfo)); - pTableQueryInfo->win = win; +// pTableQueryInfo->win = win; pTableQueryInfo->lastKey = win.skey; - pTableQueryInfo->pTable = NULL; - pTableQueryInfo->cur.vgroupIndex = -1; - // set more initial size of interval/groupby query int32_t initialSize = 16; int32_t code = initResultRowInfo(&pTableQueryInfo->resInfo, initialSize, TSDB_DATA_TYPE_INT); @@ -3828,14 +3637,14 @@ void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) { return; } - taosVariantDestroy(&pTableQueryInfo->tag); +// taosVariantDestroy(&pTableQueryInfo->tag); cleanupResultRowInfo(&pTableQueryInfo->resInfo); } void setResultRowOutputBufInitCtx(STaskRuntimeEnv *pRuntimeEnv, SResultRow *pResult, SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowCellInfoOffset) { // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group - SFilePage* bufPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId); + SFilePage* bufPage = getBufPage(pRuntimeEnv->pResultBuf, pResult->pageId); int32_t offset = 0; for (int32_t i = 0; i < numOfOutput; ++i) { @@ -3865,14 +3674,49 @@ void setResultRowOutputBufInitCtx(STaskRuntimeEnv *pRuntimeEnv, SResultRow *pRes } } -void doSetTableGroupOutputBuf(STaskRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo, SqlFunctionCtx* pCtx, - int32_t* rowCellInfoOffset, int32_t numOfOutput, int32_t tableGroupId) { +void setResultRowOutputBufInitCtx_rv(SDiskbasedBuf * pBuf, SResultRow *pResult, SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowCellInfoOffset) { + // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group + SFilePage* bufPage = getBufPage(pBuf, pResult->pageId); + + int32_t offset = 0; + for (int32_t i = 0; i < numOfOutput; ++i) { + pCtx[i].resultInfo = getResultCell(pResult, i, rowCellInfoOffset); + + struct SResultRowEntryInfo* pResInfo = pCtx[i].resultInfo; + if (isRowEntryCompleted(pResInfo) && isRowEntryInitialized(pResInfo)) { + offset += pCtx[i].resDataInfo.bytes; + continue; + } + + pCtx[i].pOutput = getPosInResultPage_rv(bufPage, pResult->offset, offset); + offset += pCtx[i].resDataInfo.bytes; + + int32_t functionId = pCtx[i].functionId; + if (functionId < 0) { + continue; + } + + if (functionId == FUNCTION_TOP || functionId == FUNCTION_BOTTOM || functionId == FUNCTION_DIFF) { + if (i > 0) pCtx[i].ptsOutputBuf = pCtx[i - 1].pOutput; + } + + // if (!pResInfo->initialized) { + // aAggs[functionId].init(&pCtx[i], pResInfo); + // } + } +} + +void doSetTableGroupOutputBuf(SAggOperatorInfo* pAggInfo, int32_t numOfOutput, int32_t tableGroupId, SExecTaskInfo* pTaskInfo) { // for simple group by query without interval, all the tables belong to one group result. int64_t uid = 0; int64_t tid = 0; + SResultRowInfo* pResultRowInfo = &pAggInfo->binfo.resultRowInfo; + SqlFunctionCtx* pCtx = pAggInfo->binfo.pCtx; + int32_t* rowCellInfoOffset = pAggInfo->binfo.rowCellInfoOffset; + SResultRow* pResultRow = - doSetResultOutBufByKey(pRuntimeEnv, pResultRowInfo, tid, (char*)&tableGroupId, sizeof(tableGroupId), true, uid); + doSetResultOutBufByKey_rv(pResultRowInfo, tid, (char*)&tableGroupId, sizeof(tableGroupId), true, uid, pTaskInfo, false, pAggInfo); assert (pResultRow != NULL); /* @@ -3880,35 +3724,32 @@ void doSetTableGroupOutputBuf(STaskRuntimeEnv* pRuntimeEnv, SResultRowInfo* pRes * all group belong to one result set, and each group result has different group id so set the id to be one */ if (pResultRow->pageId == -1) { - int32_t ret = addNewWindowResultBuf(pResultRow, pRuntimeEnv->pResultBuf, tableGroupId, pRuntimeEnv->pQueryAttr->resultRowSize); + int32_t ret = addNewWindowResultBuf(pResultRow, pAggInfo->pResultBuf, tableGroupId, pAggInfo->binfo.resRowSize); if (ret != TSDB_CODE_SUCCESS) { return; } } - setResultRowOutputBufInitCtx(pRuntimeEnv, pResultRow, pCtx, numOfOutput, rowCellInfoOffset); + setResultRowOutputBufInitCtx_rv(pAggInfo->pResultBuf, pResultRow, pCtx, numOfOutput, rowCellInfoOffset); } -void setExecutionContext(STaskRuntimeEnv* pRuntimeEnv, SOptrBasicInfo* pInfo, int32_t numOfOutput, int32_t tableGroupId, - TSKEY nextKey) { - STableQueryInfo *pTableQueryInfo = pRuntimeEnv->current; - +void setExecutionContext(int32_t numOfOutput, int32_t tableGroupId, TSKEY nextKey, SExecTaskInfo* pTaskInfo, STableQueryInfo *pTableQueryInfo, SAggOperatorInfo* pAggInfo) { // lastKey needs to be updated pTableQueryInfo->lastKey = nextKey; - if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == tableGroupId) { + if (pAggInfo->groupId != INT32_MIN && pAggInfo->groupId == tableGroupId) { return; } - doSetTableGroupOutputBuf(pRuntimeEnv, &pInfo->resultRowInfo, pInfo->pCtx, pInfo->rowCellInfoOffset, numOfOutput, tableGroupId); + doSetTableGroupOutputBuf(pAggInfo, numOfOutput, tableGroupId, pTaskInfo); // record the current active group id - pRuntimeEnv->prevGroupId = tableGroupId; + pAggInfo->groupId = tableGroupId; } void setResultOutputBuf(STaskRuntimeEnv *pRuntimeEnv, SResultRow *pResult, SqlFunctionCtx* pCtx, int32_t numOfCols, int32_t* rowCellInfoOffset) { // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group - SFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId); + SFilePage *page = getBufPage(pRuntimeEnv->pResultBuf, pResult->pageId); int16_t offset = 0; for (int32_t i = 0; i < numOfCols; ++i) { @@ -3957,7 +3798,7 @@ int32_t setTimestampListJoinInfo(STaskRuntimeEnv* pRuntimeEnv, SVariant* pTag, S STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; assert(pRuntimeEnv->pTsBuf != NULL); - +#if 0 // both the master and supplement scan needs to set the correct ts comp start position if (pTableQueryInfo->cur.vgroupIndex == -1) { taosVariantAssign(&pTableQueryInfo->tag, pTag); @@ -3991,7 +3832,7 @@ int32_t setTimestampListJoinInfo(STaskRuntimeEnv* pRuntimeEnv, SVariant* pTag, S //qDebug("QInfo:0x%"PRIx64" find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", GET_TASKID(pRuntimeEnv), pTag->i, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex); } } - +#endif return 0; } @@ -4081,7 +3922,7 @@ void setIntervalQueryRange(STaskRuntimeEnv *pRuntimeEnv, TSKEY key) { return; } - pTableQueryInfo->win.skey = key; +// pTableQueryInfo->win.skey = key; STimeWindow win = {.skey = key, .ekey = pQueryAttr->window.ekey}; /** @@ -4104,7 +3945,7 @@ void setIntervalQueryRange(STaskRuntimeEnv *pRuntimeEnv, TSKEY key) { // pResultRowInfo->prevSKey = w.skey; // } - pTableQueryInfo->lastKey = pTableQueryInfo->win.skey; +// pTableQueryInfo->lastKey = pTableQueryInfo->win.skey; } /** @@ -4117,9 +3958,7 @@ void setIntervalQueryRange(STaskRuntimeEnv *pRuntimeEnv, TSKEY key) { * @param result */ -static int32_t doCopyToSDataBlock(STaskRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo, int32_t orderType, SSDataBlock* pBlock) { - STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; - +static int32_t doCopyToSDataBlock(SDiskbasedBuf *pBuf, SGroupResInfo* pGroupResInfo, int32_t orderType, SSDataBlock* pBlock, int32_t rowCapacity) { int32_t numOfRows = getNumOfTotalRes(pGroupResInfo); int32_t numOfResult = pBlock->info.rows; // there are already exists result rows @@ -4145,13 +3984,13 @@ static int32_t doCopyToSDataBlock(STaskRuntimeEnv* pRuntimeEnv, SGroupResInfo* p } int32_t numOfRowsToCopy = pRow->numOfRows; - if (numOfResult + numOfRowsToCopy >= pRuntimeEnv->resultInfo.capacity) { + if (numOfResult + numOfRowsToCopy >= rowCapacity) { break; } pGroupResInfo->index += 1; - SFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pRow->pageId); + SFilePage *page = getBufPage(pBuf, pRow->pageId); int32_t offset = 0; for (int32_t j = 0; j < pBlock->info.numOfCols; ++j) { @@ -4159,14 +3998,14 @@ static int32_t doCopyToSDataBlock(STaskRuntimeEnv* pRuntimeEnv, SGroupResInfo* p int32_t bytes = pColInfoData->info.bytes; char *out = pColInfoData->pData + numOfResult * bytes; - char *in = getPosInResultPage(pQueryAttr, page, pRow->offset, offset); + char *in = getPosInResultPage_rv(page, pRow->offset, offset); memcpy(out, in, bytes * numOfRowsToCopy); offset += bytes; } numOfResult += numOfRowsToCopy; - if (numOfResult == pRuntimeEnv->resultInfo.capacity) { // output buffer is full + if (numOfResult == rowCapacity) { // output buffer is full break; } } @@ -4176,7 +4015,7 @@ static int32_t doCopyToSDataBlock(STaskRuntimeEnv* pRuntimeEnv, SGroupResInfo* p return 0; } -static void toSSDataBlock(SGroupResInfo *pGroupResInfo, STaskRuntimeEnv* pRuntimeEnv, SSDataBlock* pBlock) { +static void toSDatablock(SGroupResInfo *pGroupResInfo, SDiskbasedBuf* pBuf, SSDataBlock* pBlock, int32_t rowCapacity) { assert(pGroupResInfo->currentGroup <= pGroupResInfo->totalGroup); pBlock->info.rows = 0; @@ -4184,29 +4023,19 @@ static void toSSDataBlock(SGroupResInfo *pGroupResInfo, STaskRuntimeEnv* pRuntim return; } - STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t orderType = TSDB_ORDER_ASC;//(pQueryAttr->pGroupbyExpr != NULL) ? pQueryAttr->pGroupbyExpr->orderType : TSDB_ORDER_ASC; - doCopyToSDataBlock(pRuntimeEnv, pGroupResInfo, orderType, pBlock); + doCopyToSDataBlock(pBuf, pGroupResInfo, orderType, pBlock, rowCapacity); - // refactor : extract method - SColumnInfoData* pInfoData = taosArrayGet(pBlock->pDataBlock, 0); - - //add condition (pBlock->info.rows >= 1) just to runtime happy - if (pInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP && pBlock->info.rows >= 1) { - STimeWindow* w = &pBlock->info.window; - w->skey = *(int64_t*)pInfoData->pData; - w->ekey = *(int64_t*)(((char*)pInfoData->pData) + TSDB_KEYSIZE * (pBlock->info.rows - 1)); - } + // add condition (pBlock->info.rows >= 1) just to runtime happy + blockDataUpdateTsWindow(pBlock); } -static void updateNumOfRowsInResultRows(STaskRuntimeEnv* pRuntimeEnv, SqlFunctionCtx* pCtx, int32_t numOfOutput, +static void updateNumOfRowsInResultRows(SqlFunctionCtx* pCtx, int32_t numOfOutput, SResultRowInfo* pResultRowInfo, int32_t* rowCellInfoOffset) { - STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; - // update the number of result for each, only update the number of rows for the corresponding window result. - if (QUERY_IS_INTERVAL_QUERY(pQueryAttr)) { - return; - } +// if (QUERY_IS_INTERVAL_QUERY(pQueryAttr)) { +// return; +// } for (int32_t i = 0; i < pResultRowInfo->size; ++i) { SResultRow *pResult = pResultRowInfo->pResult[i]; @@ -4217,8 +4046,8 @@ static void updateNumOfRowsInResultRows(STaskRuntimeEnv* pRuntimeEnv, SqlFunctio continue; } -// SResultRowEntryInfo* pCell = getResultCell(pResult, j, rowCellInfoOffset); -// pResult->numOfRows = (uint16_t)(TMAX(pResult->numOfRows, pCell->numOfRes)); + SResultRowEntryInfo* pCell = getResultCell(pResult, j, rowCellInfoOffset); + pResult->numOfRows = (uint16_t)(TMAX(pResult->numOfRows, pCell->numOfRes)); } } } @@ -4799,7 +4628,7 @@ int32_t doInitQInfo(SQInfo* pQInfo, STSBuf* pTsBuf, void* tsdb, void* sourceOptr getIntermediateBufInfo(pRuntimeEnv, &ps, &pQueryAttr->intermediateResultRowSize); int32_t TENMB = 1024*1024*10; - int32_t code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, ps, TENMB, pQInfo->qId, tsTempDir); + int32_t code = createDiskbasedBuffer(&pRuntimeEnv->pResultBuf, ps, TENMB, pQInfo->qId, tsTempDir); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -4829,7 +4658,8 @@ int32_t doInitQInfo(SQInfo* pQInfo, STSBuf* pTsBuf, void* tsdb, void* sourceOptr } static void doTableQueryInfoTimeWindowCheck(SExecTaskInfo* pTaskInfo, STableQueryInfo* pTableQueryInfo, int32_t order) { - if (order == TSDB_ORDER_ASC) { +#if 0 + if (order == TSDB_ORDER_ASC) { assert( (pTableQueryInfo->win.skey <= pTableQueryInfo->win.ekey) && (pTableQueryInfo->lastKey >= pTaskInfo->window.skey) && @@ -4840,6 +4670,8 @@ static void doTableQueryInfoTimeWindowCheck(SExecTaskInfo* pTaskInfo, STableQuer (pTableQueryInfo->lastKey <= pTaskInfo->window.skey) && (pTableQueryInfo->win.skey <= pTaskInfo->window.skey && pTableQueryInfo->win.ekey >= pTaskInfo->window.ekey)); } +#endif + } //STsdbQueryCond createTsdbQueryCond(STaskAttr* pQueryAttr, STimeWindow* win) { @@ -5102,14 +4934,16 @@ static SSDataBlock* doStreamBlockScan(void* param, bool* newgroup) { } int32_t loadRemoteDataCallback(void* param, const SDataBuf* pMsg, int32_t code) { - SExchangeInfo* pEx = (SExchangeInfo*) param; - pEx->pRsp = pMsg->pData; + SSourceDataInfo* pSourceDataInfo = (SSourceDataInfo*) param; + pSourceDataInfo->pRsp = pMsg->pData; - pEx->pRsp->numOfRows = htonl(pEx->pRsp->numOfRows); - pEx->pRsp->useconds = htobe64(pEx->pRsp->useconds); - pEx->pRsp->compLen = htonl(pEx->pRsp->compLen); + SRetrieveTableRsp* pRsp = pSourceDataInfo->pRsp; + pRsp->numOfRows = htonl(pRsp->numOfRows); + pRsp->useconds = htobe64(pRsp->useconds); + pRsp->compLen = htonl(pRsp->compLen); - tsem_post(&pEx->ready); + pSourceDataInfo->status = DATA_READY; + tsem_post(&pSourceDataInfo->pEx->ready); } static void destroySendMsgInfo(SMsgSendInfo* pMsgBody) { @@ -5139,115 +4973,234 @@ void qProcessFetchRsp(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { destroySendMsgInfo(pSendInfo); } -static SSDataBlock* doLoadRemoteData(void* param, bool* newgroup) { - SOperatorInfo *pOperator = (SOperatorInfo*) param; - - SExchangeInfo *pExchangeInfo = pOperator->info; - SExecTaskInfo *pTaskInfo = pOperator->pTaskInfo; - - *newgroup = false; - +static int32_t doSendFetchDataRequest(SExchangeInfo *pExchangeInfo, SExecTaskInfo *pTaskInfo, int32_t sourceIndex) { size_t totalSources = taosArrayGetSize(pExchangeInfo->pSources); - if (pExchangeInfo->current >= totalSources) { - qDebug("%s all %"PRIzu" source(s) are exhausted, total rows:%"PRIu64" bytes:%"PRIu64", elapsed:%.2f ms", GET_TASKID(pTaskInfo), totalSources, - pExchangeInfo->totalRows, pExchangeInfo->totalSize, pExchangeInfo->totalElapsed/1000.0); - return NULL; + + SResFetchReq* pMsg = calloc(1, sizeof(SResFetchReq)); + if (NULL == pMsg) { + pTaskInfo->code = TSDB_CODE_QRY_OUT_OF_MEMORY; + return pTaskInfo->code; } - SResFetchReq* pMsg = NULL; - SMsgSendInfo* pMsgSendInfo = NULL; + SDownstreamSource *pSource = taosArrayGet(pExchangeInfo->pSources, sourceIndex); + SSourceDataInfo *pDataInfo = taosArrayGet(pExchangeInfo->pSourceDataInfo, sourceIndex); - while(1) { - pMsg = calloc(1, sizeof(SResFetchReq)); - if (NULL == pMsg) { // todo handle malloc error - pTaskInfo->code = TSDB_CODE_QRY_OUT_OF_MEMORY; - goto _error; + qDebug("%s build fetch msg and send to vgId:%d, ep:%s, taskId:0x%" PRIx64 ", %d/%" PRIzu, + GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->addr.epset.eps[0].fqdn, pSource->taskId, sourceIndex, totalSources); + + pMsg->header.vgId = htonl(pSource->addr.nodeId); + pMsg->sId = htobe64(pSource->schedId); + pMsg->taskId = htobe64(pSource->taskId); + pMsg->queryId = htobe64(pTaskInfo->id.queryId); + + // send the fetch remote task result reques + SMsgSendInfo* pMsgSendInfo = calloc(1, sizeof(SMsgSendInfo)); + if (NULL == pMsgSendInfo) { + tfree(pMsg); + qError("%s prepare message %d failed", GET_TASKID(pTaskInfo), (int32_t)sizeof(SMsgSendInfo)); + pTaskInfo->code = TSDB_CODE_QRY_OUT_OF_MEMORY; + return pTaskInfo->code; + } + + pMsgSendInfo->param = pDataInfo; + pMsgSendInfo->msgInfo.pData = pMsg; + pMsgSendInfo->msgInfo.len = sizeof(SResFetchReq); + pMsgSendInfo->msgType = TDMT_VND_FETCH; + pMsgSendInfo->fp = loadRemoteDataCallback; + + int64_t transporterId = 0; + int32_t code = asyncSendMsgToServer(pExchangeInfo->pTransporter, &pSource->addr.epset, &transporterId, pMsgSendInfo); + return TSDB_CODE_SUCCESS; +} + +static int32_t setSDataBlockFromFetchRsp(SSDataBlock* pRes, SExchangeInfo *pExchangeInfo, SSourceDataInfo* pDataInfo, int32_t numOfOutput, int64_t startTs) { + char* pData = pDataInfo->pRsp->data; + SRetrieveTableRsp* pRsp = pDataInfo->pRsp; + + for (int32_t i = 0; i < numOfOutput; ++i) { + SColumnInfoData* pColInfoData = taosArrayGet(pRes->pDataBlock, i); + + char* tmp = realloc(pColInfoData->pData, pColInfoData->info.bytes * pRsp->numOfRows); + if (tmp == NULL) { + return TSDB_CODE_QRY_OUT_OF_MEMORY; } - SDownstreamSource* pSource = taosArrayGet(pExchangeInfo->pSources, pExchangeInfo->current); + size_t len = pRsp->numOfRows * pColInfoData->info.bytes; + memcpy(tmp, pData, len); - int64_t startTs = taosGetTimestampUs(); - qDebug("%s build fetch msg and send to vgId:%d, ep:%s, taskId:0x%" PRIx64 ", %d/%" PRIzu, - GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->addr.epset.eps[0].fqdn, pSource->taskId, pExchangeInfo->current, totalSources); + pColInfoData->pData = tmp; + pData += len; + } - pMsg->header.vgId = htonl(pSource->addr.nodeId); - pMsg->sId = htobe64(pSource->schedId); - pMsg->taskId = htobe64(pSource->taskId); - pMsg->queryId = htobe64(pTaskInfo->id.queryId); + pRes->info.rows = pRsp->numOfRows; - // send the fetch remote task result reques - pMsgSendInfo = calloc(1, sizeof(SMsgSendInfo)); - if (NULL == pMsgSendInfo) { - qError("%s prepare message %d failed", GET_TASKID(pTaskInfo), (int32_t)sizeof(SMsgSendInfo)); - pTaskInfo->code = TSDB_CODE_QRY_OUT_OF_MEMORY; - goto _error; - } + int64_t el = taosGetTimestampUs() - startTs; - pMsgSendInfo->param = pExchangeInfo; - pMsgSendInfo->msgInfo.pData = pMsg; - pMsgSendInfo->msgInfo.len = sizeof(SResFetchReq); - pMsgSendInfo->msgType = TDMT_VND_FETCH; - pMsgSendInfo->fp = loadRemoteDataCallback; + pExchangeInfo->totalRows += pRsp->numOfRows; + pExchangeInfo->totalSize += pRsp->compLen; + pDataInfo->totalRows += pRsp->numOfRows; - int64_t transporterId = 0; - int32_t code = asyncSendMsgToServer(pExchangeInfo->pTransporter, &pSource->addr.epset, &transporterId, pMsgSendInfo); - tsem_wait(&pExchangeInfo->ready); + pExchangeInfo->totalElapsed += el; - SRetrieveTableRsp* pRsp = pExchangeInfo->pRsp; - if (pRsp->numOfRows == 0) { - qDebug("%s vgId:%d, taskID:0x%"PRIx64" %d of total completed, rowsOfSource:%"PRIu64", totalRows:%"PRIu64" try next", - GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pExchangeInfo->current + 1, - pExchangeInfo->rowsOfCurrentSource, pExchangeInfo->totalRows); + return TSDB_CODE_SUCCESS; +} - pExchangeInfo->rowsOfCurrentSource = 0; - pExchangeInfo->current += 1; +static void* setAllSourcesCompleted(SOperatorInfo *pOperator, int64_t startTs) { + SExchangeInfo *pExchangeInfo = pOperator->info; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - if (pExchangeInfo->current >= totalSources) { - int64_t el = taosGetTimestampUs() - startTs; - pExchangeInfo->totalElapsed += el; + int64_t el = taosGetTimestampUs() - startTs; + pExchangeInfo->totalElapsed += el; - qDebug("%s all %"PRIzu" sources are exhausted, total rows: %"PRIu64" bytes:%"PRIu64", elapsed:%.2f ms", GET_TASKID(pTaskInfo), totalSources, - pExchangeInfo->totalRows, pExchangeInfo->totalSize, pExchangeInfo->totalElapsed/1000.0); - return NULL; - } else { + size_t totalSources = taosArrayGetSize(pExchangeInfo->pSources); + qDebug("%s all %"PRIzu" sources are exhausted, total rows: %"PRIu64" bytes:%"PRIu64", elapsed:%.2f ms", GET_TASKID(pTaskInfo), totalSources, + pExchangeInfo->totalRows, pExchangeInfo->totalSize, pExchangeInfo->totalElapsed/1000.0); + + doSetOperatorCompleted(pOperator); + return NULL; +} + +static SSDataBlock* concurrentlyLoadRemoteDataImpl(SOperatorInfo *pOperator, SExchangeInfo *pExchangeInfo, SExecTaskInfo *pTaskInfo) { + int32_t code = 0; + int64_t startTs = taosGetTimestampUs(); + size_t totalSources = taosArrayGetSize(pExchangeInfo->pSources); + + while (1) { + int32_t completed = 0; + for (int32_t i = 0; i < totalSources; ++i) { + SSourceDataInfo* pDataInfo = taosArrayGet(pExchangeInfo->pSourceDataInfo, i); + + if (pDataInfo->status == DATA_EXHAUSTED) { + completed += 1; continue; } - } - SSDataBlock* pRes = pExchangeInfo->pResult; - char* pData = pRsp->data; + if (pDataInfo->status != DATA_READY) { + continue; + } - for (int32_t i = 0; i < pOperator->numOfOutput; ++i) { - SColumnInfoData* pColInfoData = taosArrayGet(pRes->pDataBlock, i); - char* tmp = realloc(pColInfoData->pData, pColInfoData->info.bytes * pRsp->numOfRows); - if (tmp == NULL) { + SRetrieveTableRsp* pRsp = pDataInfo->pRsp; + SDownstreamSource* pSource = taosArrayGet(pExchangeInfo->pSources, i); + + SSDataBlock* pRes = pExchangeInfo->pResult; + + if (pRsp->numOfRows == 0) { + qDebug("%s vgId:%d, taskID:0x%" PRIx64 " index:%d completed, rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64 " try next", + GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, i + 1, pDataInfo->totalRows, + pExchangeInfo->totalRows); + pDataInfo->status = DATA_EXHAUSTED; + completed += 1; + continue; + } + + code = setSDataBlockFromFetchRsp(pExchangeInfo->pResult, pExchangeInfo, pDataInfo, pOperator->numOfOutput, startTs); + if (code != 0) { goto _error; } - size_t len = pRsp->numOfRows * pColInfoData->info.bytes; - memcpy(tmp, pData, len); + if (pRsp->completed == 1) { + qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " numOfRows:%d, rowsOfSource:%" PRIu64 + ", totalRows:%" PRIu64 ", totalBytes:%" PRIu64 " try next %d/%" PRIzu, + GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pRes->info.rows, + pDataInfo->totalRows, pExchangeInfo->totalRows, pExchangeInfo->totalSize, i + 1, + totalSources); + pDataInfo->status = DATA_EXHAUSTED; + } else { + qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " numOfRows:%d, totalRows:%" PRIu64 ", totalBytes:%" PRIu64, + GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pRes->info.rows, pExchangeInfo->totalRows, + pExchangeInfo->totalSize); + } - pColInfoData->pData = tmp; - pData += len; + if (pDataInfo->status != DATA_EXHAUSTED) { + pDataInfo->status = DATA_NOT_READY; + code = doSendFetchDataRequest(pExchangeInfo, pTaskInfo, i); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + } + + return pExchangeInfo->pResult; } - pRes->info.numOfCols = pOperator->numOfOutput; - pRes->info.rows = pRsp->numOfRows; + if (completed == totalSources) { + return setAllSourcesCompleted(pOperator, startTs); + } + } - int64_t el = taosGetTimestampUs() - startTs; +_error: + pTaskInfo->code = code; + return NULL; +} - pExchangeInfo->totalRows += pRsp->numOfRows; - pExchangeInfo->totalSize += pRsp->compLen; - pExchangeInfo->rowsOfCurrentSource += pRsp->numOfRows; - pExchangeInfo->totalElapsed += el; +static SSDataBlock* concurrentlyLoadRemoteData(SOperatorInfo *pOperator) { + SExchangeInfo *pExchangeInfo = pOperator->info; + SExecTaskInfo *pTaskInfo = pOperator->pTaskInfo; + + if (pOperator->status == OP_RES_TO_RETURN) { + return concurrentlyLoadRemoteDataImpl(pOperator, pExchangeInfo, pTaskInfo); + } + + size_t totalSources = taosArrayGetSize(pExchangeInfo->pSources); + int64_t startTs = taosGetTimestampUs(); + + // Asynchronously send all fetch requests to all sources. + for(int32_t i = 0; i < totalSources; ++i) { + int32_t code = doSendFetchDataRequest(pExchangeInfo, pTaskInfo, i); + if (code != TSDB_CODE_SUCCESS) { + return NULL; + } + } + + int64_t endTs = taosGetTimestampUs(); + qDebug("%s send all fetch request to %"PRIzu" sources completed, elapsed:%"PRId64, GET_TASKID(pTaskInfo), totalSources, endTs - startTs); + + tsem_wait(&pExchangeInfo->ready); + + pOperator->status = OP_RES_TO_RETURN; + return concurrentlyLoadRemoteDataImpl(pOperator, pExchangeInfo, pTaskInfo); +} + +static SSDataBlock* seqLoadRemoteData(SOperatorInfo *pOperator) { + SExchangeInfo *pExchangeInfo = pOperator->info; + SExecTaskInfo *pTaskInfo = pOperator->pTaskInfo; + + size_t totalSources = taosArrayGetSize(pExchangeInfo->pSources); + int64_t startTs = taosGetTimestampUs(); + + while(1) { + if (pExchangeInfo->current >= totalSources) { + return setAllSourcesCompleted(pOperator, startTs); + } + + doSendFetchDataRequest(pExchangeInfo, pTaskInfo, pExchangeInfo->current); + + tsem_wait(&pExchangeInfo->ready); + + SSourceDataInfo* pDataInfo = taosArrayGet(pExchangeInfo->pSourceDataInfo, pExchangeInfo->current); + SDownstreamSource* pSource = taosArrayGet(pExchangeInfo->pSources, pExchangeInfo->current); + + SRetrieveTableRsp* pRsp = pDataInfo->pRsp; + if (pRsp->numOfRows == 0) { + qDebug("%s vgId:%d, taskID:0x%"PRIx64" %d of total completed, rowsOfSource:%"PRIu64", totalRows:%"PRIu64" try next", + GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pExchangeInfo->current + 1, + pDataInfo->totalRows, pExchangeInfo->totalRows); + + pDataInfo->status = DATA_EXHAUSTED; + pExchangeInfo->current += 1; + continue; + } + + SSDataBlock* pRes = pExchangeInfo->pResult; + setSDataBlockFromFetchRsp(pExchangeInfo->pResult, pExchangeInfo, pDataInfo, pOperator->numOfOutput, startTs); if (pRsp->completed == 1) { qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " numOfRows:%d, rowsOfSource:%" PRIu64 - ", totalRows:%" PRIu64 ", totalBytes:%" PRIu64 " try next %d/%" PRIzu, - GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pRes->info.rows, pExchangeInfo->rowsOfCurrentSource, pExchangeInfo->totalRows, pExchangeInfo->totalSize, - pExchangeInfo->current + 1, totalSources); + ", totalRows:%" PRIu64 ", totalBytes:%" PRIu64 " try next %d/%" PRIzu, + GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pRes->info.rows, + pDataInfo->totalRows, pExchangeInfo->totalRows, pExchangeInfo->totalSize, pExchangeInfo->current + 1, + totalSources); - pExchangeInfo->rowsOfCurrentSource = 0; + pDataInfo->status = DATA_EXHAUSTED; pExchangeInfo->current += 1; } else { qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " numOfRows:%d, totalRows:%" PRIu64 ", totalBytes:%" PRIu64, @@ -5256,13 +5209,38 @@ static SSDataBlock* doLoadRemoteData(void* param, bool* newgroup) { return pExchangeInfo->pResult; } +} +static SSDataBlock* doLoadRemoteData(void* param, bool* newgroup) { + SOperatorInfo *pOperator = (SOperatorInfo*) param; + + SExchangeInfo *pExchangeInfo = pOperator->info; + SExecTaskInfo *pTaskInfo = pOperator->pTaskInfo; + + size_t totalSources = taosArrayGetSize(pExchangeInfo->pSources); + + if (pOperator->status == OP_EXEC_DONE) { + qDebug("%s all %"PRIzu" source(s) are exhausted, total rows:%"PRIu64" bytes:%"PRIu64", elapsed:%.2f ms", GET_TASKID(pTaskInfo), totalSources, + pExchangeInfo->totalRows, pExchangeInfo->totalSize, pExchangeInfo->totalElapsed/1000.0); + return NULL; + } + + *newgroup = false; + + if (pExchangeInfo->seqLoadData) { + return seqLoadRemoteData(pOperator); + } else { + return concurrentlyLoadRemoteData(pOperator); + } + +#if 0 _error: tfree(pMsg); tfree(pMsgSendInfo); terrno = pTaskInfo->code; return NULL; +#endif } static SSDataBlock* createResultDataBlock(const SArray* pExprInfo); @@ -5278,11 +5256,33 @@ SOperatorInfo* createExchangeOperatorInfo(const SArray* pSources, const SArray* return NULL; } + size_t numOfSources = taosArrayGetSize(pSources); + pInfo->pSources = taosArrayDup(pSources); - assert(taosArrayGetSize(pInfo->pSources) > 0); + pInfo->pSourceDataInfo = taosArrayInit(numOfSources, sizeof(SSourceDataInfo)); + if (pInfo->pSourceDataInfo == NULL || pInfo->pSources == NULL) { + tfree(pInfo); + tfree(pOperator); + taosArrayDestroy(pInfo->pSources); + taosArrayDestroy(pInfo->pSourceDataInfo); + terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; + return NULL; + } + + for(int32_t i = 0; i < numOfSources; ++i) { + SSourceDataInfo dataInfo = {0}; + dataInfo.status = DATA_NOT_READY; + dataInfo.pEx = pInfo; + dataInfo.index = i; + + taosArrayPush(pInfo->pSourceDataInfo, &dataInfo); + } size_t size = taosArrayGetSize(pExprInfo); pInfo->pResult = createResultDataBlock(pExprInfo); + pInfo->seqLoadData = true; + + tsem_init(&pInfo->ready, 0, 0); pOperator->name = "ExchangeOperator"; pOperator->operatorType = OP_Exchange; @@ -5290,7 +5290,6 @@ SOperatorInfo* createExchangeOperatorInfo(const SArray* pSources, const SArray* pOperator->status = OP_IN_EXECUTING; pOperator->info = pInfo; pOperator->numOfOutput = size; - pOperator->pRuntimeEnv = NULL; pOperator->exec = doLoadRemoteData; pOperator->pTaskInfo = pTaskInfo; @@ -5316,6 +5315,7 @@ SOperatorInfo* createExchangeOperatorInfo(const SArray* pSources, const SArray* } } #endif + return pOperator; } @@ -5344,7 +5344,7 @@ SSDataBlock* createResultDataBlock(const SArray* pExprInfo) { return pResBlock; } -SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbReadHandle, int32_t order, int32_t numOfOutput, int32_t repeatTime, int32_t reverseTime, SExecTaskInfo* pTaskInfo) { +SOperatorInfo* createTableScanOperatorInfo(void* pTsdbReadHandle, int32_t order, int32_t numOfOutput, int32_t repeatTime, int32_t reverseTime, SExecTaskInfo* pTaskInfo) { assert(repeatTime > 0); STableScanInfo* pInfo = calloc(1, sizeof(STableScanInfo)); @@ -5364,8 +5364,8 @@ SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbReadHandle, int32_t order, pInfo->current = 0; pInfo->scanFlag = MAIN_SCAN; - pOperator->name = "DataBlocksOptimizedScanOperator"; - pOperator->operatorType = OP_DataBlocksOptScan; + pOperator->name = "TableScanOperator"; + pOperator->operatorType = OP_TableScan; pOperator->blockingOptr = false; pOperator->status = OP_IN_EXECUTING; pOperator->info = pInfo; @@ -5376,7 +5376,7 @@ SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbReadHandle, int32_t order, return pOperator; } -SOperatorInfo* createTableSeqScanOperator(void* pTsdbReadHandle, STaskRuntimeEnv* pRuntimeEnv) { +SOperatorInfo* createTableSeqScanOperatorInfo(void* pTsdbReadHandle, STaskRuntimeEnv* pRuntimeEnv) { STableScanInfo* pInfo = calloc(1, sizeof(STableScanInfo)); pInfo->pTsdbReadHandle = pTsdbReadHandle; @@ -5604,10 +5604,11 @@ static void destroyGlobalAggOperatorInfo(void* param, int32_t numOfOutput) { tfree(pInfo->prevRow); tfree(pInfo->currentGroupColData); } + static void destroySlimitOperatorInfo(void* param, int32_t numOfOutput) { SSLimitOperatorInfo *pInfo = (SSLimitOperatorInfo*) param; taosArrayDestroy(pInfo->orderColumnList); - pInfo->pRes = destroyOutputBuf(pInfo->pRes); + pInfo->pRes = blockDataDestroy(pInfo->pRes); tfree(pInfo->prevRow); } @@ -5719,39 +5720,397 @@ SOperatorInfo *createMultiwaySortOperatorInfo(STaskRuntimeEnv *pRuntimeEnv, SExp return pOperator; } -static int32_t doMergeSDatablock(SSDataBlock* pDest, SSDataBlock* pSrc) { - assert(pSrc != NULL && pDest != NULL && pDest->info.numOfCols == pSrc->info.numOfCols); +typedef struct SExternalMemSource { + SArray* pageIdList; + int32_t pageIndex; + int32_t sourceId; + int32_t rowIndex; + SSDataBlock *pBlock; +} SExternalMemSource; - int32_t numOfCols = pSrc->info.numOfCols; - for(int32_t i = 0; i < numOfCols; ++i) { - SColumnInfoData* pCol2 = taosArrayGet(pDest->pDataBlock, i); - SColumnInfoData* pCol1 = taosArrayGet(pSrc->pDataBlock, i); +int32_t msortComparFn(const void *pLeft, const void *pRight, void *param) { + int32_t pLeftIdx = *(int32_t *)pLeft; + int32_t pRightIdx = *(int32_t *)pRight; - int32_t newSize = (pDest->info.rows + pSrc->info.rows) * pCol2->info.bytes; - char* tmp = realloc(pCol2->pData, newSize); - if (tmp != NULL) { - pCol2->pData = tmp; - int32_t offset = pCol2->info.bytes * pDest->info.rows; - memcpy(pCol2->pData + offset, pCol1->pData, pSrc->info.rows * pCol2->info.bytes); + SMsortComparParam *pParam = (SMsortComparParam *)param; + + SArray *pInfo = pParam->orderInfo; + + SExternalMemSource* pLeftSource = pParam->pSources[pLeftIdx]; + SExternalMemSource* pRightSource = pParam->pSources[pRightIdx]; + + // this input is exhausted, set the special value to denote this + if (pLeftSource->rowIndex == -1) { + return 1; + } + + if (pRightSource->rowIndex == -1) { + return -1; + } + + SSDataBlock* pLeftBlock = pLeftSource->pBlock; + SSDataBlock* pRightBlock = pRightSource->pBlock; + + for(int32_t i = 0; i < pInfo->size; ++i) { + SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pInfo, i); + + SColumnInfoData* pLeftColInfoData = TARRAY_GET_ELEM(pLeftBlock->pDataBlock, pOrder->colIndex); + + bool leftNull = false; + if (pLeftColInfoData->hasNull) { + leftNull = colDataIsNull(pLeftColInfoData, pLeftBlock->info.rows, pLeftSource->rowIndex, pLeftBlock->pBlockAgg); + } + + SColumnInfoData* pRightColInfoData = TARRAY_GET_ELEM(pRightBlock->pDataBlock, pOrder->colIndex); + bool rightNull = false; + if (pRightColInfoData->hasNull) { + rightNull = colDataIsNull(pRightColInfoData, pRightBlock->info.rows, pRightSource->rowIndex, pRightBlock->pBlockAgg); + } + + if (leftNull && rightNull) { + continue; // continue to next slot + } + + if (rightNull) { + return pParam->nullFirst? 1:-1; + } + + if (leftNull) { + return pParam->nullFirst? -1:1; + } + + void* left1 = colDataGet(pLeftColInfoData, pLeftSource->rowIndex); + void* right1 = colDataGet(pRightColInfoData, pRightSource->rowIndex); + + switch(pLeftColInfoData->info.type) { + case TSDB_DATA_TYPE_INT: { + int32_t leftv = *(int32_t*)left1; + int32_t rightv = *(int32_t*)right1; + + if (leftv == rightv) { + break; + } else { + if (pOrder->order == TSDB_ORDER_ASC) { + return leftv < rightv? -1 : 1; + } else { + return leftv < rightv? 1 : -1; + } + } + } + default: + assert(0); + } + } +} + +static int32_t adjustMergeTreeForNextTuple(SExternalMemSource *pSource, SMultiwayMergeTreeInfo *pTree, SOrderOperatorInfo* pInfo) { + /* + * load a new SDataBlock into memory of a given intermediate data-set source, + * since it's last record in buffer has been chosen to be processed, as the winner of loser-tree + */ + if (pSource->rowIndex >= pSource->pBlock->info.rows) { + pSource->rowIndex = 0; + pSource->pageIndex += 1; + + if (pSource->pageIndex >= taosArrayGetSize(pSource->pageIdList)) { + pInfo->numOfCompleted += 1; + pSource->rowIndex = -1; + pSource->pageIndex = -1; + pSource->pBlock = blockDataDestroy(pSource->pBlock); } else { - return TSDB_CODE_VND_OUT_OF_MEMORY; + SPageInfo* pPgInfo = *(SPageInfo**)taosArrayGet(pSource->pageIdList, pSource->pageIndex); + + SFilePage* pPage = getBufPage(pInfo->pSortInternalBuf, getPageId(pPgInfo)); + int32_t code = blockDataFromBuf(pSource->pBlock, pPage->data); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + releaseBufPage(pInfo->pSortInternalBuf, pPage); } } - pDest->info.rows += pSrc->info.rows; + /* + * Adjust loser tree otherwise, according to new candidate data + * if the loser tree is rebuild completed, we do not need to adjust + */ + int32_t leafNodeIndex = tMergeTreeGetAdjustIndex(pTree); + +#ifdef _DEBUG_VIEW + printf("before adjust:\t"); + tMergeTreePrint(pTree); +#endif + + tMergeTreeAdjust(pTree, leafNodeIndex); + +#ifdef _DEBUG_VIEW + printf("\nafter adjust:\t"); + tMergeTreePrint(pTree); +#endif +} + +static void appendOneRowToDataBlock(SSDataBlock *pBlock, const SSDataBlock* pSource, int32_t* rowIndex) { + for (int32_t i = 0; i < pBlock->info.numOfCols; ++i) { + SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, i); + + SColumnInfoData* pSrcColInfo = taosArrayGet(pSource->pDataBlock, i); + bool isNull = colDataIsNull(pSrcColInfo, pSource->info.rows, *rowIndex, NULL); + + if (isNull) { + colDataAppend(pColInfo, pBlock->info.rows, NULL, true); + } else { + char* pData = colDataGet(pSrcColInfo, *rowIndex); + colDataAppend(pColInfo, pBlock->info.rows, pData, false); + } + } + + pBlock->info.rows += 1; + *rowIndex += 1; +} + +static int32_t doAddNewSource(SOrderOperatorInfo* pInfo, SArray* pAllSources, int32_t numOfCols) { + SExternalMemSource* pSource = calloc(1, sizeof(SExternalMemSource)); + if (pSource == NULL) { + return TSDB_CODE_QRY_OUT_OF_MEMORY; + } + + pSource->pageIdList = getDataBufPagesIdList(pInfo->pSortInternalBuf, pInfo->sourceId); + pSource->sourceId = pInfo->sourceId; + + pSource->pBlock = calloc(1, sizeof(SSDataBlock)); + pSource->pBlock->pDataBlock = taosArrayInit(numOfCols, sizeof(SColumnInfoData)); + pSource->pBlock->info.numOfCols = numOfCols; + + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData colInfo = {0}; + SColumnInfoData* p = taosArrayGet(pInfo->pDataBlock->pDataBlock, i); + colInfo.info = p->info; + taosArrayPush(pSource->pBlock->pDataBlock, &colInfo); + } + + taosArrayPush(pAllSources, &pSource); + + pInfo->sourceId += 1; + + int32_t rowSize = blockDataGetSerialRowSize(pSource->pBlock); + int32_t numOfRows = (getBufPageSize(pInfo->pSortInternalBuf) - blockDataGetSerialMetaSize(pInfo->pDataBlock))/rowSize; + + return blockDataEnsureCapacity(pSource->pBlock, numOfRows); +} + +void addToDiskbasedBuf(SOrderOperatorInfo* pInfo, SArray* pSources, jmp_buf env) { + int32_t start = 0; + + while(start < pInfo->pDataBlock->info.rows) { + int32_t stop = 0; + blockDataSplitRows(pInfo->pDataBlock, pInfo->hasVarCol, start, &stop, getBufPageSize(pInfo->pSortInternalBuf)); + SSDataBlock* p = blockDataExtractBlock(pInfo->pDataBlock, start, stop - start + 1); + if (p == NULL) { + longjmp(env, TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + int32_t pageId = -1; + SFilePage* pPage = getNewDataBuf(pInfo->pSortInternalBuf, pInfo->sourceId, &pageId); + if (pPage == NULL) { + assert(0); + longjmp(env, terrno); + } + + int32_t size = blockDataGetSize(p) + sizeof(int32_t) + p->info.numOfCols * sizeof(int32_t); + assert(size <= getBufPageSize(pInfo->pSortInternalBuf)); + + blockDataToBuf(pPage->data, p); + + setBufPageDirty(pPage, true); + releaseBufPage(pInfo->pSortInternalBuf, pPage); + + blockDataDestroy(p); + start = stop + 1; + } + + int32_t numOfCols = pInfo->pDataBlock->info.numOfCols; + blockDataClearup(pInfo->pDataBlock, pInfo->hasVarCol); + + int32_t code = doAddNewSource(pInfo, pSources, numOfCols); + if (code != TSDB_CODE_SUCCESS) { + longjmp(env, code); + } +} + +static int32_t sortComparInit(SMsortComparParam* cmpParam, SArray* pSources, int32_t startIndex, int32_t endIndex, SDiskbasedBuf* pBuf) { + cmpParam->pSources = taosArrayGet(pSources, startIndex); + cmpParam->numOfSources = (endIndex - startIndex + 1); + + for(int32_t i = 0; i < cmpParam->numOfSources; ++i) { + SExternalMemSource* pSource = cmpParam->pSources[i]; + SPageInfo* pPgInfo = *(SPageInfo**) taosArrayGet(pSource->pageIdList, pSource->pageIndex); + + SFilePage* pPage = getBufPage(pBuf, getPageId(pPgInfo)); + int32_t code = blockDataFromBuf(cmpParam->pSources[i]->pBlock, pPage->data); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + releaseBufPage(pBuf, pPage); + } return TSDB_CODE_SUCCESS; } +static int32_t sortComparClearup(SMsortComparParam* cmpParam) { + for(int32_t i = 0; i < cmpParam->numOfSources; ++i) { + SExternalMemSource* pSource = cmpParam->pSources[i]; + blockDataDestroy(pSource->pBlock); + tfree(pSource); + } + + cmpParam->numOfSources = 0; +} + +static SSDataBlock* getSortedBlockData(SExecTaskInfo* pTaskInfo, SOrderOperatorInfo* pInfo, SMsortComparParam* cmpParam, int32_t capacity) { + blockDataClearup(pInfo->pDataBlock, pInfo->hasVarCol); + + while(1) { + if (cmpParam->numOfSources == pInfo->numOfCompleted) { + break; + } + + int32_t index = tMergeTreeGetChosenIndex(pInfo->pMergeTree); + + SExternalMemSource *pSource = (*cmpParam).pSources[index]; + appendOneRowToDataBlock(pInfo->pDataBlock, pSource->pBlock, &pSource->rowIndex); + + int32_t code = adjustMergeTreeForNextTuple(pSource, pInfo->pMergeTree, pInfo); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } + + if (pInfo->pDataBlock->info.rows >= capacity) { + return pInfo->pDataBlock; + } + } + + return (pInfo->pDataBlock->info.rows > 0)? pInfo->pDataBlock:NULL; +} + +static int32_t doInternalSort(SExecTaskInfo* pTaskInfo, SOrderOperatorInfo* pInfo) { + size_t numOfSources = taosArrayGetSize(pInfo->pSources); + + // Calculate the I/O counts to complete the data sort. + double sortCount = floorl(log2(numOfSources) / log2(getNumOfInMemBufPages(pInfo->pSortInternalBuf))); + + pInfo->totalElapsed = taosGetTimestampUs() - pInfo->startTs; + qDebug("%s %d rounds mergesort required to complete the sort, first-round sorted data size:%"PRIzu", sort:%"PRId64", total elapsed:%"PRId64, + GET_TASKID(pTaskInfo), (int32_t) (sortCount + 1), getTotalBufSize(pInfo->pSortInternalBuf), pInfo->sortElapsed, + pInfo->totalElapsed); + + size_t pgSize = getBufPageSize(pInfo->pSortInternalBuf); + int32_t numOfRows = (pgSize - blockDataGetSerialMetaSize(pInfo->pDataBlock))/ blockDataGetSerialRowSize(pInfo->pDataBlock); + + blockDataEnsureCapacity(pInfo->pDataBlock, numOfRows); + + size_t numOfSorted = taosArrayGetSize(pInfo->pSources); + for(int32_t t = 0; t < sortCount; ++t) { + int64_t st = taosGetTimestampUs(); + + SArray* pResList = taosArrayInit(4, POINTER_BYTES); + SMsortComparParam resultParam = {.orderInfo = pInfo->cmpParam.orderInfo}; + + int32_t numOfInputSources = getNumOfInMemBufPages(pInfo->pSortInternalBuf); + int32_t sortGroup = (numOfSorted + numOfInputSources - 1) / numOfInputSources; + + // Only *numOfInputSources* can be loaded into buffer to perform the external sort. + for(int32_t i = 0; i < sortGroup; ++i) { + pInfo->sourceId += 1; + + int32_t end = (i + 1) * numOfInputSources - 1; + if (end > numOfSorted - 1) { + end = numOfSorted - 1; + } + + pInfo->cmpParam.numOfSources = end - i * numOfInputSources + 1; + + int32_t code = sortComparInit(&pInfo->cmpParam, pInfo->pSources, i * numOfInputSources, end, pInfo->pSortInternalBuf); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } + + code = tMergeTreeCreate(&pInfo->pMergeTree, pInfo->cmpParam.numOfSources, &pInfo->cmpParam, msortComparFn); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } + + while (1) { + SSDataBlock* pDataBlock = getSortedBlockData(pTaskInfo, pInfo, &pInfo->cmpParam, numOfRows); + if (pDataBlock == NULL) { + break; + } + + int32_t pageId = -1; + SFilePage* pPage = getNewDataBuf(pInfo->pSortInternalBuf, pInfo->sourceId, &pageId); + if (pPage == NULL) { + assert(0); + longjmp(pTaskInfo->env, terrno); + } + + int32_t size = blockDataGetSize(pDataBlock) + sizeof(int32_t) + pDataBlock->info.numOfCols * sizeof(int32_t); + assert(size <= getBufPageSize(pInfo->pSortInternalBuf)); + + blockDataToBuf(pPage->data, pDataBlock); + + setBufPageDirty(pPage, true); + releaseBufPage(pInfo->pSortInternalBuf, pPage); + + blockDataClearup(pDataBlock, pInfo->hasVarCol); + } + + tMergeTreeDestroy(pInfo->pMergeTree); + pInfo->numOfCompleted = 0; + + code = doAddNewSource(pInfo, pResList, pInfo->pDataBlock->info.numOfCols); + if (code != 0) { + longjmp(pTaskInfo->env, code); + } + } + + sortComparClearup(&pInfo->cmpParam); + + taosArrayClear(pInfo->pSources); + taosArrayAddAll(pInfo->pSources, pResList); + taosArrayDestroy(pResList); + + pInfo->cmpParam = resultParam; + numOfSorted = taosArrayGetSize(pInfo->pSources); + + int64_t el = taosGetTimestampUs() - st; + pInfo->totalElapsed += el; + + SDiskbasedBufStatis statis = getDBufStatis(pInfo->pSortInternalBuf); + + qDebug("%s %d round mergesort, elapsed:%"PRId64" readDisk:%.2f Kb, flushDisk:%.2f Kb", GET_TASKID(pTaskInfo), t + 1, el, statis.loadBytes/1024.0, + statis.flushBytes/1024.0); + } + + pInfo->cmpParam.numOfSources = taosArrayGetSize(pInfo->pSources); + return 0; +} + static SSDataBlock* doSort(void* param, bool* newgroup) { SOperatorInfo* pOperator = (SOperatorInfo*) param; if (pOperator->status == OP_EXEC_DONE) { return NULL; } + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SOrderOperatorInfo* pInfo = pOperator->info; - SSDataBlock* pBlock = NULL; + + if (pOperator->status == OP_RES_TO_RETURN) { + return getSortedBlockData(pTaskInfo, pInfo, &pInfo->cmpParam, pInfo->numOfRowsInRes); + } + + int64_t st = taosGetTimestampUs(); + while(1) { publishOperatorProfEvent(pOperator->pDownstream[0], QUERY_PROF_BEFORE_OPERATOR_EXEC); pBlock = pOperator->pDownstream[0]->exec(pOperator->pDownstream[0], newgroup); @@ -5759,68 +6118,130 @@ static SSDataBlock* doSort(void* param, bool* newgroup) { // start to flush data into disk and try do multiway merge sort if (pBlock == NULL) { - doSetOperatorCompleted(pOperator); break; } - int32_t code = doMergeSDatablock(pInfo->pDataBlock, pBlock); + int32_t code = blockDataMerge(pInfo->pDataBlock, pBlock); if (code != TSDB_CODE_SUCCESS) { - // todo handle error + longjmp(pOperator->pTaskInfo->env, code); + } + + size_t size = blockDataGetSize(pInfo->pDataBlock); + if (size > pInfo->sortBufSize) { + // Perform the in-memory sort and then flush data in the buffer into disk. + int64_t p = taosGetTimestampUs(); + blockDataSort(pInfo->pDataBlock, pInfo->cmpParam.orderInfo, pInfo->cmpParam.nullFirst); + + int64_t el = taosGetTimestampUs() - p; + pInfo->sortElapsed += el; + + addToDiskbasedBuf(pInfo, pInfo->pSources, pTaskInfo->env); } } - int32_t numOfCols = pInfo->pDataBlock->info.numOfCols; - void** pCols = calloc(numOfCols, POINTER_BYTES); - SSchema* pSchema = calloc(numOfCols, sizeof(SSchema)); + if (pInfo->pDataBlock->info.rows > 0) { + // Perform the in-memory sort and then flush data in the buffer into disk. + blockDataSort(pInfo->pDataBlock, pInfo->cmpParam.orderInfo, pInfo->cmpParam.nullFirst); - for(int32_t i = 0; i < numOfCols; ++i) { - SColumnInfoData* p1 = taosArrayGet(pInfo->pDataBlock->pDataBlock, i); - pCols[i] = p1->pData; - pSchema[i].colId = p1->info.colId; - pSchema[i].bytes = p1->info.bytes; - pSchema[i].type = (uint8_t) p1->info.type; + // All sorted data are resident in memory, external memory sort is not needed. + // Return to the upstream operator directly + if (isAllDataInMemBuf(pInfo->pSortInternalBuf)) { + pOperator->status = OP_EXEC_DONE; + return (pInfo->pDataBlock->info.rows == 0)? NULL:pInfo->pDataBlock; + } + + addToDiskbasedBuf(pInfo, pInfo->pSources, pTaskInfo->env); } - __compar_fn_t comp = getKeyComparFunc(pSchema[pInfo->colIndex].type, pInfo->order); -// taosqsort(pCols, pSchema, numOfCols, pInfo->pDataBlock->info.rows, pInfo->colIndex, comp); + doInternalSort(pTaskInfo, pInfo); - tfree(pCols); - tfree(pSchema); - return (pInfo->pDataBlock->info.rows > 0)? pInfo->pDataBlock:NULL; + int32_t code = blockDataEnsureCapacity(pInfo->pDataBlock, pInfo->numOfRowsInRes); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } + + int32_t numOfSources = taosArrayGetSize(pInfo->pSources); + ASSERT(numOfSources <= getNumOfInMemBufPages(pInfo->pSortInternalBuf)); + code = sortComparInit(&pInfo->cmpParam, pInfo->pSources, 0, numOfSources - 1, pInfo->pSortInternalBuf); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } + + code = tMergeTreeCreate(&pInfo->pMergeTree, pInfo->cmpParam.numOfSources, &pInfo->cmpParam, msortComparFn); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } + + pOperator->status = OP_RES_TO_RETURN; + return getSortedBlockData(pTaskInfo, pInfo, &pInfo->cmpParam, pInfo->numOfRowsInRes); } -SOperatorInfo *createOrderOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput, SOrder* pOrderVal) { - SOrderOperatorInfo* pInfo = calloc(1, sizeof(SOrderOperatorInfo)); +static SArray* createBlockOrder(SArray* pExprInfo, SArray* pOrderVal) { + SArray* pOrderInfo = taosArrayInit(1, sizeof(SBlockOrderInfo)); - { - SSDataBlock* pDataBlock = calloc(1, sizeof(SSDataBlock)); - pDataBlock->pDataBlock = taosArrayInit(numOfOutput, sizeof(SColumnInfoData)); - for(int32_t i = 0; i < numOfOutput; ++i) { - SColumnInfoData col = {{0}}; - col.info.colId = pExpr[i].base.pColumns->info.colId; -// col.info.bytes = pExpr[i].base.colBytes; -// col.info.type = pExpr[i].base.colType; - taosArrayPush(pDataBlock->pDataBlock, &col); + size_t numOfOrder = taosArrayGetSize(pOrderVal); + for (int32_t j = 0; j < numOfOrder; ++j) { + SBlockOrderInfo orderInfo = {0}; + SOrder* pOrder = taosArrayGet(pOrderVal, j); + orderInfo.order = pOrder->order; -// if (col.info.colId == pOrderVal->orderColId) { -// pInfo->colIndex = i; -// } + for (int32_t i = 0; i < taosArrayGetSize(pExprInfo); ++i) { + SExprInfo* pExpr = taosArrayGet(pExprInfo, i); + if (pExpr->base.resSchema.colId == pOrder->col.info.colId) { + orderInfo.colIndex = i; + break; } + } - pDataBlock->info.numOfCols = numOfOutput; -// pInfo->order = pOrderVal->order; - pInfo->pDataBlock = pDataBlock; + taosArrayPush(pOrderInfo, &orderInfo); } + return pOrderInfo; +} + +SOperatorInfo *createOrderOperatorInfo(SOperatorInfo* downstream, SArray* pExprInfo, SArray* pOrderVal) { + SOrderOperatorInfo* pInfo = calloc(1, sizeof(SOrderOperatorInfo)); SOperatorInfo* pOperator = calloc(1, sizeof(SOperatorInfo)); - pOperator->name = "InMemoryOrder"; -// pOperator->operatorType = OP_Order; + if (pInfo == NULL || pOperator == NULL) { + tfree(pInfo); + + terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; + return NULL; + } + + pInfo->sortBufSize = 1024 * 16; // 1MB + pInfo->bufPageSize = 1024; + pInfo->numOfRowsInRes = 1024; + + pInfo->pDataBlock = createOutputBuf_rv(pExprInfo, pInfo->numOfRowsInRes); + pInfo->pSources = taosArrayInit(4, POINTER_BYTES); + pInfo->cmpParam.orderInfo = createBlockOrder(pExprInfo, pOrderVal); + + for(int32_t i = 0; i < taosArrayGetSize(pExprInfo); ++i) { + SExprInfo* pExpr = taosArrayGetP(pExprInfo, i); + if (IS_VAR_DATA_TYPE(pExpr->base.resSchema.type)) { + pInfo->hasVarCol = true; + break; + } + } + + int32_t code = createDiskbasedBuffer(&pInfo->pSortInternalBuf, pInfo->bufPageSize, pInfo->sortBufSize, 1, "/tmp/"); + if (pInfo->pSources == NULL || code != 0 || pInfo->cmpParam.orderInfo == NULL || pInfo->pDataBlock == NULL) { + tfree(pOperator); + destroyOrderOperatorInfo(pInfo, taosArrayGetSize(pExprInfo)); + tfree(pInfo); + + terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; + return NULL; + } + + pOperator->name = "Order"; + pOperator->operatorType = OP_Order; pOperator->blockingOptr = true; pOperator->status = OP_IN_EXECUTING; pOperator->info = pInfo; pOperator->exec = doSort; - pOperator->cleanupFn = destroyOrderOperatorInfo; - pOperator->pRuntimeEnv = pRuntimeEnv; + pOperator->cleanupFn = destroyOrderOperatorInfo; appendDownstream(pOperator, downstream); return pOperator; @@ -5869,7 +6290,7 @@ static SSDataBlock* doAggregate(void* param, bool* newgroup) { return (pInfo->pRes->info.rows != 0)? pInfo->pRes:NULL; } -static SSDataBlock* doSTableAggregate(void* param, bool* newgroup) { +static SSDataBlock* doMultiTableAggregate(void* param, bool* newgroup) { SOperatorInfo* pOperator = (SOperatorInfo*) param; if (pOperator->status == OP_EXEC_DONE) { return NULL; @@ -5877,22 +6298,20 @@ static SSDataBlock* doSTableAggregate(void* param, bool* newgroup) { SAggOperatorInfo* pAggInfo = pOperator->info; SOptrBasicInfo* pInfo = &pAggInfo->binfo; - - STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; if (pOperator->status == OP_RES_TO_RETURN) { - toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pInfo->pRes); + toSDatablock(&pAggInfo->groupResInfo, pAggInfo->pResultBuf, pInfo->pRes, pAggInfo->binfo.capacity); - if (pInfo->pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pRuntimeEnv->groupResInfo)) { + if (pInfo->pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pAggInfo->groupResInfo)) { pOperator->status = OP_EXEC_DONE; } return pInfo->pRes; } - STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; - int32_t order = pQueryAttr->order.order; - + // table scan order + int32_t order = TSDB_ORDER_ASC; SOperatorInfo* downstream = pOperator->pDownstream[0]; while(1) { @@ -5904,9 +6323,8 @@ static SSDataBlock* doSTableAggregate(void* param, bool* newgroup) { break; } - setTagValue(pOperator, pRuntimeEnv->current->pTable, pInfo->pCtx, pOperator->numOfOutput); - -// if (downstream->operatorType == OP_DataBlocksOptScan) { +// setTagValue(pOperator, pRuntimeEnv->current->pTable, pInfo->pCtx, pOperator->numOfOutput); +// if (downstream->operatorType == OP_TableScan) { // STableScanInfo* pScanInfo = downstream->info; // order = getTableScanOrder(pScanInfo); // } @@ -5915,7 +6333,7 @@ static SSDataBlock* doSTableAggregate(void* param, bool* newgroup) { setInputDataBlock(pOperator, pInfo->pCtx, pBlock, order); TSKEY key = 0; - if (QUERY_IS_ASC_QUERY(pQueryAttr)) { + if (order == TSDB_ORDER_ASC) { key = pBlock->info.window.ekey; TSKEY_MAX_ADD(key, 1); } else { @@ -5923,20 +6341,18 @@ static SSDataBlock* doSTableAggregate(void* param, bool* newgroup) { TSKEY_MIN_SUB(key, -1); } - setExecutionContext(pRuntimeEnv, pInfo, pOperator->numOfOutput, pRuntimeEnv->current->groupIndex, key); - doAggregateImpl(pOperator, pQueryAttr->window.skey, pInfo->pCtx, pBlock); + setExecutionContext(pOperator->numOfOutput, pAggInfo->current->groupIndex, key, pTaskInfo, pAggInfo->current, pAggInfo); + doAggregateImpl(pOperator, 0, pInfo->pCtx, pBlock); } pOperator->status = OP_RES_TO_RETURN; closeAllResultRows(&pInfo->resultRowInfo); + updateNumOfRowsInResultRows(pInfo->pCtx, pOperator->numOfOutput, &pInfo->resultRowInfo, pInfo->rowCellInfoOffset); - updateNumOfRowsInResultRows(pRuntimeEnv, pInfo->pCtx, pOperator->numOfOutput, &pInfo->resultRowInfo, - pInfo->rowCellInfoOffset); + initGroupResInfo(&pAggInfo->groupResInfo, &pInfo->resultRowInfo); + toSDatablock(&pAggInfo->groupResInfo, pAggInfo->pResultBuf, pInfo->pRes, pAggInfo->binfo.capacity); - initGroupResInfo(&pRuntimeEnv->groupResInfo, &pInfo->resultRowInfo); - - toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pInfo->pRes); - if (pInfo->pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pRuntimeEnv->groupResInfo)) { + if (pInfo->pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pAggInfo->groupResInfo)) { doSetOperatorCompleted(pOperator); } @@ -5964,7 +6380,7 @@ static SSDataBlock* doProjectOperation(void* param, bool* newgroup) { // todo dynamic set tags if (pTableQueryInfo != NULL) { - setTagValue(pOperator, pTableQueryInfo->pTable, pInfo->pCtx, pOperator->numOfOutput); +// setTagValue(pOperator, pTableQueryInfo->pTable, pInfo->pCtx, pOperator->numOfOutput); } // the pDataBlock are always the same one, no need to call this again @@ -6014,7 +6430,7 @@ static SSDataBlock* doProjectOperation(void* param, bool* newgroup) { // todo dynamic set tags if (pTableQueryInfo != NULL) { - setTagValue(pOperator, pTableQueryInfo->pTable, pInfo->pCtx, pOperator->numOfOutput); +// setTagValue(pOperator, pTableQueryInfo->pTable, pInfo->pCtx, pOperator->numOfOutput); } // the pDataBlock are always the same one, no need to call this again @@ -6125,7 +6541,7 @@ static SSDataBlock* doIntervalAgg(void* param, bool* newgroup) { STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == OP_RES_TO_RETURN) { - toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pIntervalInfo->pRes); +// toSDatablock(pAggInfo->pGroupResInfo, pAggInfo->pResultBuf, pInfo->pRes, pAggInfo->binfo.capacity); if (pIntervalInfo->pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pRuntimeEnv->groupResInfo)) { doSetOperatorCompleted(pOperator); } @@ -6148,7 +6564,7 @@ static SSDataBlock* doIntervalAgg(void* param, bool* newgroup) { break; } - setTagValue(pOperator, pRuntimeEnv->current->pTable, pIntervalInfo->pCtx, pOperator->numOfOutput); +// setTagValue(pOperator, pRuntimeEnv->current->pTable, pIntervalInfo->pCtx, pOperator->numOfOutput); // the pDataBlock are always the same one, no need to call this again setInputDataBlock(pOperator, pIntervalInfo->pCtx, pBlock, pQueryAttr->order.order); @@ -6165,7 +6581,7 @@ static SSDataBlock* doIntervalAgg(void* param, bool* newgroup) { finalizeQueryResult(pOperator, pIntervalInfo->pCtx, &pIntervalInfo->resultRowInfo, pIntervalInfo->rowCellInfoOffset); initGroupResInfo(&pRuntimeEnv->groupResInfo, &pIntervalInfo->resultRowInfo); - toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pIntervalInfo->pRes); +// toSDatablock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pIntervalInfo->pRes); if (pIntervalInfo->pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pRuntimeEnv->groupResInfo)) { doSetOperatorCompleted(pOperator); @@ -6184,7 +6600,7 @@ static SSDataBlock* doAllIntervalAgg(void* param, bool* newgroup) { STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == OP_RES_TO_RETURN) { - toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pIntervalInfo->pRes); +// toSDatablock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pIntervalInfo->pRes); if (pIntervalInfo->pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pRuntimeEnv->groupResInfo)) { doSetOperatorCompleted(pOperator); @@ -6208,7 +6624,7 @@ static SSDataBlock* doAllIntervalAgg(void* param, bool* newgroup) { break; } - setTagValue(pOperator, pRuntimeEnv->current->pTable, pIntervalInfo->pCtx, pOperator->numOfOutput); +// setTagValue(pOperator, pRuntimeEnv->current->pTable, pIntervalInfo->pCtx, pOperator->numOfOutput); // the pDataBlock are always the same one, no need to call this again setInputDataBlock(pOperator, pIntervalInfo->pCtx, pBlock, pQueryAttr->order.order); @@ -6225,7 +6641,7 @@ static SSDataBlock* doAllIntervalAgg(void* param, bool* newgroup) { finalizeQueryResult(pOperator, pIntervalInfo->pCtx, &pIntervalInfo->resultRowInfo, pIntervalInfo->rowCellInfoOffset); initGroupResInfo(&pRuntimeEnv->groupResInfo, &pIntervalInfo->resultRowInfo); - toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pIntervalInfo->pRes); +// toSDatablock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pIntervalInfo->pRes); if (pIntervalInfo->pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pRuntimeEnv->groupResInfo)) { pOperator->status = OP_EXEC_DONE; @@ -6246,7 +6662,7 @@ static SSDataBlock* doSTableIntervalAgg(void* param, bool* newgroup) { if (pOperator->status == OP_RES_TO_RETURN) { int64_t st = taosGetTimestampUs(); - copyToSDataBlock(pRuntimeEnv, 3000, pIntervalInfo->pRes, pIntervalInfo->rowCellInfoOffset); +// copyToSDataBlock(NULL, 3000, pIntervalInfo->pRes, pIntervalInfo->rowCellInfoOffset); if (pIntervalInfo->pRes->info.rows == 0 || !hasRemainData(&pRuntimeEnv->groupResInfo)) { doSetOperatorCompleted(pOperator); } @@ -6274,7 +6690,7 @@ static SSDataBlock* doSTableIntervalAgg(void* param, bool* newgroup) { // the pDataBlock are always the same one, no need to call this again STableQueryInfo* pTableQueryInfo = pRuntimeEnv->current; - setTagValue(pOperator, pTableQueryInfo->pTable, pIntervalInfo->pCtx, pOperator->numOfOutput); +// setTagValue(pOperator, pTableQueryInfo->pTable, pIntervalInfo->pCtx, pOperator->numOfOutput); setInputDataBlock(pOperator, pIntervalInfo->pCtx, pBlock, pQueryAttr->order.order); setIntervalQueryRange(pRuntimeEnv, pBlock->info.window.skey); @@ -6286,7 +6702,7 @@ static SSDataBlock* doSTableIntervalAgg(void* param, bool* newgroup) { doCloseAllTimeWindow(pRuntimeEnv); setTaskStatus(pOperator->pTaskInfo, TASK_COMPLETED); - copyToSDataBlock(pRuntimeEnv, 3000, pIntervalInfo->pRes, pIntervalInfo->rowCellInfoOffset); +// copyToSDataBlock(pRuntimeEnv, 3000, pIntervalInfo->pRes, pIntervalInfo->rowCellInfoOffset); if (pIntervalInfo->pRes->info.rows == 0 || !hasRemainData(&pRuntimeEnv->groupResInfo)) { pOperator->status = OP_EXEC_DONE; } @@ -6304,7 +6720,7 @@ static SSDataBlock* doAllSTableIntervalAgg(void* param, bool* newgroup) { STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == OP_RES_TO_RETURN) { - copyToSDataBlock(pRuntimeEnv, 3000, pIntervalInfo->pRes, pIntervalInfo->rowCellInfoOffset); +// copyToSDataBlock(pRuntimeEnv, 3000, pIntervalInfo->pRes, pIntervalInfo->rowCellInfoOffset); if (pIntervalInfo->pRes->info.rows == 0 || !hasRemainData(&pRuntimeEnv->groupResInfo)) { pOperator->status = OP_EXEC_DONE; } @@ -6329,7 +6745,7 @@ static SSDataBlock* doAllSTableIntervalAgg(void* param, bool* newgroup) { // the pDataBlock are always the same one, no need to call this again STableQueryInfo* pTableQueryInfo = pRuntimeEnv->current; - setTagValue(pOperator, pTableQueryInfo->pTable, pIntervalInfo->pCtx, pOperator->numOfOutput); +// setTagValue(pOperator, pTableQueryInfo->pTable, pIntervalInfo->pCtx, pOperator->numOfOutput); setInputDataBlock(pOperator, pIntervalInfo->pCtx, pBlock, pQueryAttr->order.order); setIntervalQueryRange(pRuntimeEnv, pBlock->info.window.skey); @@ -6342,7 +6758,7 @@ static SSDataBlock* doAllSTableIntervalAgg(void* param, bool* newgroup) { setTaskStatus(pOperator->pTaskInfo, TASK_COMPLETED); int64_t st = taosGetTimestampUs(); - copyToSDataBlock(pRuntimeEnv, 3000, pIntervalInfo->pRes, pIntervalInfo->rowCellInfoOffset); +// copyToSDataBlock(pRuntimeEnv, 3000, pIntervalInfo->pRes, pIntervalInfo->rowCellInfoOffset); if (pIntervalInfo->pRes->info.rows == 0 || !hasRemainData(&pRuntimeEnv->groupResInfo)) { pOperator->status = OP_EXEC_DONE; } @@ -6439,7 +6855,7 @@ static SSDataBlock* doStateWindowAgg(void *param, bool* newgroup) { STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == OP_RES_TO_RETURN) { - toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pBInfo->pRes); +// toSDatablock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pBInfo->pRes); if (pBInfo->pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pRuntimeEnv->groupResInfo)) { pOperator->status = OP_EXEC_DONE; @@ -6477,7 +6893,7 @@ static SSDataBlock* doStateWindowAgg(void *param, bool* newgroup) { finalizeQueryResult(pOperator, pBInfo->pCtx, &pBInfo->resultRowInfo, pBInfo->rowCellInfoOffset); initGroupResInfo(&pRuntimeEnv->groupResInfo, &pBInfo->resultRowInfo); - toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pBInfo->pRes); +// toSDatablock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pBInfo->pRes); if (pBInfo->pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pRuntimeEnv->groupResInfo)) { pOperator->status = OP_EXEC_DONE; @@ -6498,7 +6914,7 @@ static SSDataBlock* doSessionWindowAgg(void* param, bool* newgroup) { STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == OP_RES_TO_RETURN) { - toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pBInfo->pRes); +// toSDatablock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pBInfo->pRes); if (pBInfo->pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pRuntimeEnv->groupResInfo)) { pOperator->status = OP_EXEC_DONE; @@ -6537,7 +6953,7 @@ static SSDataBlock* doSessionWindowAgg(void* param, bool* newgroup) { finalizeQueryResult(pOperator, pBInfo->pCtx, &pBInfo->resultRowInfo, pBInfo->rowCellInfoOffset); initGroupResInfo(&pRuntimeEnv->groupResInfo, &pBInfo->resultRowInfo); - toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pBInfo->pRes); +// toSDatablock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pBInfo->pRes); if (pBInfo->pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pRuntimeEnv->groupResInfo)) { pOperator->status = OP_EXEC_DONE; @@ -6556,7 +6972,7 @@ static SSDataBlock* hashGroupbyAggregate(void* param, bool* newgroup) { STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == OP_RES_TO_RETURN) { - toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pInfo->binfo.pRes); +// toSDatablock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pInfo->binfo.pRes); if (pInfo->binfo.pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pRuntimeEnv->groupResInfo)) { pOperator->status = OP_EXEC_DONE; @@ -6577,7 +6993,7 @@ static SSDataBlock* hashGroupbyAggregate(void* param, bool* newgroup) { // the pDataBlock are always the same one, no need to call this again setInputDataBlock(pOperator, pInfo->binfo.pCtx, pBlock, pRuntimeEnv->pQueryAttr->order.order); - setTagValue(pOperator, pRuntimeEnv->current->pTable, pInfo->binfo.pCtx, pOperator->numOfOutput); +// setTagValue(pOperator, pRuntimeEnv->current->pTable, pInfo->binfo.pCtx, pOperator->numOfOutput); if (pInfo->colIndex == -1) { pInfo->colIndex = getGroupbyColumnIndex(pRuntimeEnv->pQueryAttr->pGroupbyExpr, pBlock); } @@ -6592,7 +7008,7 @@ static SSDataBlock* hashGroupbyAggregate(void* param, bool* newgroup) { if (!pRuntimeEnv->pQueryAttr->stableQuery) { // finalize include the update of result rows finalizeQueryResult(pOperator, pInfo->binfo.pCtx, &pInfo->binfo.resultRowInfo, pInfo->binfo.rowCellInfoOffset); } else { - updateNumOfRowsInResultRows(pRuntimeEnv, pInfo->binfo.pCtx, pOperator->numOfOutput, &pInfo->binfo.resultRowInfo, pInfo->binfo.rowCellInfoOffset); + updateNumOfRowsInResultRows(pInfo->binfo.pCtx, pOperator->numOfOutput, &pInfo->binfo.resultRowInfo, pInfo->binfo.rowCellInfoOffset); } initGroupResInfo(&pRuntimeEnv->groupResInfo, &pInfo->binfo.resultRowInfo); @@ -6600,8 +7016,7 @@ static SSDataBlock* hashGroupbyAggregate(void* param, bool* newgroup) { sortGroupResByOrderList(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pInfo->binfo.pRes); } - toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pInfo->binfo.pRes); - +// toSDatablock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pInfo->binfo.pRes); if (pInfo->binfo.pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pRuntimeEnv->groupResInfo)) { pOperator->status = OP_EXEC_DONE; } @@ -6745,41 +7160,67 @@ static void destroyOperatorInfo(SOperatorInfo* pOperator) { tfree(pOperator); } -SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SArray* pExprInfo, SExecTaskInfo* pTaskInfo) { - SAggOperatorInfo* pInfo = calloc(1, sizeof(SAggOperatorInfo)); - - int32_t numOfRows = 1;//(int32_t)(getRowNumForMultioutput(pQueryAttr, pQueryAttr->topBotQuery, pQueryAttr->stableQuery)); - - size_t numOfOutput = taosArrayGetSize(pExprInfo); +static int32_t initAggInfo(SAggOperatorInfo* pInfo, SArray* pExprInfo, int32_t numOfRows, const STableGroupInfo* pTableGroupInfo) { pInfo->binfo.pRes = createOutputBuf_rv(pExprInfo, numOfRows); - pInfo->binfo.pCtx = createSqlFunctionCtx_rv(pExprInfo, &pInfo->binfo.rowCellInfoOffset); + pInfo->binfo.pCtx = createSqlFunctionCtx_rv(pExprInfo, &pInfo->binfo.rowCellInfoOffset, &pInfo->binfo.resRowSize); + pInfo->binfo.capacity = 4096; - pInfo->pResultRowHashTable = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); - pInfo->pResultRowListSet = taosHashInit(100, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_NO_LOCK); - pInfo->keyBuf = malloc(1024 + sizeof(int64_t) + POINTER_BYTES); // TODO: - pInfo->pool = initResultRowPool(getResultRowSize(pExprInfo)); + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + + pInfo->pResultRowHashTable = taosHashInit(10, hashFn, true, HASH_NO_LOCK); + pInfo->pResultRowListSet = taosHashInit(100, hashFn, false, HASH_NO_LOCK); + pInfo->pool = initResultRowPool(getResultRowSize(pExprInfo)); pInfo->pResultRowArrayList = taosArrayInit(10, sizeof(SResultRowCell)); - initResultRowInfo(&pInfo->binfo.resultRowInfo, 8, TSDB_DATA_TYPE_INT); + pInfo->pTableQueryInfo = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo)); - pInfo->seed = rand(); - setDefaultOutputBuf_rv(pInfo, pInfo->seed, MAIN_SCAN, pTaskInfo); + int32_t index = 0; + for(int32_t i = 0; i < taosArrayGetSize(pTableGroupInfo->pGroupList); ++i) { + SArray* pa = taosArrayGetP(pTableGroupInfo->pGroupList, i); + for(int32_t j = 0; j < taosArrayGetSize(pa); ++j) { + STableKeyInfo* pk = taosArrayGet(pa, j); + STableQueryInfo* pTQueryInfo = &pInfo->pTableQueryInfo[index++]; + pTQueryInfo->uid = pk->uid; + pTQueryInfo->lastKey = pk->lastKey; + pTQueryInfo->groupIndex = i; + } + } + + STimeWindow win = {0, INT64_MAX}; + createTableQueryInfo(pInfo->pTableQueryInfo, false, win); + + return TSDB_CODE_SUCCESS; +} + +static SExprInfo* exprArrayDup(SArray* pExprInfo) { + size_t numOfOutput = taosArrayGetSize(pExprInfo); SExprInfo* p = calloc(numOfOutput, sizeof(SExprInfo)); - for(int32_t i = 0; i < taosArrayGetSize(pExprInfo); ++i) { + for (int32_t i = 0; i < taosArrayGetSize(pExprInfo); ++i) { SExprInfo* pExpr = taosArrayGetP(pExprInfo, i); assignExprInfo(&p[i], pExpr); } + return p; +} + +SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SArray* pExprInfo, SExecTaskInfo* pTaskInfo, const STableGroupInfo* pTableGroupInfo) { + SAggOperatorInfo* pInfo = calloc(1, sizeof(SAggOperatorInfo)); + + int32_t numOfRows = 1; + //(int32_t)(getRowNumForMultioutput(pQueryAttr, pQueryAttr->topBotQuery, pQueryAttr->stableQuery)); + + initAggInfo(pInfo, pExprInfo, numOfRows, pTableGroupInfo); + setDefaultOutputBuf_rv(pInfo, MAIN_SCAN, pTaskInfo); + SOperatorInfo* pOperator = calloc(1, sizeof(SOperatorInfo)); pOperator->name = "TableAggregate"; pOperator->operatorType = OP_Aggregate; pOperator->blockingOptr = true; pOperator->status = OP_IN_EXECUTING; pOperator->info = pInfo; - pOperator->pExpr = p; - pOperator->numOfOutput = numOfOutput; - pOperator->pRuntimeEnv = NULL; + pOperator->pExpr = exprArrayDup(pExprInfo); + pOperator->numOfOutput = taosArrayGetSize(pExprInfo); pOperator->pTaskInfo = pTaskInfo; pOperator->exec = doAggregate; @@ -6796,7 +7237,7 @@ static void doDestroyBasicInfo(SOptrBasicInfo* pInfo, int32_t numOfOutput) { tfree(pInfo->rowCellInfoOffset); cleanupResultRowInfo(&pInfo->resultRowInfo); - pInfo->pRes = destroyOutputBuf(pInfo->pRes); + pInfo->pRes = blockDataDestroy(pInfo->pRes); } static void destroyBasicOperatorInfo(void* param, int32_t numOfOutput) { @@ -6820,7 +7261,7 @@ static void destroySWindowOperatorInfo(void* param, int32_t numOfOutput) { static void destroySFillOperatorInfo(void* param, int32_t numOfOutput) { SFillOperatorInfo* pInfo = (SFillOperatorInfo*) param; pInfo->pFillInfo = taosDestroyFillInfo(pInfo->pFillInfo); - pInfo->pRes = destroyOutputBuf(pInfo->pRes); + pInfo->pRes = blockDataDestroy(pInfo->pRes); tfree(pInfo->p); } @@ -6837,12 +7278,17 @@ static void destroyProjectOperatorInfo(void* param, int32_t numOfOutput) { static void destroyTagScanOperatorInfo(void* param, int32_t numOfOutput) { STagScanInfo* pInfo = (STagScanInfo*) param; - pInfo->pRes = destroyOutputBuf(pInfo->pRes); + pInfo->pRes = blockDataDestroy(pInfo->pRes); } static void destroyOrderOperatorInfo(void* param, int32_t numOfOutput) { SOrderOperatorInfo* pInfo = (SOrderOperatorInfo*) param; - pInfo->pDataBlock = destroyOutputBuf(pInfo->pDataBlock); + pInfo->pDataBlock = blockDataDestroy(pInfo->pDataBlock); + + taosArrayDestroy(pInfo->cmpParam.orderInfo); + destroyResultBuf(pInfo->pSortInternalBuf); + + tMergeTreeDestroy(pInfo->pMergeTree); } static void destroyConditionOperatorInfo(void* param, int32_t numOfOutput) { @@ -6855,29 +7301,29 @@ static void destroyDistinctOperatorInfo(void* param, int32_t numOfOutput) { taosHashCleanup(pInfo->pSet); tfree(pInfo->buf); taosArrayDestroy(pInfo->pDistinctDataInfo); - pInfo->pRes = destroyOutputBuf(pInfo->pRes); + pInfo->pRes = blockDataDestroy(pInfo->pRes); } -SOperatorInfo* createMultiTableAggOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput) { +SOperatorInfo* createMultiTableAggOperatorInfo(SOperatorInfo* downstream, SArray* pExprInfo, SExecTaskInfo* pTaskInfo, const STableGroupInfo* pTableGroupInfo) { SAggOperatorInfo* pInfo = calloc(1, sizeof(SAggOperatorInfo)); - size_t tableGroup = GET_NUM_OF_TABLEGROUP(pRuntimeEnv); + int32_t numOfRows = 1; + size_t numOfOutput = taosArrayGetSize(pExprInfo); + initAggInfo(pInfo, pExprInfo, numOfRows, pTableGroupInfo); - pInfo->binfo.pRes = createOutputBuf(pExpr, numOfOutput, (int32_t) tableGroup); - pInfo->binfo.pCtx = createSqlFunctionCtx(pRuntimeEnv, pExpr, numOfOutput, &pInfo->binfo.rowCellInfoOffset); + size_t tableGroup = taosArrayGetSize(pTableGroupInfo->pGroupList); initResultRowInfo(&pInfo->binfo.resultRowInfo, (int32_t)tableGroup, TSDB_DATA_TYPE_INT); SOperatorInfo* pOperator = calloc(1, sizeof(SOperatorInfo)); pOperator->name = "MultiTableAggregate"; -// pOperator->operatorType = OP_MultiTableAggregate; + pOperator->operatorType = OP_MultiTableAggregate; pOperator->blockingOptr = true; pOperator->status = OP_IN_EXECUTING; pOperator->info = pInfo; - pOperator->pExpr = pExpr; + pOperator->pExpr = exprArrayDup(pExprInfo); pOperator->numOfOutput = numOfOutput; - pOperator->pRuntimeEnv = pRuntimeEnv; - pOperator->exec = doSTableAggregate; + pOperator->exec = doMultiTableAggregate; pOperator->cleanupFn = destroyAggOperatorInfo; appendDownstream(pOperator, downstream); @@ -7719,17 +8165,19 @@ static SExecTaskInfo* createExecTaskInfo(uint64_t queryId, uint64_t taskId) { } static tsdbReaderT doCreateDataReader(STableScanPhyNode* pTableScanNode, SReadHandle* pHandle, uint64_t queryId, uint64_t taskId); + static int32_t doCreateTableGroup(void* metaHandle, int32_t tableType, uint64_t tableUid, STableGroupInfo* pGroupInfo, uint64_t queryId, uint64_t taskId); -SOperatorInfo* doCreateOperatorTreeNode(SPhyNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle, uint64_t queryId, uint64_t taskId) { +SOperatorInfo* doCreateOperatorTreeNode(SPhyNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle, uint64_t queryId, uint64_t taskId, STableGroupInfo* pTableGroupInfo) { if (pPhyNode->pChildren == NULL || taosArrayGetSize(pPhyNode->pChildren) == 0) { - if (pPhyNode->info.type == OP_DataBlocksOptScan) { + if (pPhyNode->info.type == OP_TableScan) { SScanPhyNode* pScanPhyNode = (SScanPhyNode*)pPhyNode; - size_t numOfCols = taosArrayGetSize(pPhyNode->pTargets); + size_t numOfCols = taosArrayGetSize(pPhyNode->pTargets); tsdbReaderT pDataReader = doCreateDataReader((STableScanPhyNode*) pPhyNode, pHandle, (uint64_t) queryId, taskId); - return createDataBlocksOptScanInfo(pDataReader, pScanPhyNode->order, numOfCols, pScanPhyNode->count, pScanPhyNode->reverse, pTaskInfo); + int32_t code = doCreateTableGroup(pHandle->meta, pScanPhyNode->tableType, pScanPhyNode->uid, pTableGroupInfo, queryId, taskId); + return createTableScanOperatorInfo(pDataReader, pScanPhyNode->order, numOfCols, pScanPhyNode->count, pScanPhyNode->reverse, pTaskInfo); } else if (pPhyNode->info.type == OP_Exchange) { SExchangePhyNode* pEx = (SExchangePhyNode*) pPhyNode; return createExchangeOperatorInfo(pEx->pSrcEndPoints, pEx->node.pTargets, pTaskInfo); @@ -7762,10 +8210,20 @@ SOperatorInfo* doCreateOperatorTreeNode(SPhyNode* pPhyNode, SExecTaskInfo* pTask size_t size = taosArrayGetSize(pPhyNode->pChildren); assert(size == 1); + // TODO single table agg for (int32_t i = 0; i < size; ++i) { SPhyNode* pChildNode = taosArrayGetP(pPhyNode->pChildren, i); - SOperatorInfo* op = doCreateOperatorTreeNode(pChildNode, pTaskInfo, pHandle, queryId, taskId); - return createAggregateOperatorInfo(op, pPhyNode->pTargets, pTaskInfo); + SOperatorInfo* op = doCreateOperatorTreeNode(pChildNode, pTaskInfo, pHandle, queryId, taskId, pTableGroupInfo); + return createAggregateOperatorInfo(op, pPhyNode->pTargets, pTaskInfo, pTableGroupInfo); + } + } else if (pPhyNode->info.type == OP_MultiTableAggregate) { + size_t size = taosArrayGetSize(pPhyNode->pChildren); + assert(size == 1); + + for (int32_t i = 0; i < size; ++i) { + SPhyNode* pChildNode = taosArrayGetP(pPhyNode->pChildren, i); + SOperatorInfo* op = doCreateOperatorTreeNode(pChildNode, pTaskInfo, pHandle, queryId, taskId, pTableGroupInfo); + return createMultiTableAggOperatorInfo(op, pPhyNode->pTargets, pTaskInfo, pTableGroupInfo); } } } @@ -7840,7 +8298,8 @@ int32_t createExecTaskInfoImpl(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SRead goto _complete; } - (*pTaskInfo)->pRoot = doCreateOperatorTreeNode(pPlan->pNode, *pTaskInfo, pHandle, queryId, taskId); + STableGroupInfo group = {0}; + (*pTaskInfo)->pRoot = doCreateOperatorTreeNode(pPlan->pNode, *pTaskInfo, pHandle, queryId, taskId, &group); if ((*pTaskInfo)->pRoot == NULL) { code = TSDB_CODE_QRY_OUT_OF_MEMORY; goto _complete; @@ -7855,367 +8314,6 @@ _complete: return code; } -/** - * pQueryMsg->head has been converted before this function is called. - * - * @param pQueryMsg - * @param pTableIdList - * @param pExpr - * @return - */ -//int32_t convertQueryMsg(SQueryTableReq *pQueryMsg, STaskParam* param) { -// int32_t code = TSDB_CODE_SUCCESS; -// -//// if (taosCheckVersion(pQueryMsg->version, version, 3) != 0) { -//// return TSDB_CODE_QRY_INVALID_MSG; -//// } -// -// pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables); -// pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey); -// pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey); -// pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval); -// pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding); -// pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset); -// pQueryMsg->limit = htobe64(pQueryMsg->limit); -// pQueryMsg->offset = htobe64(pQueryMsg->offset); -// pQueryMsg->vgroupLimit = htobe64(pQueryMsg->vgroupLimit); -// -// pQueryMsg->order = htons(pQueryMsg->order); -// pQueryMsg->orderColId = htons(pQueryMsg->orderColId); -// pQueryMsg->queryType = htonl(pQueryMsg->queryType); -//// pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType); -// -// pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols); -// pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput); -// pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols); -// -// pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen); -// pQueryMsg->colCondLen = htons(pQueryMsg->colCondLen); -// -// pQueryMsg->tsBuf.tsOffset = htonl(pQueryMsg->tsBuf.tsOffset); -// pQueryMsg->tsBuf.tsLen = htonl(pQueryMsg->tsBuf.tsLen); -// pQueryMsg->tsBuf.tsNumOfBlocks = htonl(pQueryMsg->tsBuf.tsNumOfBlocks); -// pQueryMsg->tsBuf.tsOrder = htonl(pQueryMsg->tsBuf.tsOrder); -// -// pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags); -//// pQueryMsg->tbnameCondLen = htonl(pQueryMsg->tbnameCondLen); -// pQueryMsg->secondStageOutput = htonl(pQueryMsg->secondStageOutput); -// pQueryMsg->sqlstrLen = htonl(pQueryMsg->sqlstrLen); -// pQueryMsg->prevResultLen = htonl(pQueryMsg->prevResultLen); -//// pQueryMsg->sw.gap = htobe64(pQueryMsg->sw.gap); -//// pQueryMsg->sw.primaryColId = htonl(pQueryMsg->sw.primaryColId); -// pQueryMsg->tableScanOperator = htonl(pQueryMsg->tableScanOperator); -// pQueryMsg->numOfOperator = htonl(pQueryMsg->numOfOperator); -// pQueryMsg->udfContentOffset = htonl(pQueryMsg->udfContentOffset); -// pQueryMsg->udfContentLen = htonl(pQueryMsg->udfContentLen); -// pQueryMsg->udfNum = htonl(pQueryMsg->udfNum); -// -// // query msg safety check -// if (!validateQueryMsg(pQueryMsg)) { -// code = TSDB_CODE_QRY_INVALID_MSG; -// goto _cleanup; -// } -// -// char *pMsg = (char *)(pQueryMsg->tableCols) + sizeof(SColumnInfo) * pQueryMsg->numOfCols; -// for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) { -// SColumnInfo *pColInfo = &pQueryMsg->tableCols[col]; -// -// pColInfo->colId = htons(pColInfo->colId); -// pColInfo->type = htons(pColInfo->type); -// pColInfo->bytes = htons(pColInfo->bytes); -// pColInfo->flist.numOfFilters = 0; -// -// if (!isValidDataType(pColInfo->type)) { -// //qDebug("qmsg:%p, invalid data type in source column, index:%d, type:%d", pQueryMsg, col, pColInfo->type); -// code = TSDB_CODE_QRY_INVALID_MSG; -// goto _cleanup; -// } -// -///* -// int32_t numOfFilters = pColInfo->flist.numOfFilters; -// if (numOfFilters > 0) { -// pColInfo->flist.filterInfo = calloc(numOfFilters, sizeof(SColumnFilterInfo)); -// if (pColInfo->flist.filterInfo == NULL) { -// code = TSDB_CODE_QRY_OUT_OF_MEMORY; -// goto _cleanup; -// } -// } -// -// code = deserializeColFilterInfo(pColInfo->flist.filterInfo, numOfFilters, &pMsg); -// if (code != TSDB_CODE_SUCCESS) { -// goto _cleanup; -// } -//*/ -// } -// -// if (pQueryMsg->colCondLen > 0) { -// param->colCond = calloc(1, pQueryMsg->colCondLen); -// if (param->colCond == NULL) { -// code = TSDB_CODE_QRY_OUT_OF_MEMORY; -// goto _cleanup; -// } -// -// memcpy(param->colCond, pMsg, pQueryMsg->colCondLen); -// pMsg += pQueryMsg->colCondLen; -// } -// -// -// param->tableScanOperator = pQueryMsg->tableScanOperator; -// param->pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES); -// if (param->pExpr == NULL) { -// code = TSDB_CODE_QRY_OUT_OF_MEMORY; -// goto _cleanup; -// } -// -// SSqlExpr *pExprMsg = (SSqlExpr *)pMsg; -// -// for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) { -// param->pExpr[i] = pExprMsg; -// -//// pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex); -//// pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId); -//// pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag); -//// pExprMsg->colBytes = htons(pExprMsg->colBytes); -//// pExprMsg->colType = htons(pExprMsg->colType); -// -//// pExprMsg->resType = htons(pExprMsg->resType); -//// pExprMsg->resBytes = htons(pExprMsg->resBytes); -// pExprMsg->interBytes = htonl(pExprMsg->interBytes); -// -//// pExprMsg->functionId = htons(pExprMsg->functionId); -// pExprMsg->numOfParams = htons(pExprMsg->numOfParams); -//// pExprMsg->resColId = htons(pExprMsg->resColId); -//// pExprMsg->flist.numOfFilters = htons(pExprMsg->flist.numOfFilters); -// pMsg += sizeof(SSqlExpr); -// -// for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) { -// pExprMsg->param[j].nType = htonl(pExprMsg->param[j].nType); -// pExprMsg->param[j].nLen = htonl(pExprMsg->param[j].nLen); -// -// if (pExprMsg->param[j].nType == TSDB_DATA_TYPE_BINARY) { -// pExprMsg->param[j].pz = pMsg; -// pMsg += pExprMsg->param[j].nLen; // one more for the string terminated char. -// } else { -// pExprMsg->param[j].i = htobe64(pExprMsg->param[j].i); -// } -// } -// -//// int16_t functionId = pExprMsg->functionId; -//// if (functionId == FUNCTION_TAG || functionId == FUNCTION_TAGPRJ || functionId == FUNCTION_TAG_DUMMY) { -//// if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) { // ignore the column index check for arithmetic expression. -//// code = TSDB_CODE_QRY_INVALID_MSG; -//// goto _cleanup; -//// } -//// } -// -//// if (pExprMsg->flist.numOfFilters > 0) { -//// pExprMsg->flist.filterInfo = calloc(pExprMsg->flist.numOfFilters, sizeof(SColumnFilterInfo)); -//// } -//// -//// deserializeColFilterInfo(pExprMsg->flist.filterInfo, pExprMsg->flist.numOfFilters, &pMsg); -// pExprMsg = (SSqlExpr *)pMsg; -// } -// -// if (pQueryMsg->secondStageOutput) { -// pExprMsg = (SSqlExpr *)pMsg; -// param->pSecExpr = calloc(pQueryMsg->secondStageOutput, POINTER_BYTES); -// -// for (int32_t i = 0; i < pQueryMsg->secondStageOutput; ++i) { -// param->pSecExpr[i] = pExprMsg; -// -//// pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex); -//// pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId); -//// pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag); -//// pExprMsg->resType = htons(pExprMsg->resType); -//// pExprMsg->resBytes = htons(pExprMsg->resBytes); -//// pExprMsg->colBytes = htons(pExprMsg->colBytes); -//// pExprMsg->colType = htons(pExprMsg->colType); -// -//// pExprMsg->functionId = htons(pExprMsg->functionId); -// pExprMsg->numOfParams = htons(pExprMsg->numOfParams); -// -// pMsg += sizeof(SSqlExpr); -// -// for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) { -// pExprMsg->param[j].nType = htonl(pExprMsg->param[j].nType); -// pExprMsg->param[j].nLen = htonl(pExprMsg->param[j].nLen); -// -// if (pExprMsg->param[j].nType == TSDB_DATA_TYPE_BINARY) { -// pExprMsg->param[j].pz = pMsg; -// pMsg += pExprMsg->param[j].nLen; // one more for the string terminated char. -// } else { -// pExprMsg->param[j].i = htobe64(pExprMsg->param[j].i); -// } -// } -// -//// int16_t functionId = pExprMsg->functionId; -//// if (functionId == FUNCTION_TAG || functionId == FUNCTION_TAGPRJ || functionId == FUNCTION_TAG_DUMMY) { -//// if (!TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) { // ignore the column index check for arithmetic expression. -//// code = TSDB_CODE_QRY_INVALID_MSG; -//// goto _cleanup; -//// } -//// } -// -// pExprMsg = (SSqlExpr *)pMsg; -// } -// } -// -// pMsg = createTableIdList(pQueryMsg, pMsg, &(param->pTableIdList)); -// -// if (pQueryMsg->numOfGroupCols > 0) { // group by tag columns -// param->pGroupColIndex = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex)); -// if (param->pGroupColIndex == NULL) { -// code = TSDB_CODE_QRY_OUT_OF_MEMORY; -// goto _cleanup; -// } -// -// for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) { -// param->pGroupColIndex[i].colId = htons(*(int16_t *)pMsg); -// pMsg += sizeof(param->pGroupColIndex[i].colId); -// -// param->pGroupColIndex[i].colIndex = htons(*(int16_t *)pMsg); -// pMsg += sizeof(param->pGroupColIndex[i].colIndex); -// -// param->pGroupColIndex[i].flag = htons(*(int16_t *)pMsg); -// pMsg += sizeof(param->pGroupColIndex[i].flag); -// -// memcpy(param->pGroupColIndex[i].name, pMsg, tListLen(param->pGroupColIndex[i].name)); -// pMsg += tListLen(param->pGroupColIndex[i].name); -// } -// -// pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx); -// pQueryMsg->orderType = htons(pQueryMsg->orderType); -// } -// -// pQueryMsg->fillType = htons(pQueryMsg->fillType); -// if (pQueryMsg->fillType != TSDB_FILL_NONE) { -// pQueryMsg->fillVal = (uint64_t)(pMsg); -// -// int64_t *v = (int64_t *)pMsg; -// for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) { -// v[i] = htobe64(v[i]); -// } -// -// pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput; -// } -// -// if (pQueryMsg->numOfTags > 0) { -// param->pTagColumnInfo = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags); -// if (param->pTagColumnInfo == NULL) { -// code = TSDB_CODE_QRY_OUT_OF_MEMORY; -// goto _cleanup; -// } -// -// for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) { -// SColumnInfo* pTagCol = (SColumnInfo*) pMsg; -// -// pTagCol->colId = htons(pTagCol->colId); -// pTagCol->bytes = htons(pTagCol->bytes); -// pTagCol->type = htons(pTagCol->type); -//// pTagCol->flist.numOfFilters = 0; -// -// param->pTagColumnInfo[i] = *pTagCol; -// pMsg += sizeof(SColumnInfo); -// } -// } -// -// // the tag query condition expression string is located at the end of query msg -// if (pQueryMsg->tagCondLen > 0) { -// param->tagCond = calloc(1, pQueryMsg->tagCondLen); -// if (param->tagCond == NULL) { -// code = TSDB_CODE_QRY_OUT_OF_MEMORY; -// goto _cleanup; -// } -// -// memcpy(param->tagCond, pMsg, pQueryMsg->tagCondLen); -// pMsg += pQueryMsg->tagCondLen; -// } -// -// if (pQueryMsg->prevResultLen > 0) { -// param->prevResult = calloc(1, pQueryMsg->prevResultLen); -// if (param->prevResult == NULL) { -// code = TSDB_CODE_QRY_OUT_OF_MEMORY; -// goto _cleanup; -// } -// -// memcpy(param->prevResult, pMsg, pQueryMsg->prevResultLen); -// pMsg += pQueryMsg->prevResultLen; -// } -// -//// if (pQueryMsg->tbnameCondLen > 0) { -//// param->tbnameCond = calloc(1, pQueryMsg->tbnameCondLen + 1); -//// if (param->tbnameCond == NULL) { -//// code = TSDB_CODE_QRY_OUT_OF_MEMORY; -//// goto _cleanup; -//// } -//// -//// strncpy(param->tbnameCond, pMsg, pQueryMsg->tbnameCondLen); -//// pMsg += pQueryMsg->tbnameCondLen; -//// } -// -// //skip ts buf -// if ((pQueryMsg->tsBuf.tsOffset + pQueryMsg->tsBuf.tsLen) > 0) { -// pMsg = (char *)pQueryMsg + pQueryMsg->tsBuf.tsOffset + pQueryMsg->tsBuf.tsLen; -// } -// -// param->pOperator = taosArrayInit(pQueryMsg->numOfOperator, sizeof(int32_t)); -// for(int32_t i = 0; i < pQueryMsg->numOfOperator; ++i) { -// int32_t op = htonl(*(int32_t*)pMsg); -// taosArrayPush(param->pOperator, &op); -// -// pMsg += sizeof(int32_t); -// } -// -// if (pQueryMsg->udfContentLen > 0) { -// // todo extract udf function in tudf.c -//// param->pUdfInfo = calloc(1, sizeof(SUdfInfo)); -//// param->pUdfInfo->contLen = pQueryMsg->udfContentLen; -//// -//// pMsg = (char*)pQueryMsg + pQueryMsg->udfContentOffset; -//// param->pUdfInfo->resType = *(int8_t*) pMsg; -//// pMsg += sizeof(int8_t); -//// -//// param->pUdfInfo->resBytes = htons(*(int16_t*)pMsg); -//// pMsg += sizeof(int16_t); -//// -//// tstr* name = (tstr*)(pMsg); -//// param->pUdfInfo->name = strndup(name->data, name->len); -//// -//// pMsg += varDataTLen(name); -//// param->pUdfInfo->funcType = htonl(*(int32_t*)pMsg); -//// pMsg += sizeof(int32_t); -//// -//// param->pUdfInfo->bufSize = htonl(*(int32_t*)pMsg); -//// pMsg += sizeof(int32_t); -//// -//// param->pUdfInfo->content = malloc(pQueryMsg->udfContentLen); -//// memcpy(param->pUdfInfo->content, pMsg, pQueryMsg->udfContentLen); -// -// pMsg += pQueryMsg->udfContentLen; -// } -// -// param->sql = strndup(pMsg, pQueryMsg->sqlstrLen); -// -// SQueriedTableInfo info = { .numOfTags = pQueryMsg->numOfTags, .numOfCols = pQueryMsg->numOfCols, .colList = pQueryMsg->tableCols}; -// if (!validateQueryTableCols(&info, param->pExpr, pQueryMsg->numOfOutput, param->pTagColumnInfo, pQueryMsg)) { -// code = TSDB_CODE_QRY_INVALID_MSG; -// goto _cleanup; -// } -// -// //qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, " -//// "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64, -//// pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols, -//// pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->interval.interval, -//// pQueryMsg->fillType, pQueryMsg->tsBuf.tsLen, pQueryMsg->tsBuf.tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset); -// -// //qDebug("qmsg:%p, sql:%s", pQueryMsg, param->sql); -// return TSDB_CODE_SUCCESS; -// -//_cleanup: -// freeParam(param); -// return code; -//} - int32_t cloneExprFilterInfo(SColumnFilterInfo **dst, SColumnFilterInfo* src, int32_t filterNum) { if (filterNum <= 0) { return TSDB_CODE_SUCCESS; diff --git a/source/libs/executor/test/executorTests.cpp b/source/libs/executor/test/executorTests.cpp index 8381a7c585..ebea6755d7 100644 --- a/source/libs/executor/test/executorTests.cpp +++ b/source/libs/executor/test/executorTests.cpp @@ -33,193 +33,312 @@ #include "stub.h" #include "executor.h" -/** -{ - "Id": { - "QueryId": 1.3108161807422521e+19, - "TemplateId": 0, - "SubplanId": 0 - }, - "Node": { - "Name": "TableScan", - "Targets": [{ - "Base": { - "Schema": { - "Type": 9, - "ColId": 5000, - "Bytes": 8 - }, - "Columns": [{ - "TableId": 1, - "Flag": 0, - "Info": { - "ColId": 1, - "Type": 9, - "Bytes": 8 - } - }], - "InterBytes": 0 - }, - "Expr": { - "Type": 4, - "Column": { - "Type": 9, - "ColId": 1, - "Bytes": 8 - } - } - }, { - "Base": { - "Schema": { - "Type": 4, - "ColId": 5001, - "Bytes": 4 - }, - "Columns": [{ - "TableId": 1, - "Flag": 0, - "Info": { - "ColId": 2, - "Type": 4, - "Bytes": 4 - } - }], - "InterBytes": 0 - }, - "Expr": { - "Type": 4, - "Column": { - "Type": 4, - "ColId": 2, - "Bytes": 4 - } - } - }], - "InputSchema": [{ - "Type": 9, - "ColId": 5000, - "Bytes": 8 - }, { - "Type": 4, - "ColId": 5001, - "Bytes": 4 - }], - "TableScan": { - "TableId": 1, - "TableType": 2, - "Flag": 0, - "Window": { - "StartKey": -9.2233720368547758e+18, - "EndKey": 9.2233720368547758e+18 - } - } - }, - "DataSink": { - "Name": "Dispatch", - "Dispatch": { - } - } -} - */ +namespace { +typedef struct SDummyInputInfo { + int32_t max; + int32_t current; + int32_t startVal; + SSDataBlock* pBlock; +} SDummyInputInfo; + +SSDataBlock* getDummyBlock(void* param, bool* newgroup) { + SOperatorInfo* pOperator = static_cast(param); + SDummyInputInfo* pInfo = static_cast(pOperator->info); + if (pInfo->current >= pInfo->max) { + return NULL; + } + + int32_t numOfRows = 1000; + + if (pInfo->pBlock == NULL) { + pInfo->pBlock = static_cast(calloc(1, sizeof(SSDataBlock))); + + pInfo->pBlock->pDataBlock = taosArrayInit(4, sizeof(SColumnInfoData)); + + SColumnInfoData colInfo = {0}; + colInfo.info.type = TSDB_DATA_TYPE_INT; + colInfo.info.bytes = sizeof(int32_t); + colInfo.info.colId = 1; + colInfo.pData = static_cast(calloc(numOfRows, sizeof(int32_t))); + colInfo.nullbitmap = static_cast(calloc(1, (numOfRows + 7) / 8)); + + taosArrayPush(pInfo->pBlock->pDataBlock, &colInfo); + +// SColumnInfoData colInfo1 = {0}; +// colInfo1.info.type = TSDB_DATA_TYPE_BINARY; +// colInfo1.info.bytes = 40; +// colInfo1.info.colId = 2; +// +// colInfo1.varmeta.allocLen = 0;//numOfRows * sizeof(int32_t); +// colInfo1.varmeta.length = 0; +// colInfo1.varmeta.offset = static_cast(calloc(1, numOfRows * sizeof(int32_t))); +// +// taosArrayPush(pInfo->pBlock->pDataBlock, &colInfo1); + } else { + blockDataClearup(pInfo->pBlock, true); + } + + SSDataBlock* pBlock = pInfo->pBlock; + + char buf[128] = {0}; + char b1[128] = {0}; + for(int32_t i = 0; i < numOfRows; ++i) { + SColumnInfoData* pColInfo = static_cast(TARRAY_GET_ELEM(pBlock->pDataBlock, 0)); + + int32_t v = (--pInfo->startVal); + colDataAppend(pColInfo, i, reinterpret_cast(&v), false); + +// sprintf(buf, "this is %d row", i); +// STR_TO_VARSTR(b1, buf); +// +// SColumnInfoData* pColInfo2 = static_cast(TARRAY_GET_ELEM(pBlock->pDataBlock, 1)); +// colDataAppend(pColInfo2, i, b1, false); + } + + pBlock->info.rows = numOfRows; + pBlock->info.numOfCols = 1; + + pInfo->current += 1; + return pBlock; +} + +SOperatorInfo* createDummyOperator(int32_t numOfBlocks) { + SOperatorInfo* pOperator = static_cast(calloc(1, sizeof(SOperatorInfo))); + pOperator->name = "dummyInputOpertor4Test"; + pOperator->exec = getDummyBlock; + + SDummyInputInfo *pInfo = (SDummyInputInfo*) calloc(1, sizeof(SDummyInputInfo)); + pInfo->max = numOfBlocks; + pInfo->startVal = 1500000; + + pOperator->info = pInfo; + return pOperator; +} +} int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } TEST(testCase, build_executor_tree_Test) { - - const char* msg = "{\n" - "\t\"Id\":\t{\n" - "\t\t\"QueryId\":\t1.3108161807422521e+19,\n" - "\t\t\"TemplateId\":\t0,\n" - "\t\t\"SubplanId\":\t0\n" - "\t},\n" - "\t\"Node\":\t{\n" - "\t\t\"Name\":\t\"TableScan\",\n" - "\t\t\"Targets\":\t[{\n" - "\t\t\t\t\"Base\":\t{\n" - "\t\t\t\t\t\"Schema\":\t{\n" - "\t\t\t\t\t\t\"Type\":\t9,\n" - "\t\t\t\t\t\t\"ColId\":\t5000,\n" - "\t\t\t\t\t\t\"Bytes\":\t8\n" - "\t\t\t\t\t},\n" - "\t\t\t\t\t\"Columns\":\t[{\n" - "\t\t\t\t\t\t\t\"TableId\":\t1,\n" - "\t\t\t\t\t\t\t\"Flag\":\t0,\n" - "\t\t\t\t\t\t\t\"Info\":\t{\n" - "\t\t\t\t\t\t\t\t\"ColId\":\t1,\n" - "\t\t\t\t\t\t\t\t\"Type\":\t9,\n" - "\t\t\t\t\t\t\t\t\"Bytes\":\t8\n" - "\t\t\t\t\t\t\t}\n" - "\t\t\t\t\t\t}],\n" - "\t\t\t\t\t\"InterBytes\":\t0\n" - "\t\t\t\t},\n" - "\t\t\t\t\"Expr\":\t{\n" - "\t\t\t\t\t\"Type\":\t4,\n" - "\t\t\t\t\t\"Column\":\t{\n" - "\t\t\t\t\t\t\"Type\":\t9,\n" - "\t\t\t\t\t\t\"ColId\":\t1,\n" - "\t\t\t\t\t\t\"Bytes\":\t8\n" - "\t\t\t\t\t}\n" - "\t\t\t\t}\n" - "\t\t\t}, {\n" - "\t\t\t\t\"Base\":\t{\n" - "\t\t\t\t\t\"Schema\":\t{\n" - "\t\t\t\t\t\t\"Type\":\t4,\n" - "\t\t\t\t\t\t\"ColId\":\t5001,\n" - "\t\t\t\t\t\t\"Bytes\":\t4\n" - "\t\t\t\t\t},\n" - "\t\t\t\t\t\"Columns\":\t[{\n" - "\t\t\t\t\t\t\t\"TableId\":\t1,\n" - "\t\t\t\t\t\t\t\"Flag\":\t0,\n" - "\t\t\t\t\t\t\t\"Info\":\t{\n" - "\t\t\t\t\t\t\t\t\"ColId\":\t2,\n" - "\t\t\t\t\t\t\t\t\"Type\":\t4,\n" - "\t\t\t\t\t\t\t\t\"Bytes\":\t4\n" - "\t\t\t\t\t\t\t}\n" - "\t\t\t\t\t\t}],\n" - "\t\t\t\t\t\"InterBytes\":\t0\n" - "\t\t\t\t},\n" - "\t\t\t\t\"Expr\":\t{\n" - "\t\t\t\t\t\"Type\":\t4,\n" - "\t\t\t\t\t\"Column\":\t{\n" - "\t\t\t\t\t\t\"Type\":\t4,\n" - "\t\t\t\t\t\t\"ColId\":\t2,\n" - "\t\t\t\t\t\t\"Bytes\":\t4\n" - "\t\t\t\t\t}\n" - "\t\t\t\t}\n" - "\t\t\t}],\n" - "\t\t\"InputSchema\":\t[{\n" - "\t\t\t\t\"Type\":\t9,\n" - "\t\t\t\t\"ColId\":\t5000,\n" - "\t\t\t\t\"Bytes\":\t8\n" - "\t\t\t}, {\n" - "\t\t\t\t\"Type\":\t4,\n" - "\t\t\t\t\"ColId\":\t5001,\n" - "\t\t\t\t\"Bytes\":\t4\n" - "\t\t\t}],\n" - "\t\t\"TableScan\":\t{\n" - "\t\t\t\"TableId\":\t1,\n" - "\t\t\t\"TableType\":\t2,\n" - "\t\t\t\"Flag\":\t0,\n" - "\t\t\t\"Window\":\t{\n" - "\t\t\t\t\"StartKey\":\t-9.2233720368547758e+18,\n" - "\t\t\t\t\"EndKey\":\t9.2233720368547758e+18\n" - "\t\t\t}\n" - "\t\t}\n" - "\t},\n" - "\t\"DataSink\":\t{\n" - "\t\t\"Name\":\t\"Dispatch\",\n" - "\t\t\"Dispatch\":\t{\n" - "\t\t}\n" - "\t}\n" - "}"; + "\t\"Id\":\t{\n" + "\t\t\"QueryId\":\t1.3108161807422521e+19,\n" + "\t\t\"TemplateId\":\t0,\n" + "\t\t\"SubplanId\":\t0\n" + "\t},\n" + "\t\"Node\":\t{\n" + "\t\t\"Name\":\t\"TableScan\",\n" + "\t\t\"Targets\":\t[{\n" + "\t\t\t\t\"Base\":\t{\n" + "\t\t\t\t\t\"Schema\":\t{\n" + "\t\t\t\t\t\t\"Type\":\t9,\n" + "\t\t\t\t\t\t\"ColId\":\t5000,\n" + "\t\t\t\t\t\t\"Bytes\":\t8\n" + "\t\t\t\t\t},\n" + "\t\t\t\t\t\"Columns\":\t[{\n" + "\t\t\t\t\t\t\t\"TableId\":\t1,\n" + "\t\t\t\t\t\t\t\"Flag\":\t0,\n" + "\t\t\t\t\t\t\t\"Info\":\t{\n" + "\t\t\t\t\t\t\t\t\"ColId\":\t1,\n" + "\t\t\t\t\t\t\t\t\"Type\":\t9,\n" + "\t\t\t\t\t\t\t\t\"Bytes\":\t8\n" + "\t\t\t\t\t\t\t}\n" + "\t\t\t\t\t\t}],\n" + "\t\t\t\t\t\"InterBytes\":\t0\n" + "\t\t\t\t},\n" + "\t\t\t\t\"Expr\":\t{\n" + "\t\t\t\t\t\"Type\":\t4,\n" + "\t\t\t\t\t\"Column\":\t{\n" + "\t\t\t\t\t\t\"Type\":\t9,\n" + "\t\t\t\t\t\t\"ColId\":\t1,\n" + "\t\t\t\t\t\t\"Bytes\":\t8\n" + "\t\t\t\t\t}\n" + "\t\t\t\t}\n" + "\t\t\t}, {\n" + "\t\t\t\t\"Base\":\t{\n" + "\t\t\t\t\t\"Schema\":\t{\n" + "\t\t\t\t\t\t\"Type\":\t4,\n" + "\t\t\t\t\t\t\"ColId\":\t5001,\n" + "\t\t\t\t\t\t\"Bytes\":\t4\n" + "\t\t\t\t\t},\n" + "\t\t\t\t\t\"Columns\":\t[{\n" + "\t\t\t\t\t\t\t\"TableId\":\t1,\n" + "\t\t\t\t\t\t\t\"Flag\":\t0,\n" + "\t\t\t\t\t\t\t\"Info\":\t{\n" + "\t\t\t\t\t\t\t\t\"ColId\":\t2,\n" + "\t\t\t\t\t\t\t\t\"Type\":\t4,\n" + "\t\t\t\t\t\t\t\t\"Bytes\":\t4\n" + "\t\t\t\t\t\t\t}\n" + "\t\t\t\t\t\t}],\n" + "\t\t\t\t\t\"InterBytes\":\t0\n" + "\t\t\t\t},\n" + "\t\t\t\t\"Expr\":\t{\n" + "\t\t\t\t\t\"Type\":\t4,\n" + "\t\t\t\t\t\"Column\":\t{\n" + "\t\t\t\t\t\t\"Type\":\t4,\n" + "\t\t\t\t\t\t\"ColId\":\t2,\n" + "\t\t\t\t\t\t\"Bytes\":\t4\n" + "\t\t\t\t\t}\n" + "\t\t\t\t}\n" + "\t\t\t}],\n" + "\t\t\"InputSchema\":\t[{\n" + "\t\t\t\t\"Type\":\t9,\n" + "\t\t\t\t\"ColId\":\t5000,\n" + "\t\t\t\t\"Bytes\":\t8\n" + "\t\t\t}, {\n" + "\t\t\t\t\"Type\":\t4,\n" + "\t\t\t\t\"ColId\":\t5001,\n" + "\t\t\t\t\"Bytes\":\t4\n" + "\t\t\t}],\n" + "\t\t\"TableScan\":\t{\n" + "\t\t\t\"TableId\":\t1,\n" + "\t\t\t\"TableType\":\t2,\n" + "\t\t\t\"Flag\":\t0,\n" + "\t\t\t\"Window\":\t{\n" + "\t\t\t\t\"StartKey\":\t-9.2233720368547758e+18,\n" + "\t\t\t\t\"EndKey\":\t9.2233720368547758e+18\n" + "\t\t\t}\n" + "\t\t}\n" + "\t},\n" + "\t\"DataSink\":\t{\n" + "\t\t\"Name\":\t\"Dispatch\",\n" + "\t\t\"Dispatch\":\t{\n" + "\t\t}\n" + "\t}\n" + "}"; SExecTaskInfo* pTaskInfo = nullptr; DataSinkHandle sinkHandle = nullptr; - int32_t code = qCreateExecTask((SReadHandle*) 1, 2, 1, NULL, (void**) &pTaskInfo, &sinkHandle); + SReadHandle handle = {.reader = reinterpret_cast(0x1), .meta = reinterpret_cast(0x1)}; + +// int32_t code = qCreateExecTask(&handle, 2, 1, NULL, (void**) &pTaskInfo, &sinkHandle); } +//TEST(testCase, inMem_sort_Test) { +// SArray* pOrderVal = taosArrayInit(4, sizeof(SOrder)); +// SOrder o = {.order = TSDB_ORDER_ASC}; +// o.col.info.colId = 1; +// o.col.info.type = TSDB_DATA_TYPE_INT; +// taosArrayPush(pOrderVal, &o); +// +// SArray* pExprInfo = taosArrayInit(4, sizeof(SExprInfo)); +// SExprInfo *exp = static_cast(calloc(1, sizeof(SExprInfo))); +// exp->base.resSchema = createSchema(TSDB_DATA_TYPE_INT, sizeof(int32_t), 1, "res"); +// taosArrayPush(pExprInfo, &exp); +// +// SExprInfo *exp1 = static_cast(calloc(1, sizeof(SExprInfo))); +// exp1->base.resSchema = createSchema(TSDB_DATA_TYPE_BINARY, 40, 2, "res1"); +// taosArrayPush(pExprInfo, &exp1); +// +// SOperatorInfo* pOperator = createOrderOperatorInfo(createDummyOperator(5), pExprInfo, pOrderVal); +// +// bool newgroup = false; +// SSDataBlock* pRes = pOperator->exec(pOperator, &newgroup); +// +// SColumnInfoData* pCol1 = static_cast(taosArrayGet(pRes->pDataBlock, 0)); +// SColumnInfoData* pCol2 = static_cast(taosArrayGet(pRes->pDataBlock, 1)); +// for(int32_t i = 0; i < pRes->info.rows; ++i) { +// char* p = colDataGet(pCol2, i); +// printf("%d: %d, %s\n", i, ((int32_t*)pCol1->pData)[i], (char*)varDataVal(p)); +// } +//} + +typedef struct su { + int32_t v; + char *c; +} su; + +int32_t cmp(const void* p1, const void* p2) { + su* v1 = (su*) p1; + su* v2 = (su*) p2; + + int32_t x1 = *(int32_t*) v1->c; + int32_t x2 = *(int32_t*) v2->c; + if (x1 == x2) { + return 0; + } else { + return x1 < x2? -1:1; + } +} + +TEST(testCase, external_sort_Test) { +#if 0 + su* v = static_cast(calloc(1000000, sizeof(su))); + for(int32_t i = 0; i < 1000000; ++i) { + v[i].v = rand(); + v[i].c = static_cast(malloc(4)); + *(int32_t*) v[i].c = i; + } + + qsort(v, 1000000, sizeof(su), cmp); +// for(int32_t i = 0; i < 1000; ++i) { +// printf("%d ", v[i]); +// } +// printf("\n"); + return; +#endif + + srand(time(NULL)); + + SArray* pOrderVal = taosArrayInit(4, sizeof(SOrder)); + SOrder o = {0}; + o.order = TSDB_ORDER_ASC; + o.col.info.colId = 1; + o.col.info.type = TSDB_DATA_TYPE_INT; + taosArrayPush(pOrderVal, &o); + + SArray* pExprInfo = taosArrayInit(4, sizeof(SExprInfo)); + SExprInfo *exp = static_cast(calloc(1, sizeof(SExprInfo))); + exp->base.resSchema = createSchema(TSDB_DATA_TYPE_INT, sizeof(int32_t), 1, "res"); + taosArrayPush(pExprInfo, &exp); + + SExprInfo *exp1 = static_cast(calloc(1, sizeof(SExprInfo))); + exp1->base.resSchema = createSchema(TSDB_DATA_TYPE_BINARY, 40, 2, "res1"); +// taosArrayPush(pExprInfo, &exp1); + + SOperatorInfo* pOperator = createOrderOperatorInfo(createDummyOperator(1500), pExprInfo, pOrderVal); + + bool newgroup = false; + SSDataBlock* pRes = NULL; + + int32_t total = 1; + + int64_t s1 = taosGetTimestampUs(); + int32_t t = 1; + + while(1) { + int64_t s = taosGetTimestampUs(); + pRes = pOperator->exec(pOperator, &newgroup); + + int64_t e = taosGetTimestampUs(); + if (t++ == 1) { + printf("---------------elapsed:%ld\n", e - s); + } + + if (pRes == NULL) { + break; + } + + SColumnInfoData* pCol1 = static_cast(taosArrayGet(pRes->pDataBlock, 0)); +// SColumnInfoData* pCol2 = static_cast(taosArrayGet(pRes->pDataBlock, 1)); + for (int32_t i = 0; i < pRes->info.rows; ++i) { +// char* p = colDataGet(pCol2, i); + printf("%d: %d\n", total++, ((int32_t*)pCol1->pData)[i]); +// printf("%d: %d, %s\n", total++, ((int32_t*)pCol1->pData)[i], (char*)varDataVal(p)); + } + } + + printStatisBeforeClose(((SOrderOperatorInfo*) pOperator->info)->pSortInternalBuf); + + int64_t s2 = taosGetTimestampUs(); + printf("total:%ld\n", s2 - s1); + + pOperator->cleanupFn(pOperator->info, 2); + tfree(exp); + tfree(exp1); + taosArrayDestroy(pExprInfo); + taosArrayDestroy(pOrderVal); +} #pragma GCC diagnostic pop diff --git a/source/libs/function/inc/thistogram.h b/source/libs/function/inc/thistogram.h index 3b5c2b4cfb..cb6560325b 100644 --- a/source/libs/function/inc/thistogram.h +++ b/source/libs/function/inc/thistogram.h @@ -49,7 +49,7 @@ typedef struct SHistogramInfo { SHistBin* elems; #else tSkipList* pList; - SLoserTreeInfo* pLoserTree; + SMultiwayMergeTreeInfo* pLoserTree; int32_t maxIndex; bool ordered; #endif diff --git a/source/libs/function/inc/tpercentile.h b/source/libs/function/inc/tpercentile.h index 563a63f6a5..dfb52f7694 100644 --- a/source/libs/function/inc/tpercentile.h +++ b/source/libs/function/inc/tpercentile.h @@ -20,7 +20,7 @@ extern "C" { #endif -#include "tpagedfile.h" +#include "tpagedbuf.h" #include "ttszip.h" typedef struct MinMaxEntry { @@ -63,7 +63,7 @@ typedef struct tMemBucket { __compar_fn_t comparFn; tMemBucketSlot * pSlots; - SDiskbasedResultBuf *pBuffer; + SDiskbasedBuf *pBuffer; __perc_hash_func_t hashFunc; } tMemBucket; diff --git a/source/libs/function/src/thistogram.c b/source/libs/function/src/thistogram.c index 2229ac8561..49799aef7a 100644 --- a/source/libs/function/src/thistogram.c +++ b/source/libs/function/src/thistogram.c @@ -117,14 +117,14 @@ int32_t tHistogramAdd(SHistogramInfo** pHisto, double val) { if ((*pHisto)->ordered) { int32_t lastIndex = (*pHisto)->maxIndex; - SLoserTreeInfo* pTree = (*pHisto)->pLoserTree; + SMultiwayMergeTreeInfo* pTree = (*pHisto)->pLoserTree; (*pHisto)->pLoserTree->pNode[lastIndex + pTree->numOfEntries].pData = pResNode; pEntry1->index = (*pHisto)->pLoserTree->pNode[lastIndex + pTree->numOfEntries].index; // update the loser tree if ((*pHisto)->ordered) { - tLoserTreeAdjust(pTree, pEntry1->index + pTree->numOfEntries); + tMergeTreeAdjust(pTree, pEntry1->index + pTree->numOfEntries); } tSkipListKey kx = @@ -142,10 +142,10 @@ int32_t tHistogramAdd(SHistogramInfo** pHisto, double val) { SHistBin* pPrevEntry = (SHistBin*)pResNode->pBackward[0]->pData; pPrevEntry->delta = val - pPrevEntry->val; - SLoserTreeInfo* pTree = (*pHisto)->pLoserTree; + SMultiwayMergeTreeInfo* pTree = (*pHisto)->pLoserTree; if ((*pHisto)->ordered) { - tLoserTreeAdjust(pTree, pPrevEntry->index + pTree->numOfEntries); - tLoserTreeDisplay(pTree); + tMergeTreeAdjust(pTree, pPrevEntry->index + pTree->numOfEntries); + tMergeTreePrint(pTree); } } @@ -155,7 +155,7 @@ int32_t tHistogramAdd(SHistogramInfo** pHisto, double val) { if (!(*pHisto)->ordered) { SSkipListPrint((*pHisto)->pList, 1); - SLoserTreeInfo* pTree = (*pHisto)->pLoserTree; + SMultiwayMergeTreeInfo* pTree = (*pHisto)->pLoserTree; tSkipListNode* pHead = (*pHisto)->pList->pHead.pForward[0]; tSkipListNode* p1 = pHead; @@ -183,13 +183,13 @@ int32_t tHistogramAdd(SHistogramInfo** pHisto, double val) { pTree->pNode[i].index = -1; } - tLoserTreeDisplay(pTree); + tMergeTreePrint(pTree); for (int32_t i = pTree->totalEntries - 1; i >= pTree->numOfEntries; i--) { - tLoserTreeAdjust(pTree, i); + tMergeTreeAdjust(pTree, i); } - tLoserTreeDisplay(pTree); + tMergeTreePrint(pTree); (*pHisto)->ordered = true; } @@ -219,7 +219,7 @@ int32_t tHistogramAdd(SHistogramInfo** pHisto, double val) { pPrevEntry->delta = pEntry->val - pPrevEntry->val; } - SLoserTreeInfo* pTree = (*pHisto)->pLoserTree; + SMultiwayMergeTreeInfo* pTree = (*pHisto)->pLoserTree; if (pNextEntry->index != -1) { (*pHisto)->maxIndex = pNextEntry->index; @@ -230,12 +230,12 @@ int32_t tHistogramAdd(SHistogramInfo** pHisto, double val) { printf("disappear index is:%d\n", f); } - tLoserTreeAdjust(pTree, pEntry->index + pTree->numOfEntries); + tMergeTreeAdjust(pTree, pEntry->index + pTree->numOfEntries); // remove the next node in skiplist tSkipListRemoveNode((*pHisto)->pList, pNext); SSkipListPrint((*pHisto)->pList, 1); - tLoserTreeDisplay((*pHisto)->pLoserTree); + tMergeTreePrint((*pHisto)->pLoserTree); } else { // add to heap if (pResNode->pForward[0] != NULL) { pEntry1->delta = ((SHistBin*)pResNode->pForward[0]->pData)->val - val; diff --git a/source/libs/function/src/tpercentile.c b/source/libs/function/src/tpercentile.c index 5d8876fee1..40731adc58 100644 --- a/source/libs/function/src/tpercentile.c +++ b/source/libs/function/src/tpercentile.c @@ -15,10 +15,10 @@ #include #include "os.h" -#include "tpercentile.h" -#include "tpagedfile.h" #include "taosdef.h" #include "tcompare.h" +#include "tpagedbuf.h" +#include "tpercentile.h" #include "ttypes.h" #define DEFAULT_NUM_OF_SLOT 1024 @@ -35,9 +35,9 @@ static SFilePage *loadDataFromFilePage(tMemBucket *pMemBucket, int32_t slotIdx) int32_t offset = 0; for(int32_t i = 0; i < list->size; ++i) { - SPageInfo* pgInfo = *(SPageInfo**) taosArrayGet(list, i); + struct SPageInfo* pgInfo = *(struct SPageInfo**) taosArrayGet(list, i); - SFilePage* pg = getResBufPage(pMemBucket->pBuffer, pgInfo->pageId); + SFilePage* pg = getBufPage(pMemBucket->pBuffer, getPageId(pgInfo)); memcpy(buffer->data + offset, pg->data, (size_t)(pg->num * pMemBucket->bytes)); offset += (int32_t)(pg->num * pMemBucket->bytes); @@ -98,8 +98,8 @@ double findOnlyResult(tMemBucket *pMemBucket) { SIDList list = getDataBufPagesIdList(pMemBucket->pBuffer, groupId); assert(list->size == 1); - SPageInfo* pgInfo = (SPageInfo*) taosArrayGetP(list, 0); - SFilePage* pPage = getResBufPage(pMemBucket->pBuffer, pgInfo->pageId); + struct SPageInfo* pgInfo = (struct SPageInfo*) taosArrayGetP(list, 0); + SFilePage* pPage = getBufPage(pMemBucket->pBuffer, getPageId(pgInfo)); assert(pPage->num == 1); double v = 0; @@ -254,7 +254,7 @@ tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType, double minval, resetSlotInfo(pBucket); - int32_t ret = createDiskbasedResultBuffer(&pBucket->pBuffer, pBucket->bufPageSize, pBucket->bufPageSize * 512, 1, tsTempDir); + int32_t ret = createDiskbasedBuffer(&pBucket->pBuffer, pBucket->bufPageSize, pBucket->bufPageSize * 512, 1, tsTempDir); if (ret != 0) { tMemBucketDestroy(pBucket); return NULL; @@ -343,7 +343,7 @@ int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size) { assert(pSlot->info.data->num >= pBucket->elemPerPage && pSlot->info.size > 0); // keep the pointer in memory - releaseResBufPage(pBucket->pBuffer, pSlot->info.data); + releaseBufPage(pBucket->pBuffer, pSlot->info.data); pSlot->info.data = NULL; } @@ -471,10 +471,10 @@ double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) for (int32_t f = 0; f < list->size; ++f) { SPageInfo *pgInfo = *(SPageInfo **)taosArrayGet(list, f); - SFilePage *pg = getResBufPage(pMemBucket->pBuffer, pgInfo->pageId); + SFilePage *pg = getBufPage(pMemBucket->pBuffer, getPageId(pgInfo)); tMemBucketPut(pMemBucket, pg->data, (int32_t)pg->num); - releaseResBufPageInfo(pMemBucket->pBuffer, pgInfo); + releaseBufPageInfo(pMemBucket->pBuffer, pgInfo); } return getPercentileImpl(pMemBucket, count - num, fraction); diff --git a/source/libs/planner/src/physicalPlan.c b/source/libs/planner/src/physicalPlan.c index 20e8378c26..eadc95b98d 100644 --- a/source/libs/planner/src/physicalPlan.c +++ b/source/libs/planner/src/physicalPlan.c @@ -216,7 +216,7 @@ static SPhyNode* createMultiTableScanNode(SQueryPlanNode* pPlanNode, SQueryTable } else if (needSeqScan(pPlanNode)) { return createUserTableScanNode(pPlanNode, pTable, OP_TableSeqScan); } - int32_t type = (pPlanNode->info.type == QNODE_TABLESCAN)? OP_DataBlocksOptScan:OP_StreamScan; + int32_t type = (pPlanNode->info.type == QNODE_TABLESCAN)? OP_TableScan:OP_StreamScan; return createUserTableScanNode(pPlanNode, pTable, type); } @@ -288,7 +288,7 @@ static bool needMultiNodeScan(SQueryTableInfo* pTable) { static SPhyNode* createSingleTableScanNode(SQueryPlanNode* pPlanNode, SQueryTableInfo* pTableInfo, SSubplan* subplan) { SVgroupsInfo* pVgroupsInfo = pTableInfo->pMeta->vgroupList; vgroupInfoToNodeAddr(&(pVgroupsInfo->vgroups[0]), &subplan->execNode); - int32_t type = (pPlanNode->info.type == QNODE_TABLESCAN)? OP_DataBlocksOptScan:OP_StreamScan; + int32_t type = (pPlanNode->info.type == QNODE_TABLESCAN)? OP_TableScan:OP_StreamScan; return createUserTableScanNode(pPlanNode, pTableInfo, type); } diff --git a/source/libs/planner/src/physicalPlanJson.c b/source/libs/planner/src/physicalPlanJson.c index e367f2e74b..b2109c0a4f 100644 --- a/source/libs/planner/src/physicalPlanJson.c +++ b/source/libs/planner/src/physicalPlanJson.c @@ -88,7 +88,7 @@ static const char* jkPnodeType = "Type"; static int32_t getPnodeTypeSize(cJSON* json) { switch (getNumber(json, jkPnodeType)) { case OP_StreamScan: - case OP_DataBlocksOptScan: + case OP_TableScan: case OP_TableSeqScan: return sizeof(STableScanPhyNode); case OP_TagScan: @@ -830,7 +830,7 @@ static bool specificPhyNodeToJson(const void* obj, cJSON* json) { const SPhyNode* phyNode = (const SPhyNode*)obj; switch (phyNode->info.type) { case OP_StreamScan: - case OP_DataBlocksOptScan: + case OP_TableScan: case OP_TableSeqScan: return tableScanNodeToJson(obj, json); case OP_TagScan: @@ -868,7 +868,7 @@ static bool specificPhyNodeFromJson(const cJSON* json, void* obj) { SPhyNode* phyNode = (SPhyNode*)obj; switch (phyNode->info.type) { case OP_StreamScan: - case OP_DataBlocksOptScan: + case OP_TableScan: case OP_TableSeqScan: return tableScanNodeFromJson(json, obj); case OP_TagScan: diff --git a/source/util/src/tlosertree.c b/source/util/src/tlosertree.c index 6155ba4c1a..80bbac2c78 100644 --- a/source/util/src/tlosertree.c +++ b/source/util/src/tlosertree.c @@ -14,82 +14,85 @@ */ #include "os.h" -#include "tlosertree.h" #include "ulog.h" +#include "tlosertree.h" +#include "taoserror.h" -// set initial value for loser tree -void tLoserTreeInit(SLoserTreeInfo* pTree) { - assert((pTree->totalEntries & 0x01) == 0 && (pTree->numOfEntries << 1 == pTree->totalEntries)); - for (int32_t i = 0; i < pTree->totalEntries; ++i) { - if (i < pTree->numOfEntries) { + +// Set the initial value of the multiway merge tree. +static void tMergeTreeInit(SMultiwayMergeTreeInfo* pTree) { + assert((pTree->totalSources & 0x01) == 0 && (pTree->numOfSources << 1 == pTree->totalSources)); + + for (int32_t i = 0; i < pTree->totalSources; ++i) { + if (i < pTree->numOfSources) { pTree->pNode[i].index = -1; } else { - pTree->pNode[i].index = i - pTree->numOfEntries; + pTree->pNode[i].index = i - pTree->numOfSources; } } } -/* - * display whole loser tree on screen for debug purpose only. - */ -void tLoserTreeDisplay(SLoserTreeInfo* pTree) { - printf("the value of loser tree:\t"); - for (int32_t i = 0; i < pTree->totalEntries; ++i) printf("%d\t", pTree->pNode[i].index); - printf("\n"); -} +int32_t tMergeTreeCreate(SMultiwayMergeTreeInfo** pTree, uint32_t numOfSources, void* param, __merge_compare_fn_t compareFn) { + int32_t totalEntries = numOfSources << 1u; -uint32_t tLoserTreeCreate(SLoserTreeInfo** pTree, int32_t numOfEntries, void* param, __merge_compare_fn_t compareFn) { - int32_t totalEntries = numOfEntries << 1; - - *pTree = (SLoserTreeInfo*)calloc(1, sizeof(SLoserTreeInfo) + sizeof(SLoserTreeNode) * totalEntries); - if ((*pTree) == NULL) { + SMultiwayMergeTreeInfo* pTreeInfo = (SMultiwayMergeTreeInfo*)calloc(1, sizeof(SMultiwayMergeTreeInfo) + sizeof(STreeNode) * totalEntries); + if (pTreeInfo == NULL) { uError("allocate memory for loser-tree failed. reason:%s", strerror(errno)); - return -1; + return TAOS_SYSTEM_ERROR(errno); } - (*pTree)->pNode = (SLoserTreeNode*)(((char*)(*pTree)) + sizeof(SLoserTreeInfo)); + pTreeInfo->pNode = (STreeNode*)(((char*)pTreeInfo) + sizeof(SMultiwayMergeTreeInfo)); - (*pTree)->numOfEntries = numOfEntries; - (*pTree)->totalEntries = totalEntries; - (*pTree)->param = param; - (*pTree)->comparFn = compareFn; + pTreeInfo->numOfSources = numOfSources; + pTreeInfo->totalSources = totalEntries; + pTreeInfo->param = param; + pTreeInfo->comparFn = compareFn; // set initial value for loser tree - tLoserTreeInit(*pTree); + tMergeTreeInit(pTreeInfo); #ifdef _DEBUG_VIEW printf("the initial value of loser tree:\n"); - tLoserTreeDisplay(*pTree); + tLoserTreeDisplaypTreeInfo; #endif - for (int32_t i = totalEntries - 1; i >= numOfEntries; i--) { - tLoserTreeAdjust(*pTree, i); + for (int32_t i = totalEntries - 1; i >= numOfSources; i--) { + tMergeTreeAdjust(pTreeInfo, i); } #if defined(_DEBUG_VIEW) printf("after adjust:\n"); - tLoserTreeDisplay(*pTree); + tLoserTreeDisplaypTreeInfo; printf("initialize local reducer completed!\n"); #endif + *pTree = pTreeInfo; return 0; } -void tLoserTreeAdjust(SLoserTreeInfo* pTree, int32_t idx) { - assert(idx <= pTree->totalEntries - 1 && idx >= pTree->numOfEntries && pTree->totalEntries >= 2); +void tMergeTreeDestroy(SMultiwayMergeTreeInfo* pTree) { + if (pTree == NULL) { + return; + } - if (pTree->totalEntries == 2) { + tfree(pTree); +} + +void tMergeTreeAdjust(SMultiwayMergeTreeInfo* pTree, int32_t idx) { + assert(idx <= pTree->totalSources - 1 && idx >= pTree->numOfSources && pTree->totalSources >= 2); + + if (pTree->totalSources == 2) { pTree->pNode[0].index = 0; pTree->pNode[1].index = 0; return; } int32_t parentId = idx >> 1; - SLoserTreeNode kLeaf = pTree->pNode[idx]; + STreeNode kLeaf = pTree->pNode[idx]; while (parentId > 0) { - SLoserTreeNode* pCur = &pTree->pNode[parentId]; + STreeNode* pCur = &pTree->pNode[parentId]; if (pCur->index == -1) { pTree->pNode[parentId] = kLeaf; return; @@ -97,7 +100,7 @@ void tLoserTreeAdjust(SLoserTreeInfo* pTree, int32_t idx) { int32_t ret = pTree->comparFn(pCur, &kLeaf, pTree->param); if (ret < 0) { - SLoserTreeNode t = pTree->pNode[parentId]; + STreeNode t = pTree->pNode[parentId]; pTree->pNode[parentId] = kLeaf; kLeaf = t; } @@ -111,11 +114,23 @@ void tLoserTreeAdjust(SLoserTreeInfo* pTree, int32_t idx) { } } -void tLoserTreeRebuild(SLoserTreeInfo* pTree) { - assert((pTree->totalEntries & 0x1) == 0); +void tMergeTreeRebuild(SMultiwayMergeTreeInfo* pTree) { + assert((pTree->totalSources & 0x1) == 0); - tLoserTreeInit(pTree); - for (int32_t i = pTree->totalEntries - 1; i >= pTree->numOfEntries; i--) { - tLoserTreeAdjust(pTree, i); + tMergeTreeInit(pTree); + for (int32_t i = pTree->totalSources - 1; i >= pTree->numOfSources; i--) { + tMergeTreeAdjust(pTree, i); } } + +/* + * display whole loser tree on screen for debug purpose only. + */ +void tMergeTreePrint(const SMultiwayMergeTreeInfo* pTree) { + printf("the value of loser tree:\t"); + for (int32_t i = 0; i < pTree->totalSources; ++i) { + printf("%d\t", pTree->pNode[i].index); + } + + printf("\n"); +} diff --git a/source/util/src/tpagedbuf.c b/source/util/src/tpagedbuf.c new file mode 100644 index 0000000000..0e8d85492c --- /dev/null +++ b/source/util/src/tpagedbuf.c @@ -0,0 +1,597 @@ +#include "os.h" +#include "ulog.h" +#include "tpagedbuf.h" +#include "taoserror.h" +#include "tcompression.h" +#include "thash.h" + +#define GET_DATA_PAYLOAD(_p) ((char *)(_p)->pData + POINTER_BYTES) +#define NO_IN_MEM_AVAILABLE_PAGES(_b) (listNEles((_b)->lruList) >= (_b)->inMemPages) + +typedef struct SFreeListItem { + int32_t offset; + int32_t len; +} SFreeListItem; + +typedef struct SPageDiskInfo { + int64_t offset; + int32_t length; +} SPageDiskInfo; + +typedef struct SPageInfo { + SListNode* pn; // point to list node + void* pData; + int64_t offset; + int32_t pageId; + int32_t length:30; + bool used:1; // set current page is in used + bool dirty:1; // set current buffer page is dirty or not +} SPageInfo; + +typedef struct SDiskbasedBuf { + int32_t numOfPages; + int64_t totalBufSize; + uint64_t fileSize; // disk file size + FILE* file; + int32_t allocateId; // allocated page id + char* path; // file path + int32_t pageSize; // current used page size + int32_t inMemPages; // numOfPages that are allocated in memory + SHashObj* groupSet; // id hash table + SHashObj* all; + SList* lruList; + void* emptyDummyIdList; // dummy id list + void* assistBuf; // assistant buffer for compress/decompress data + SArray* pFree; // free area in file + bool comp; // compressed before flushed to disk + uint64_t nextPos; // next page flush position + + uint64_t qId; // for debug purpose + bool printStatis; // Print statistics info when closing this buffer. + SDiskbasedBufStatis statis; +} SDiskbasedBuf; + +static void printStatisData(const SDiskbasedBuf* pBuf); + + int32_t createDiskbasedBuffer(SDiskbasedBuf** pBuf, int32_t pagesize, int32_t inMemBufSize, uint64_t qId, const char* dir) { + *pBuf = calloc(1, sizeof(SDiskbasedBuf)); + + SDiskbasedBuf* pResBuf = *pBuf; + if (pResBuf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + pResBuf->pageSize = pagesize; + pResBuf->numOfPages = 0; // all pages are in buffer in the first place + pResBuf->totalBufSize = 0; + pResBuf->inMemPages = inMemBufSize/pagesize; // maximum allowed pages, it is a soft limit. + pResBuf->allocateId = -1; + pResBuf->comp = true; + pResBuf->file = NULL; + pResBuf->qId = qId; + pResBuf->fileSize = 0; + + // at least more than 2 pages must be in memory + assert(inMemBufSize >= pagesize * 2); + + pResBuf->lruList = tdListNew(POINTER_BYTES); + + // init id hash table + pResBuf->groupSet = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false); + pResBuf->assistBuf = malloc(pResBuf->pageSize + 2); // EXTRA BYTES + pResBuf->all = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false); + + char path[PATH_MAX] = {0}; + taosGetTmpfilePath(dir, "qbuf", path); + pResBuf->path = strdup(path); + + pResBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t)); + +// qDebug("QInfo:0x%"PRIx64" create resBuf for output, page size:%d, inmem buf pages:%d, file:%s", qId, pResBuf->pageSize, +// pResBuf->inMemPages, pResBuf->path); + + return TSDB_CODE_SUCCESS; +} + +static int32_t createDiskFile(SDiskbasedBuf* pBuf) { + pBuf->file = fopen(pBuf->path, "wb+"); + if (pBuf->file == NULL) { +// qError("failed to create tmp file: %s on disk. %s", pBuf->path, strerror(errno)); + return TAOS_SYSTEM_ERROR(errno); + } + + return TSDB_CODE_SUCCESS; +} + +static char* doCompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedBuf* pBuf) { // do nothing + if (!pBuf->comp) { + *dst = srcSize; + return data; + } + + *dst = tsCompressString(data, srcSize, 1, pBuf->assistBuf, srcSize, ONE_STAGE_COMP, NULL, 0); + + memcpy(data, pBuf->assistBuf, *dst); + return data; +} + +static char* doDecompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedBuf* pBuf) { // do nothing + if (!pBuf->comp) { + *dst = srcSize; + return data; + } + + *dst = tsDecompressString(data, srcSize, 1, pBuf->assistBuf, pBuf->pageSize+sizeof(SFilePage), ONE_STAGE_COMP, NULL, 0); + if (*dst > 0) { + memcpy(data, pBuf->assistBuf, *dst); + } + return data; +} + +static uint64_t allocatePositionInFile(SDiskbasedBuf* pBuf, size_t size) { + if (pBuf->pFree == NULL) { + return pBuf->nextPos; + } else { + int32_t offset = -1; + + size_t num = taosArrayGetSize(pBuf->pFree); + for(int32_t i = 0; i < num; ++i) { + SFreeListItem* pi = taosArrayGet(pBuf->pFree, i); + if (pi->len >= size) { + offset = pi->offset; + pi->offset += (int32_t)size; + pi->len -= (int32_t)size; + + return offset; + } + } + + // no available recycle space, allocate new area in file + return pBuf->nextPos; + } +} + +static char* doFlushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { + assert(!pg->used && pg->pData != NULL); + + int32_t size = -1; + char* t = NULL; + if (pg->offset == -1 || pg->dirty) { + SFilePage* pPage = (SFilePage*) GET_DATA_PAYLOAD(pg); + t = doCompressData(pPage->data, pBuf->pageSize, &size, pBuf); + } + + // this page is flushed to disk for the first time + if (pg->offset == -1) { + assert(pg->dirty == true); + + pg->offset = allocatePositionInFile(pBuf, size); + pBuf->nextPos += size; + + int32_t ret = fseek(pBuf->file, pg->offset, SEEK_SET); + if (ret != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return NULL; + } + + ret = (int32_t) fwrite(t, 1, size, pBuf->file); + if (ret != size) { + terrno = TAOS_SYSTEM_ERROR(errno); + return NULL; + } + + if (pBuf->fileSize < pg->offset + size) { + pBuf->fileSize = pg->offset + size; + } + + pBuf->statis.flushBytes += size; + pBuf->statis.flushPages += 1; + } else if (pg->dirty) { + // length becomes greater, current space is not enough, allocate new place, otherwise, do nothing + if (pg->length < size) { + // 1. add current space to free list + SPageDiskInfo dinfo = {.length = pg->length, .offset = pg->offset}; + taosArrayPush(pBuf->pFree, &dinfo); + + // 2. allocate new position, and update the info + pg->offset = allocatePositionInFile(pBuf, size); + pBuf->nextPos += size; + } + + // 3. write to disk. + int32_t ret = fseek(pBuf->file, pg->offset, SEEK_SET); + if (ret != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return NULL; + } + + ret = (int32_t)fwrite(t, 1, size, pBuf->file); + if (ret != size) { + terrno = TAOS_SYSTEM_ERROR(errno); + return NULL; + } + + if (pBuf->fileSize < pg->offset + size) { + pBuf->fileSize = pg->offset + size; + } + + pBuf->statis.flushBytes += size; + pBuf->statis.flushPages += 1; + } + + char* pDataBuf = pg->pData; + memset(pDataBuf, 0, pBuf->pageSize + sizeof(SFilePage)); + + pg->pData = NULL; // this means the data is not in buffer + pg->length = size; + pg->dirty = false; + + return pDataBuf; +} + +static char* flushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { + int32_t ret = TSDB_CODE_SUCCESS; + assert(((int64_t) pBuf->numOfPages * pBuf->pageSize) == pBuf->totalBufSize && pBuf->numOfPages >= pBuf->inMemPages); + + if (pBuf->file == NULL) { + if ((ret = createDiskFile(pBuf)) != TSDB_CODE_SUCCESS) { + terrno = ret; + return NULL; + } + } + + return doFlushPageToDisk(pBuf, pg); +} + +// load file block data in disk +static int32_t loadPageFromDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { + int32_t ret = fseek(pBuf->file, pg->offset, SEEK_SET); + if (ret != 0) { + ret = TAOS_SYSTEM_ERROR(errno); + return ret; + } + + SFilePage* pPage = (SFilePage*) GET_DATA_PAYLOAD(pg); + ret = (int32_t)fread(pPage->data, 1, pg->length, pBuf->file); + if (ret != pg->length) { + ret = TAOS_SYSTEM_ERROR(errno); + return ret; + } + + pBuf->statis.loadBytes += pg->length; + pBuf->statis.loadPages += 1; + + int32_t fullSize = 0; + doDecompressData(pPage->data, pg->length, &fullSize, pBuf); + return 0; +} + +static SIDList addNewGroup(SDiskbasedBuf* pBuf, int32_t groupId) { + assert(taosHashGet(pBuf->groupSet, (const char*) &groupId, sizeof(int32_t)) == NULL); + + SArray* pa = taosArrayInit(1, POINTER_BYTES); + int32_t ret = taosHashPut(pBuf->groupSet, (const char*)&groupId, sizeof(int32_t), &pa, POINTER_BYTES); + assert(ret == 0); + + return pa; +} + +static SPageInfo* registerPage(SDiskbasedBuf* pBuf, int32_t groupId, int32_t pageId) { + SIDList list = NULL; + + char** p = taosHashGet(pBuf->groupSet, (const char*)&groupId, sizeof(int32_t)); + if (p == NULL) { // it is a new group id + list = addNewGroup(pBuf, groupId); + } else { + list = (SIDList) (*p); + } + + pBuf->numOfPages += 1; + + SPageInfo* ppi = malloc(sizeof(SPageInfo));//{ .info = PAGE_INFO_INITIALIZER, .pageId = pageId, .pn = NULL}; + + ppi->pageId = pageId; + ppi->pData = NULL; + ppi->offset = -1; + ppi->length = -1; + ppi->used = true; + ppi->pn = NULL; + + return *(SPageInfo**) taosArrayPush(list, &ppi); +} + +static SListNode* getEldestUnrefedPage(SDiskbasedBuf* pBuf) { + SListIter iter = {0}; + tdListInitIter(pBuf->lruList, &iter, TD_LIST_BACKWARD); + + SListNode* pn = NULL; + while((pn = tdListNext(&iter)) != NULL) { + assert(pn != NULL); + + SPageInfo* pageInfo = *(SPageInfo**) pn->data; + assert(pageInfo->pageId >= 0 && pageInfo->pn == pn); + + if (!pageInfo->used) { + break; + } + } + + return pn; +} + +static char* evacOneDataPage(SDiskbasedBuf* pBuf) { + char* bufPage = NULL; + SListNode* pn = getEldestUnrefedPage(pBuf); + + // all pages are referenced by user, try to allocate new space + if (pn == NULL) { + assert(0); + int32_t prev = pBuf->inMemPages; + + // increase by 50% of previous mem pages + pBuf->inMemPages = (int32_t)(pBuf->inMemPages * 1.5f); + +// qWarn("%p in memory buf page not sufficient, expand from %d to %d, page size:%d", pBuf, prev, +// pBuf->inMemPages, pBuf->pageSize); + } else { + tdListPopNode(pBuf->lruList, pn); + + SPageInfo* d = *(SPageInfo**) pn->data; + assert(d->pn == pn); + + d->pn = NULL; + tfree(pn); + + bufPage = flushPageToDisk(pBuf, d); + } + + return bufPage; +} + +static void lruListPushFront(SList *pList, SPageInfo* pi) { + tdListPrepend(pList, &pi); + SListNode* front = tdListGetHead(pList); + pi->pn = front; +} + +static void lruListMoveToFront(SList *pList, SPageInfo* pi) { + tdListPopNode(pList, pi->pn); + tdListPrependNode(pList, pi->pn); +} + +static FORCE_INLINE size_t getAllocPageSize(int32_t pageSize) { + return pageSize + POINTER_BYTES + 2 + sizeof(SFilePage); +} + +SFilePage* getNewDataBuf(SDiskbasedBuf* pBuf, int32_t groupId, int32_t* pageId) { + pBuf->statis.getPages += 1; + + char* availablePage = NULL; + if (NO_IN_MEM_AVAILABLE_PAGES(pBuf)) { + availablePage = evacOneDataPage(pBuf); + + // Failed to allocate a new buffer page, and there is an error occurs. + if (availablePage == NULL) { + return NULL; + } + } + + // register new id in this group + *pageId = (++pBuf->allocateId); + + // register page id info + SPageInfo* pi = registerPage(pBuf, groupId, *pageId); + + // add to LRU list + assert(listNEles(pBuf->lruList) < pBuf->inMemPages && pBuf->inMemPages > 0); + lruListPushFront(pBuf->lruList, pi); + + // add to hash map + taosHashPut(pBuf->all, pageId, sizeof(int32_t), &pi, POINTER_BYTES); + + // allocate buf + if (availablePage == NULL) { + pi->pData = calloc(1, getAllocPageSize(pBuf->pageSize)); // add extract bytes in case of zipped buffer increased. + } else { + pi->pData = availablePage; + } + + pBuf->totalBufSize += pBuf->pageSize; + + ((void**)pi->pData)[0] = pi; + pi->used = true; + + return (void *)(GET_DATA_PAYLOAD(pi)); +} + +SFilePage* getBufPage(SDiskbasedBuf* pBuf, int32_t id) { + assert(pBuf != NULL && id >= 0); + pBuf->statis.getPages += 1; + + SPageInfo** pi = taosHashGet(pBuf->all, &id, sizeof(int32_t)); + assert(pi != NULL && *pi != NULL); + + if ((*pi)->pData != NULL) { // it is in memory + // no need to update the LRU list if only one page exists + if (pBuf->numOfPages == 1) { + (*pi)->used = true; + return (void *)(GET_DATA_PAYLOAD(*pi)); + } + + SPageInfo** pInfo = (SPageInfo**) ((*pi)->pn->data); + assert(*pInfo == *pi); + + lruListMoveToFront(pBuf->lruList, (*pi)); + (*pi)->used = true; + + return (void *)(GET_DATA_PAYLOAD(*pi)); + + } else { // not in memory + assert((*pi)->pData == NULL && (*pi)->pn == NULL && (*pi)->length >= 0 && (*pi)->offset >= 0); + + char* availablePage = NULL; + if (NO_IN_MEM_AVAILABLE_PAGES(pBuf)) { + availablePage = evacOneDataPage(pBuf); + if (availablePage == NULL) { + return NULL; + } + } + + if (availablePage == NULL) { + (*pi)->pData = calloc(1, getAllocPageSize(pBuf->pageSize)); + } else { + (*pi)->pData = availablePage; + } + + ((void**)((*pi)->pData))[0] = (*pi); + + lruListPushFront(pBuf->lruList, *pi); + (*pi)->used = true; + + int32_t code = loadPageFromDisk(pBuf, *pi); + if (code != 0) { + return NULL; + } + + return (void *)(GET_DATA_PAYLOAD(*pi)); + } +} + +void releaseBufPage(SDiskbasedBuf* pBuf, void* page) { + assert(pBuf != NULL && page != NULL); + int32_t offset = offsetof(SPageInfo, pData); + char* p = page - offset; + + SPageInfo* ppi = ((SPageInfo**) p)[0]; + releaseBufPageInfo(pBuf, ppi); +} + +void releaseBufPageInfo(SDiskbasedBuf* pBuf, SPageInfo* pi) { + assert(pi->pData != NULL && pi->used); + + pi->used = false; + pBuf->statis.releasePages += 1; +} + +size_t getNumOfResultBufGroupId(const SDiskbasedBuf* pBuf) { return taosHashGetSize(pBuf->groupSet); } + +size_t getTotalBufSize(const SDiskbasedBuf* pBuf) { return (size_t)pBuf->totalBufSize; } + +SIDList getDataBufPagesIdList(SDiskbasedBuf* pBuf, int32_t groupId) { + assert(pBuf != NULL); + + char** p = taosHashGet(pBuf->groupSet, (const char*)&groupId, sizeof(int32_t)); + if (p == NULL) { // it is a new group id + return pBuf->emptyDummyIdList; + } else { + return (SArray*) (*p); + } +} + +void destroyResultBuf(SDiskbasedBuf* pBuf) { + if (pBuf == NULL) { + return; + } + + printStatisData(pBuf); + + if (pBuf->file != NULL) { + uDebug("Paged buffer closed, total:%.2f Kb (%d Pages), inmem size:%.2f Kb (%d Pages), file size:%.2f Kb, page size:%.2f Kb, %"PRIx64"\n", + pBuf->totalBufSize/1024.0, pBuf->numOfPages, listNEles(pBuf->lruList) * pBuf->pageSize / 1024.0, + listNEles(pBuf->lruList), pBuf->fileSize/1024.0, pBuf->pageSize/1024.0f, pBuf->qId); + + fclose(pBuf->file); + } else { + uDebug("Paged buffer closed, total:%.2f Kb, no file created, %"PRIx64, pBuf->totalBufSize/1024.0, pBuf->qId); + } + + // print the statistics information + { + SDiskbasedBufStatis *ps = &pBuf->statis; + uDebug("Get/Release pages:%d/%d, flushToDisk:%.2f Kb (%d Pages), loadFromDisk:%.2f Kb (%d Pages), avgPageSize:%.2f Kb\n" + , ps->getPages, ps->releasePages, ps->flushBytes/1024.0f, ps->flushPages, ps->loadBytes/1024.0f, ps->loadPages + , ps->loadBytes/(1024.0 * ps->loadPages)); + } + + remove(pBuf->path); + tfree(pBuf->path); + + SArray** p = taosHashIterate(pBuf->groupSet, NULL); + while(p) { + size_t n = taosArrayGetSize(*p); + for(int32_t i = 0; i < n; ++i) { + SPageInfo* pi = taosArrayGetP(*p, i); + tfree(pi->pData); + tfree(pi); + } + + taosArrayDestroy(*p); + p = taosHashIterate(pBuf->groupSet, p); + } + + tdListFree(pBuf->lruList); + taosArrayDestroy(pBuf->emptyDummyIdList); + taosHashCleanup(pBuf->groupSet); + taosHashCleanup(pBuf->all); + + tfree(pBuf->assistBuf); + tfree(pBuf); +} + +SPageInfo* getLastPageInfo(SIDList pList) { + size_t size = taosArrayGetSize(pList); + SPageInfo* pPgInfo = taosArrayGetP(pList, size - 1); + return pPgInfo; +} + +int32_t getPageId(const SPageInfo* pPgInfo) { + ASSERT(pPgInfo != NULL); + return pPgInfo->pageId; +} + +int32_t getBufPageSize(const SDiskbasedBuf* pBuf) { + return pBuf->pageSize; +} + +int32_t getNumOfInMemBufPages(const SDiskbasedBuf* pBuf) { + return pBuf->inMemPages; +} + +bool isAllDataInMemBuf(const SDiskbasedBuf* pBuf) { + return pBuf->fileSize == 0; +} + +void setBufPageDirty(SFilePage* pPage, bool dirty) { + int32_t offset = offsetof(SPageInfo, pData); // todo extract method + char* p = (char*)pPage - offset; + + SPageInfo* ppi = ((SPageInfo**) p)[0]; + ppi->dirty = dirty; +} + +void printStatisBeforeClose(SDiskbasedBuf* pBuf) { + pBuf->printStatis = true; +} + +SDiskbasedBufStatis getDBufStatis(const SDiskbasedBuf* pBuf) { + return pBuf->statis; +} + +void printStatisData(const SDiskbasedBuf* pBuf) { + if (!pBuf->printStatis) { + return; + } + + const SDiskbasedBufStatis* ps = &pBuf->statis; + + printf( + "Paged buffer closed, total:%.2f Kb (%d Pages), inmem size:%.2f Kb (%d Pages), file size:%.2f Kb, page size:%.2f " + "Kb, %" PRIx64 "\n", + pBuf->totalBufSize / 1024.0, pBuf->numOfPages, listNEles(pBuf->lruList) * pBuf->pageSize / 1024.0, + listNEles(pBuf->lruList), pBuf->fileSize / 1024.0, pBuf->pageSize / 1024.0f, pBuf->qId); + + printf( + "Get/Release pages:%d/%d, flushToDisk:%.2f Kb (%d Pages), loadFromDisk:%.2f Kb (%d Pages), avgPageSize:%.2f Kb\n", + ps->getPages, ps->releasePages, ps->flushBytes / 1024.0f, ps->flushPages, ps->loadBytes / 1024.0f, ps->loadPages, + ps->loadBytes / (1024.0 * ps->loadPages)); +} diff --git a/source/util/src/tpagedfile.c b/source/util/src/tpagedfile.c deleted file mode 100644 index 3cdba580d4..0000000000 --- a/source/util/src/tpagedfile.c +++ /dev/null @@ -1,468 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define _DEFAULT_SOURCE - -#include "tpagedfile.h" -#include "thash.h" -#include "stddef.h" -#include "taoserror.h" -#include "tcompression.h" - -#define GET_DATA_PAYLOAD(_p) ((char *)(_p)->pData + POINTER_BYTES) -#define NO_IN_MEM_AVAILABLE_PAGES(_b) (listNEles((_b)->lruList) >= (_b)->inMemPages) - -int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t pagesize, int32_t inMemBufSize, uint64_t qId, const char* dir) { - *pResultBuf = calloc(1, sizeof(SDiskbasedResultBuf)); - - SDiskbasedResultBuf* pResBuf = *pResultBuf; - if (pResBuf == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - pResBuf->pageSize = pagesize; - pResBuf->numOfPages = 0; // all pages are in buffer in the first place - pResBuf->totalBufSize = 0; - pResBuf->inMemPages = inMemBufSize/pagesize; // maximum allowed pages, it is a soft limit. - pResBuf->allocateId = -1; - pResBuf->comp = true; - pResBuf->file = NULL; - pResBuf->qId = qId; - pResBuf->fileSize = 0; - - // at least more than 2 pages must be in memory - assert(inMemBufSize >= pagesize * 2); - - pResBuf->lruList = tdListNew(POINTER_BYTES); - - // init id hash table - pResBuf->groupSet = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false); - pResBuf->assistBuf = malloc(pResBuf->pageSize + 2); // EXTRA BYTES - pResBuf->all = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false); - - char path[PATH_MAX] = {0}; - taosGetTmpfilePath(dir, "qbuf", path); - pResBuf->path = strdup(path); - - pResBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t)); - -// qDebug("QInfo:0x%"PRIx64" create resBuf for output, page size:%d, inmem buf pages:%d, file:%s", qId, pResBuf->pageSize, -// pResBuf->inMemPages, pResBuf->path); - - return TSDB_CODE_SUCCESS; -} - -static int32_t createDiskFile(SDiskbasedResultBuf* pResultBuf) { - pResultBuf->file = fopen(pResultBuf->path, "wb+"); - if (pResultBuf->file == NULL) { -// qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno)); - return TAOS_SYSTEM_ERROR(errno); - } - - return TSDB_CODE_SUCCESS; -} - -static char* doCompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) { // do nothing - if (!pResultBuf->comp) { - *dst = srcSize; - return data; - } - - *dst = tsCompressString(data, srcSize, 1, pResultBuf->assistBuf, srcSize, ONE_STAGE_COMP, NULL, 0); - - memcpy(data, pResultBuf->assistBuf, *dst); - return data; -} - -static char* doDecompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) { // do nothing - if (!pResultBuf->comp) { - *dst = srcSize; - return data; - } - - *dst = tsDecompressString(data, srcSize, 1, pResultBuf->assistBuf, pResultBuf->pageSize, ONE_STAGE_COMP, NULL, 0); - if (*dst > 0) { - memcpy(data, pResultBuf->assistBuf, *dst); - } - return data; -} - -static int32_t allocatePositionInFile(SDiskbasedResultBuf* pResultBuf, size_t size) { - if (pResultBuf->pFree == NULL) { - return pResultBuf->nextPos; - } else { - int32_t offset = -1; - - size_t num = taosArrayGetSize(pResultBuf->pFree); - for(int32_t i = 0; i < num; ++i) { - SFreeListItem* pi = taosArrayGet(pResultBuf->pFree, i); - if (pi->len >= size) { - offset = pi->offset; - pi->offset += (int32_t)size; - pi->len -= (int32_t)size; - - return offset; - } - } - - // no available recycle space, allocate new area in file - return pResultBuf->nextPos; - } -} - -static char* doFlushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { - assert(!pg->used && pg->pData != NULL); - - int32_t size = -1; - char* t = doCompressData(GET_DATA_PAYLOAD(pg), pResultBuf->pageSize, &size, pResultBuf); - - // this page is flushed to disk for the first time - if (pg->info.offset == -1) { - pg->info.offset = allocatePositionInFile(pResultBuf, size); - pResultBuf->nextPos += size; - - int32_t ret = fseek(pResultBuf->file, pg->info.offset, SEEK_SET); - assert(ret == 0); - - ret = (int32_t) fwrite(t, 1, size, pResultBuf->file); - assert(ret == size); - - if (pResultBuf->fileSize < pg->info.offset + pg->info.length) { - pResultBuf->fileSize = pg->info.offset + pg->info.length; - } - } else { - // length becomes greater, current space is not enough, allocate new place, otherwise, do nothing - if (pg->info.length < size) { - // 1. add current space to free list - taosArrayPush(pResultBuf->pFree, &pg->info); - - // 2. allocate new position, and update the info - pg->info.offset = allocatePositionInFile(pResultBuf, size); - pResultBuf->nextPos += size; - } - - //3. write to disk. - int32_t ret = fseek(pResultBuf->file, pg->info.offset, SEEK_SET); - if (ret != 0) { // todo handle the error case - - } - - ret = (int32_t)fwrite(t, size, 1, pResultBuf->file); - if (ret != size) { // todo handle the error case - - } - - if (pResultBuf->fileSize < pg->info.offset + pg->info.length) { - pResultBuf->fileSize = pg->info.offset + pg->info.length; - } - } - - char* ret = pg->pData; - memset(ret, 0, pResultBuf->pageSize); - - pg->pData = NULL; - pg->info.length = size; - - pResultBuf->statis.flushBytes += pg->info.length; - - return ret; -} - -static char* flushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { - int32_t ret = TSDB_CODE_SUCCESS; - assert(((int64_t) pResultBuf->numOfPages * pResultBuf->pageSize) == pResultBuf->totalBufSize && pResultBuf->numOfPages >= pResultBuf->inMemPages); - - if (pResultBuf->file == NULL) { - if ((ret = createDiskFile(pResultBuf)) != TSDB_CODE_SUCCESS) { - terrno = ret; - return NULL; - } - } - - return doFlushPageToDisk(pResultBuf, pg); -} - -// load file block data in disk -static char* loadPageFromDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { - int32_t ret = fseek(pResultBuf->file, pg->info.offset, SEEK_SET); - ret = (int32_t)fread(GET_DATA_PAYLOAD(pg), 1, pg->info.length, pResultBuf->file); - if (ret != pg->info.length) { - terrno = errno; - return NULL; - } - - pResultBuf->statis.loadBytes += pg->info.length; - - int32_t fullSize = 0; - doDecompressData(GET_DATA_PAYLOAD(pg), pg->info.length, &fullSize, pResultBuf); - - return (char*)GET_DATA_PAYLOAD(pg); -} - -static SIDList addNewGroup(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { - assert(taosHashGet(pResultBuf->groupSet, (const char*) &groupId, sizeof(int32_t)) == NULL); - - SArray* pa = taosArrayInit(1, POINTER_BYTES); - int32_t ret = taosHashPut(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t), &pa, POINTER_BYTES); - assert(ret == 0); - - return pa; -} - -static SPageInfo* registerPage(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) { - SIDList list = NULL; - - char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t)); - if (p == NULL) { // it is a new group id - list = addNewGroup(pResultBuf, groupId); - } else { - list = (SIDList) (*p); - } - - pResultBuf->numOfPages += 1; - - SPageInfo* ppi = malloc(sizeof(SPageInfo));//{ .info = PAGE_INFO_INITIALIZER, .pageId = pageId, .pn = NULL}; - - ppi->pageId = pageId; - ppi->pData = NULL; - ppi->info = PAGE_INFO_INITIALIZER; - ppi->used = true; - ppi->pn = NULL; - - return *(SPageInfo**) taosArrayPush(list, &ppi); -} - -static SListNode* getEldestUnrefedPage(SDiskbasedResultBuf* pResultBuf) { - SListIter iter = {0}; - tdListInitIter(pResultBuf->lruList, &iter, TD_LIST_BACKWARD); - - SListNode* pn = NULL; - while((pn = tdListNext(&iter)) != NULL) { - assert(pn != NULL); - - SPageInfo* pageInfo = *(SPageInfo**) pn->data; - assert(pageInfo->pageId >= 0 && pageInfo->pn == pn); - - if (!pageInfo->used) { - break; - } - } - - return pn; -} - -static char* evicOneDataPage(SDiskbasedResultBuf* pResultBuf) { - char* bufPage = NULL; - SListNode* pn = getEldestUnrefedPage(pResultBuf); - - // all pages are referenced by user, try to allocate new space - if (pn == NULL) { - int32_t prev = pResultBuf->inMemPages; - - // increase by 50% of previous mem pages - pResultBuf->inMemPages = (int32_t)(pResultBuf->inMemPages * 1.5f); - -// qWarn("%p in memory buf page not sufficient, expand from %d to %d, page size:%d", pResultBuf, prev, -// pResultBuf->inMemPages, pResultBuf->pageSize); - } else { - pResultBuf->statis.flushPages += 1; - tdListPopNode(pResultBuf->lruList, pn); - - SPageInfo* d = *(SPageInfo**) pn->data; - assert(d->pn == pn); - - d->pn = NULL; - tfree(pn); - - bufPage = flushPageToDisk(pResultBuf, d); - } - - return bufPage; -} - -static void lruListPushFront(SList *pList, SPageInfo* pi) { - tdListPrepend(pList, &pi); - SListNode* front = tdListGetHead(pList); - pi->pn = front; -} - -static void lruListMoveToFront(SList *pList, SPageInfo* pi) { - tdListPopNode(pList, pi->pn); - tdListPrependNode(pList, pi->pn); -} - -static FORCE_INLINE size_t getAllocPageSize(int32_t pageSize) { - return pageSize + POINTER_BYTES + 2 + sizeof(SFilePage); -} - -SFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId) { - pResultBuf->statis.getPages += 1; - - char* availablePage = NULL; - if (NO_IN_MEM_AVAILABLE_PAGES(pResultBuf)) { - availablePage = evicOneDataPage(pResultBuf); - } - - // register new id in this group - *pageId = (++pResultBuf->allocateId); - - // register page id info - SPageInfo* pi = registerPage(pResultBuf, groupId, *pageId); - - // add to LRU list - assert(listNEles(pResultBuf->lruList) < pResultBuf->inMemPages && pResultBuf->inMemPages > 0); - - lruListPushFront(pResultBuf->lruList, pi); - - // add to hash map - taosHashPut(pResultBuf->all, pageId, sizeof(int32_t), &pi, POINTER_BYTES); - - // allocate buf - if (availablePage == NULL) { - pi->pData = calloc(1, getAllocPageSize(pResultBuf->pageSize)); // add extract bytes in case of zipped buffer increased. - } else { - pi->pData = availablePage; - } - - pResultBuf->totalBufSize += pResultBuf->pageSize; - - ((void**)pi->pData)[0] = pi; - pi->used = true; - - return (void *)(GET_DATA_PAYLOAD(pi)); -} - -SFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) { - assert(pResultBuf != NULL && id >= 0); - pResultBuf->statis.getPages += 1; - - SPageInfo** pi = taosHashGet(pResultBuf->all, &id, sizeof(int32_t)); - assert(pi != NULL && *pi != NULL); - - if ((*pi)->pData != NULL) { // it is in memory - // no need to update the LRU list if only one page exists - if (pResultBuf->numOfPages == 1) { - (*pi)->used = true; - return (void *)(GET_DATA_PAYLOAD(*pi)); - } - - SPageInfo** pInfo = (SPageInfo**) ((*pi)->pn->data); - assert(*pInfo == *pi); - - lruListMoveToFront(pResultBuf->lruList, (*pi)); - (*pi)->used = true; - - return (void *)(GET_DATA_PAYLOAD(*pi)); - - } else { // not in memory - assert((*pi)->pData == NULL && (*pi)->pn == NULL && (*pi)->info.length >= 0 && (*pi)->info.offset >= 0); - - char* availablePage = NULL; - if (NO_IN_MEM_AVAILABLE_PAGES(pResultBuf)) { - availablePage = evicOneDataPage(pResultBuf); - } - - if (availablePage == NULL) { - (*pi)->pData = calloc(1, getAllocPageSize(pResultBuf->pageSize)); - } else { - (*pi)->pData = availablePage; - } - - ((void**)((*pi)->pData))[0] = (*pi); - - lruListPushFront(pResultBuf->lruList, *pi); - (*pi)->used = true; - - loadPageFromDisk(pResultBuf, *pi); - return (void *)(GET_DATA_PAYLOAD(*pi)); - } -} - -void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page) { - assert(pResultBuf != NULL && page != NULL); - char* p = (char*) page - POINTER_BYTES; - - SPageInfo* ppi = ((SPageInfo**) p)[0]; - releaseResBufPageInfo(pResultBuf, ppi); -} - -void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi) { - assert(pi->pData != NULL && pi->used); - - pi->used = false; - pResultBuf->statis.releasePages += 1; -} - -size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf) { return taosHashGetSize(pResultBuf->groupSet); } - -size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf) { return (size_t)pResultBuf->totalBufSize; } - -SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { - assert(pResultBuf != NULL); - - char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t)); - if (p == NULL) { // it is a new group id - return pResultBuf->emptyDummyIdList; - } else { - return (SArray*) (*p); - } -} - -void destroyResultBuf(SDiskbasedResultBuf* pResultBuf) { - if (pResultBuf == NULL) { - return; - } - - if (pResultBuf->file != NULL) { -// qDebug("QInfo:0x%"PRIx64" res output buffer closed, total:%.2f Kb, inmem size:%.2f Kb, file size:%.2f Kb", -// pResultBuf->qId, pResultBuf->totalBufSize/1024.0, listNEles(pResultBuf->lruList) * pResultBuf->pageSize / 1024.0, -// pResultBuf->fileSize/1024.0); - - fclose(pResultBuf->file); - } else { -// qDebug("QInfo:0x%"PRIx64" res output buffer closed, total:%.2f Kb, no file created", pResultBuf->qId, -// pResultBuf->totalBufSize/1024.0); - } - - remove(pResultBuf->path); - tfree(pResultBuf->path); - - SArray** p = taosHashIterate(pResultBuf->groupSet, NULL); - while(p) { - size_t n = taosArrayGetSize(*p); - for(int32_t i = 0; i < n; ++i) { - SPageInfo* pi = taosArrayGetP(*p, i); - tfree(pi->pData); - tfree(pi); - } - - taosArrayDestroy(*p); - p = taosHashIterate(pResultBuf->groupSet, p); - } - - tdListFree(pResultBuf->lruList); - taosArrayDestroy(pResultBuf->emptyDummyIdList); - taosHashCleanup(pResultBuf->groupSet); - taosHashCleanup(pResultBuf->all); - - tfree(pResultBuf->assistBuf); - tfree(pResultBuf); -} - -SPageInfo* getLastPageInfo(SIDList pList) { - size_t size = taosArrayGetSize(pList); - return (SPageInfo*) taosArrayGetP(pList, size - 1); -} - diff --git a/source/util/test/pageBufferTest.cpp b/source/util/test/pageBufferTest.cpp new file mode 100644 index 0000000000..8fa8216223 --- /dev/null +++ b/source/util/test/pageBufferTest.cpp @@ -0,0 +1,165 @@ +#include +#include +#include + +#include "taos.h" +#include "tpagedbuf.h" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#pragma GCC diagnostic ignored "-Wunused-variable" + +namespace { +// simple test +void simpleTest() { + SDiskbasedBuf* pResultBuf = NULL; + int32_t ret = createDiskbasedBuffer(&pResultBuf, 1024, 4096, 1, "/tmp/"); + + int32_t pageId = 0; + int32_t groupId = 0; + + SFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId); + ASSERT_TRUE(pBufPage != NULL); + + ASSERT_EQ(getTotalBufSize(pResultBuf), 1024); + + SIDList list = getDataBufPagesIdList(pResultBuf, groupId); + ASSERT_EQ(taosArrayGetSize(list), 1); + ASSERT_EQ(getNumOfResultBufGroupId(pResultBuf), 1); + + releaseBufPage(pResultBuf, pBufPage); + + SFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId); + + SFilePage* t = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t == pBufPage1); + + SFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId); + SFilePage* t1 = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t1 == pBufPage2); + + SFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId); + SFilePage* t2 = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t2 == pBufPage3); + + SFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId); + SFilePage* t3 = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t3 == pBufPage4); + + SFilePage* pBufPage5 = getNewDataBuf(pResultBuf, groupId, &pageId); + SFilePage* t4 = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t4 == pBufPage5); + + destroyResultBuf(pResultBuf); +} + +void writeDownTest() { + SDiskbasedBuf* pResultBuf = NULL; + int32_t ret = createDiskbasedBuffer(&pResultBuf, 1024, 4*1024, 1, "/tmp/"); + + int32_t pageId = 0; + int32_t writePageId = 0; + int32_t groupId = 0; + int32_t nx = 12345; + + SFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId); + ASSERT_TRUE(pBufPage != NULL); + + *(int32_t*)(pBufPage->data) = nx; + writePageId = pageId; + releaseBufPage(pResultBuf, pBufPage); + + SFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId); + SFilePage* t1 = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t1 == pBufPage1); + ASSERT_TRUE(pageId == 1); + + SFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId); + SFilePage* t2 = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t2 == pBufPage2); + ASSERT_TRUE(pageId == 2); + + SFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId); + SFilePage* t3 = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t3 == pBufPage3); + ASSERT_TRUE(pageId == 3); + + SFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId); + SFilePage* t4 = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t4 == pBufPage4); + ASSERT_TRUE(pageId == 4); + releaseBufPage(pResultBuf, t4); + + // flush the written page to disk, and read it out again + SFilePage* pBufPagex = getBufPage(pResultBuf, writePageId); + ASSERT_EQ(*(int32_t*)pBufPagex->data, nx); + + SArray* pa = getDataBufPagesIdList(pResultBuf, groupId); + ASSERT_EQ(taosArrayGetSize(pa), 5); + + destroyResultBuf(pResultBuf); +} + +void recyclePageTest() { + SDiskbasedBuf* pResultBuf = NULL; + int32_t ret = createDiskbasedBuffer(&pResultBuf, 1024, 4*1024, 1, "/tmp/"); + + int32_t pageId = 0; + int32_t writePageId = 0; + int32_t groupId = 0; + int32_t nx = 12345; + + SFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId); + ASSERT_TRUE(pBufPage != NULL); + releaseBufPage(pResultBuf, pBufPage); + + SFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId); + SFilePage* t1 = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t1 == pBufPage1); + ASSERT_TRUE(pageId == 1); + + SFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId); + SFilePage* t2 = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t2 == pBufPage2); + ASSERT_TRUE(pageId == 2); + + SFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId); + SFilePage* t3 = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t3 == pBufPage3); + ASSERT_TRUE(pageId == 3); + + SFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId); + SFilePage* t4 = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t4 == pBufPage4); + ASSERT_TRUE(pageId == 4); + releaseBufPage(pResultBuf, t4); + + SFilePage* pBufPage5 = getNewDataBuf(pResultBuf, groupId, &pageId); + SFilePage* t5 = getBufPage(pResultBuf, pageId); + ASSERT_TRUE(t5 == pBufPage5); + ASSERT_TRUE(pageId == 5); + + // flush the written page to disk, and read it out again + SFilePage* pBufPagex = getBufPage(pResultBuf, writePageId); + *(int32_t*)(pBufPagex->data) = nx; + writePageId = pageId; // update the data + releaseBufPage(pResultBuf, pBufPagex); + + SFilePage* pBufPagex1 = getBufPage(pResultBuf, 1); + + SArray* pa = getDataBufPagesIdList(pResultBuf, groupId); + ASSERT_EQ(taosArrayGetSize(pa), 6); + + destroyResultBuf(pResultBuf); +} +} // namespace + + +TEST(testCase, resultBufferTest) { + srand(time(NULL)); + simpleTest(); + writeDownTest(); + recyclePageTest(); +} + +#pragma GCC diagnostic pop \ No newline at end of file