diff --git a/cmake/cmake.define b/cmake/cmake.define index 3343798686..56b6b7e1de 100644 --- a/cmake/cmake.define +++ b/cmake/cmake.define @@ -149,6 +149,8 @@ ELSE () CHECK_C_COMPILER_FLAG("-mfma" COMPILER_SUPPORT_FMA) CHECK_C_COMPILER_FLAG("-mavx" COMPILER_SUPPORT_AVX) CHECK_C_COMPILER_FLAG("-mavx2" COMPILER_SUPPORT_AVX2) + CHECK_C_COMPILER_FLAG("-mavx512f" COMPILER_SUPPORT_AVX512F) + CHECK_C_COMPILER_FLAG("-mavx512vbmi" COMPILER_SUPPORT_AVX512BMI) IF (COMPILER_SUPPORT_SSE42) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2") @@ -168,7 +170,13 @@ ELSE () SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2") ENDIF() - MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2) is ACTIVATED") + MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2/AVX512) is ACTIVATED") + + IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi") + MESSAGE(STATUS "avx512 supported by gcc") + ENDIF() ENDIF() # build mode diff --git a/docs/en/12-taos-sql/10-function.md b/docs/en/12-taos-sql/10-function.md index 2ea144c56a..4f8ccc418b 100644 --- a/docs/en/12-taos-sql/10-function.md +++ b/docs/en/12-taos-sql/10-function.md @@ -565,7 +565,7 @@ TO_TIMESTAMP(ts_str_literal, format_str_literal) - The uppercase or lowercase of `MONTH`, `MON`, `DAY`, `DY` and formtas that output digits have same effect when used in `to_timestamp`, like `to_timestamp('2023-JANUARY-01', 'YYYY-month-dd')`, `month` can be replaced by `MONTH`, or `month`. The cases are ignored. - If multi times are specified for one component, the previous will be overwritten. Like `to_timestamp('2023-22-10-10', 'yyyy-yy-MM-dd')`, the output year will be `2022`. - To avoid unexpected time zone used during the convertion, it's recommended to put time zone in the ts string, e.g. '2023-10-10 10:10:10+08'. If time zone not specified, default will be that in server or client. 
-- The default timestamp if some components are not specified will be: `1970-01-01 00:00:00` with the timezone specified or default to local timezone. +- The default timestamp if some components are not specified will be: `1970-01-01 00:00:00` with the timezone specified or default to local timezone. Specifying only `DDD` without `DD` is not supported currently, e.g. the format 'yyyy-mm-ddd' is not supported, but 'yyyy-mm-dd' is supported. - If `AM` or `PM` is specified in formats, the Hour must between `1-12`. - In some cases, `to_timestamp` can convert correctly even the format and the timestamp string are not totally matched. Like `to_timetamp('200101/2', 'yyyyMM1/dd')`, the digit `1` in format string are ignored, and the output timestsamp is `2001-01-02 00:00:00`. Spaces and tabs in formats and tiemstamp string are also ignored automatically. - The precision of the output timestamp will be the same as the table in SELECT stmt, millisecond will be used if no table is specified. The output of `select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns')` will be truncated to millisecond precision. If a nano precision table is specified, no truncation will be applied. Like `select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns') from db_ns.table_ns limit 1`. diff --git a/docs/zh/12-taos-sql/10-function.md b/docs/zh/12-taos-sql/10-function.md index c1dc6a6363..26313390a6 100644 --- a/docs/zh/12-taos-sql/10-function.md +++ b/docs/zh/12-taos-sql/10-function.md @@ -565,7 +565,7 @@ TO_TIMESTAMP(ts_str_literal, format_str_literal) - `MONTH`, `MON`, `DAY`, `DY` 以及其他输出为数字的格式的大小写意义相同, 如 `to_timestamp('2023-JANUARY-01', 'YYYY-month-dd')`, `month`可以被替换为`MONTH` 或者`Month`. - 如果同一字段被指定了多次, 那么前面的指定将会被覆盖. 如 `to_timestamp('2023-22-10-10', 'yyyy-yy-MM-dd')`, 输出年份是`2022`. - 为避免转换时使用了非预期的时区,推荐在时间中携带时区信息,例如'2023-10-10 10:10:10+08',如果未指定时区则默认时区为服务端或客户端指定的时区。 -- 如果没有指定完整的时间,那么默认时间值为指定或默认时区的 `1970-01-01 00:00:00`, 未指定部分使用该默认值中的对应部分. 
+- 如果没有指定完整的时间,那么默认时间值为指定或默认时区的 `1970-01-01 00:00:00`, 未指定部分使用该默认值中的对应部分. 暂不支持只指定年日而不指定月日的格式, 如'yyyy-mm-DDD', 支持'yyyy-mm-DD'. - 如果格式串中有`AM`, `PM`等, 那么小时必须是12小时制, 范围必须是01-12. - `to_timestamp`转换具有一定的容错机制, 在格式串和时间戳串不完全对应时, 有时也可转换, 如: `to_timestamp('200101/2', 'yyyyMM1/dd')`, 格式串中多出来的1会被丢弃. 格式串与时间戳串中多余的空格字符(空格, tab等)也会被 自动忽略. 如`to_timestamp(' 23 年 - 1 月 - 01 日 ', 'yy 年-MM月-dd日')` 可以被成功转换. 虽然`MM`等字段需要两个数字对应(只有一位时前面补0), 在`to_timestamp`时, 一个数字也可以成功转换. - 输出时间戳的精度与查询表的精度相同, 若查询未指定表, 则输出精度为毫秒. 如`select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns')`的输出将会把微妙和纳秒进行截断. 如果指定一张纳秒表, 那么就不会发生截断, 如`select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns') from db_ns.table_ns limit 1`. diff --git a/include/common/tcommon.h b/include/common/tcommon.h index e072eaa831..30720da8a7 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -365,6 +365,11 @@ typedef struct SSortExecInfo { int32_t readBytes; // read io bytes } SSortExecInfo; +typedef struct SNonSortExecInfo { + int32_t blkNums; +} SNonSortExecInfo; + + typedef struct STUidTagInfo { char* name; uint64_t uid; diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 4ef4273631..86d34502c6 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -3774,6 +3774,7 @@ typedef struct { int64_t suid; SArray* deleteReqs; // SArray int64_t ctimeMs; // fill by vnode + int8_t level; // 0 tsdb(default), 1 rsma1 , 2 rsma2 } SBatchDeleteReq; int32_t tEncodeSBatchDeleteReq(SEncoder* pCoder, const SBatchDeleteReq* pReq); diff --git a/include/libs/nodes/nodes.h b/include/libs/nodes/nodes.h index 9725aa48c0..7fbdbfb211 100644 --- a/include/libs/nodes/nodes.h +++ b/include/libs/nodes/nodes.h @@ -121,6 +121,7 @@ int32_t nodesListMakeAppend(SNodeList** pList, SNode* pNode); int32_t nodesListMakeStrictAppend(SNodeList** pList, SNode* pNode); int32_t nodesListAppendList(SNodeList* pTarget, SNodeList* pSrc); int32_t nodesListStrictAppendList(SNodeList* pTarget, SNodeList* pSrc); 
+int32_t nodesListMakeStrictAppendList(SNodeList** pTarget, SNodeList* pSrc); int32_t nodesListPushFront(SNodeList* pList, SNode* pNode); SListCell* nodesListErase(SNodeList* pList, SListCell* pCell); void nodesListInsertList(SNodeList* pTarget, SListCell* pPos, SNodeList* pSrc); diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index 4ffcb616dd..e29750d8a0 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -40,6 +40,13 @@ typedef enum EGroupAction { GROUP_ACTION_CLEAR } EGroupAction; +typedef enum EMergeType { + MERGE_TYPE_SORT = 1, + MERGE_TYPE_NON_SORT, + MERGE_TYPE_COLUMNS, + MERGE_TYPE_MAX_VALUE +} EMergeType; + typedef struct SLogicNode { ENodeType type; bool dynamicOp; @@ -138,6 +145,7 @@ typedef struct SAggLogicNode { bool hasGroupKeyOptimized; bool isGroupTb; bool isPartTb; // true if partition keys has tbname + bool hasGroup; } SAggLogicNode; typedef struct SProjectLogicNode { @@ -221,6 +229,8 @@ typedef struct SMergeLogicNode { SNodeList* pInputs; int32_t numOfChannels; int32_t srcGroupId; + bool colsMerge; + bool needSort; bool groupSort; bool ignoreGroupId; bool inputWithGroupId; @@ -532,6 +542,7 @@ typedef struct SExchangePhysiNode { typedef struct SMergePhysiNode { SPhysiNode node; + EMergeType type; SNodeList* pMergeKeys; SNodeList* pTargets; int32_t numOfChannels; diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 6c991543e3..654a0b6abc 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -304,6 +304,7 @@ typedef struct SCheckpointInfo { int64_t startTs; int64_t checkpointId; int64_t checkpointVer; // latest checkpointId version + int64_t processedVer; // already processed ver, that has generated results version. 
int64_t nextProcessVer; // current offset in WAL, not serialize it int64_t failedId; // record the latest failed checkpoint id } SCheckpointInfo; @@ -460,7 +461,7 @@ typedef struct STaskStartInfo { int32_t taskStarting; // restart flag, sentinel to guard the restart procedure. SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing SHashObj* pFailedTaskSet; // tasks that are done the check downstream process, may be successful or failed - int32_t elapsedTime; + int64_t elapsedTime; } STaskStartInfo; typedef struct STaskUpdateInfo { diff --git a/include/os/osEnv.h b/include/os/osEnv.h index bc65da47a9..ac4ecd4212 100644 --- a/include/os/osEnv.h +++ b/include/os/osEnv.h @@ -36,11 +36,12 @@ extern int64_t tsStreamMax; extern float tsNumOfCores; extern int64_t tsTotalMemoryKB; extern char *tsProcPath; -extern char tsSIMDBuiltins; +extern char tsSIMDEnable; extern char tsSSE42Enable; extern char tsAVXEnable; extern char tsAVX2Enable; extern char tsFMAEnable; +extern char tsAVX512Enable; extern char tsTagFilterCache; extern char configDir[]; diff --git a/include/os/osSysinfo.h b/include/os/osSysinfo.h index 29b6f07dca..7a1df2b81c 100644 --- a/include/os/osSysinfo.h +++ b/include/os/osSysinfo.h @@ -41,7 +41,7 @@ int32_t taosGetOsReleaseName(char *releaseName, char* sName, char* ver, int32_t int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores); int32_t taosGetCpuCores(float *numOfCores, bool physical); void taosGetCpuUsage(double *cpu_system, double *cpu_engine); -int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma); +int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma, char* avx512); int32_t taosGetTotalMemory(int64_t *totalKB); int32_t taosGetProcMemory(int64_t *usedKB); int32_t taosGetSysMemory(int64_t *usedKB); diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 980d5b9177..7665550153 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h 
@@ -754,6 +754,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_FUNC_DUP_TIMESTAMP TAOS_DEF_ERROR_CODE(0, 0x2805) #define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR TAOS_DEF_ERROR_CODE(0, 0x2806) #define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR TAOS_DEF_ERROR_CODE(0, 0x2807) +#define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED TAOS_DEF_ERROR_CODE(0, 0x2808) //udf #define TSDB_CODE_UDF_STOPPING TAOS_DEF_ERROR_CODE(0, 0x2901) diff --git a/source/common/src/cos.c b/source/common/src/cos.c index 0b6b0db885..ea41afd8fb 100644 --- a/source/common/src/cos.c +++ b/source/common/src/cos.c @@ -267,8 +267,6 @@ typedef struct list_parts_callback_data { } list_parts_callback_data; typedef struct MultipartPartData { - char err_msg[512]; - S3Status status; put_object_callback_data put_object_data; int seq; UploadManager *manager; @@ -276,11 +274,12 @@ typedef struct MultipartPartData { static int putObjectDataCallback(int bufferSize, char *buffer, void *callbackData) { put_object_callback_data *data = (put_object_callback_data *)callbackData; + /* if (data->infileFD == 0) { MultipartPartData *mpd = (MultipartPartData *)callbackData; data = &mpd->put_object_data; } - + */ int ret = 0; if (data->contentLength) { @@ -458,13 +457,13 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) { int metaPropertiesCount = 0; S3NameValue metaProperties[S3_MAX_METADATA_COUNT]; char useServerSideEncryption = 0; - int noStatus = 0; - put_object_callback_data data; + put_object_callback_data data = {0}; + // int noStatus = 0; // data.infile = 0; - data.infileFD = NULL; - data.gb = 0; - data.noStatus = noStatus; + // data.gb = 0; + // data.infileFD = NULL; + // data.noStatus = noStatus; if (taosStatFile(file, &contentLength, NULL, NULL) < 0) { uError("ERROR: %s Failed to stat file %s: ", __func__, file); @@ -581,9 +580,9 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) { do { S3_upload_part(&bucketContext, key, &putProperties, &putObjectHandler, seq, 
manager.upload_id, partContentLength, 0, timeoutMsG, &partData); - } while (S3_status_is_retryable(partData.status) && should_retry()); - if (partData.status != S3StatusOK) { - s3PrintError(__func__, partData.status, partData.err_msg); + } while (S3_status_is_retryable(partData.put_object_data.status) && should_retry()); + if (partData.put_object_data.status != S3StatusOK) { + s3PrintError(__func__, partData.put_object_data.status, partData.put_object_data.err_msg); code = TAOS_SYSTEM_ERROR(EIO); goto clean; } diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index bf21b2eda0..054cff560f 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -2121,6 +2121,7 @@ _end: char* buildCtbNameByGroupId(const char* stbFullName, uint64_t groupId) { char* pBuf = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN + 1); if (!pBuf) { + terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } int32_t code = buildCtbNameByGroupIdImpl(stbFullName, groupId, pBuf); @@ -2133,6 +2134,7 @@ char* buildCtbNameByGroupId(const char* stbFullName, uint64_t groupId) { int32_t buildCtbNameByGroupIdImpl(const char* stbFullName, uint64_t groupId, char* cname) { if (stbFullName[0] == 0) { + terrno = TSDB_CODE_INVALID_PARA; return TSDB_CODE_FAILED; } @@ -2142,6 +2144,7 @@ int32_t buildCtbNameByGroupIdImpl(const char* stbFullName, uint64_t groupId, cha } if (cname == NULL) { + terrno = TSDB_CODE_INVALID_PARA; taosArrayDestroy(tags); return TSDB_CODE_FAILED; } diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 495aee1ece..4a1ba9e391 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -249,7 +249,7 @@ int32_t tsTransPullupInterval = 2; int32_t tsMqRebalanceInterval = 2; int32_t tsStreamCheckpointInterval = 60; float tsSinkDataRate = 2.0; -int32_t tsStreamNodeCheckInterval = 30; +int32_t tsStreamNodeCheckInterval = 15; int32_t tsTtlUnit = 86400; int32_t tsTtlPushIntervalSec = 10; int32_t tsTrimVDbIntervalSec = 60 * 
60; // interval of trimming db in all vgroups @@ -529,7 +529,8 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) { if (cfgAddBool(pCfg, "avx", tsAVXEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "avx2", tsAVX2Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "fma", tsFMAEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "simdEnable", tsSIMDBuiltins, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "avx512", tsAVX512Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "simdEnable", tsSIMDEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; @@ -609,7 +610,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { 0) return -1; - tsNumOfVnodeRsmaThreads = tsNumOfCores / 2; + tsNumOfVnodeRsmaThreads = tsNumOfCores / 4; tsNumOfVnodeRsmaThreads = TMAX(tsNumOfVnodeRsmaThreads, 4); if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; @@ -1101,7 +1102,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsNumOfSnodeWriteThreads = cfgGetItem(pCfg, "numOfSnodeUniqueThreads")->i32; tsRpcQueueMemoryAllowed = cfgGetItem(pCfg, "rpcQueueMemoryAllowed")->i64; - tsSIMDBuiltins = (bool)cfgGetItem(pCfg, "simdEnable")->bval; + tsSIMDEnable = (bool)cfgGetItem(pCfg, "simdEnable")->bval; tsTagFilterCache = (bool)cfgGetItem(pCfg, "tagFilterCache")->bval; tsEnableMonitor = cfgGetItem(pCfg, "monitor")->bval; diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 34b789fef8..dc3ba7934f 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -8337,6 +8337,7 @@ int32_t tEncodeSBatchDeleteReq(SEncoder *pEncoder, const SBatchDeleteReq *pReq) if 
(tEncodeSSingleDeleteReq(pEncoder, pOneReq) < 0) return -1; } if (tEncodeI64(pEncoder, pReq->ctimeMs) < 0) return -1; + if (tEncodeI8(pEncoder, pReq->level) < 0) return -1; return 0; } @@ -8361,6 +8362,9 @@ int32_t tDecodeSBatchDeleteReq(SDecoder *pDecoder, SBatchDeleteReq *pReq) { if (!tDecodeIsEnd(pDecoder)) { if (tDecodeI64(pDecoder, &pReq->ctimeMs) < 0) return -1; } + if (!tDecodeIsEnd(pDecoder)) { + if (tDecodeI8(pDecoder, &pReq->level) < 0) return -1; + } return 0; } diff --git a/source/common/src/tname.c b/source/common/src/tname.c index c6210ca8c9..4fe2beb6aa 100644 --- a/source/common/src/tname.c +++ b/source/common/src/tname.c @@ -296,7 +296,10 @@ static int compareKv(const void* p1, const void* p2) { void buildChildTableName(RandTableName* rName) { SStringBuilder sb = {0}; taosStringBuilderAppendStringLen(&sb, rName->stbFullName, rName->stbFullNameLen); - if (sb.buf == NULL) return; + if (sb.buf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return; + } taosArraySort(rName->tags, compareKv); for (int j = 0; j < taosArrayGetSize(rName->tags); ++j) { taosStringBuilderAppendChar(&sb, ','); diff --git a/source/common/src/ttime.c b/source/common/src/ttime.c index 4b0848e5e9..a701c88a24 100644 --- a/source/common/src/ttime.c +++ b/source/common/src/ttime.c @@ -1580,6 +1580,7 @@ static bool needMoreDigits(SArray* formats, int32_t curIdx) { /// @retval 0 for success /// @retval -1 for format and s mismatch error /// @retval -2 if datetime err, like 2023-13-32 25:61:69 +/// @retval -3 if not supported static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t precision, const char** sErrPos, int32_t* fErrIdx) { int32_t size = taosArrayGetSize(formats); @@ -1589,6 +1590,7 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec int32_t hour = 0, min = 0, sec = 0, us = 0, ms = 0, ns = 0; int32_t tzSign = 1, tz = tsTimezone; int32_t err = 0; + bool withYD = false, withMD = false; for (int32_t i = 0; i < size && *s != 
'\0'; ++i) { while (isspace(*s) && *s != '\0') { @@ -1782,6 +1784,7 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec } else { s = newPos; } + withYD = true; } break; case TSFKW_DD: { const char* newPos = tsFormatStr2Int32(&md, s, 2, needMoreDigits(formats, i)); @@ -1790,6 +1793,7 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec } else { s = newPos; } + withMD = true; } break; case TSFKW_D: { const char* newPos = tsFormatStr2Int32(&wd, s, 1, needMoreDigits(formats, i)); @@ -1843,6 +1847,10 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec return err; } } + if (!withMD) { + // yyyy-mm-DDD, currently, the c api can't convert to correct timestamp, return not supported + if (withYD) return -3; + } struct STm tm = {0}; tm.tm.tm_year = year - 1900; tm.tm.tm_mon = mon; @@ -1892,8 +1900,13 @@ int32_t taosChar2Ts(const char* format, SArray** formats, const char* tsStr, int TSFormatNode* fNode = (taosArrayGet(*formats, fErrIdx)); snprintf(errMsg, errMsgLen, "mismatch format for: %s and %s", sErrPos, fErrIdx < taosArrayGetSize(*formats) ? 
((TSFormatNode*)taosArrayGet(*formats, fErrIdx))->key->name : ""); + code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR; } else if (code == -2) { snprintf(errMsg, errMsgLen, "timestamp format error: %s -> %s", tsStr, format); + code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR; + } else if (code == -3) { + snprintf(errMsg, errMsgLen, "timestamp format not supported"); + code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED; } return code; } diff --git a/source/common/test/commonTests.cpp b/source/common/test/commonTests.cpp index c65d8761b7..107276d7f9 100644 --- a/source/common/test/commonTests.cpp +++ b/source/common/test/commonTests.cpp @@ -489,6 +489,7 @@ TEST(timeTest, char2ts) { ASSERT_EQ(ts, 0); ASSERT_EQ(0, TEST_char2ts("yyyy年 a a a MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年 a a a 1/1+0")); ASSERT_EQ(0, TEST_char2ts("yyyy年 a a a a a a a a a a a a a a a MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年 a ")); + ASSERT_EQ(-3, TEST_char2ts("yyyy-mm-DDD", &ts, TSDB_TIME_PRECISION_MILLI, "1970-01-001")); } #pragma GCC diagnostic pop diff --git a/source/dnode/mnode/impl/src/mndProfile.c b/source/dnode/mnode/impl/src/mndProfile.c index 471ed99b67..34ee18a5cc 100644 --- a/source/dnode/mnode/impl/src/mndProfile.c +++ b/source/dnode/mnode/impl/src/mndProfile.c @@ -313,7 +313,7 @@ _CONNECT: code = 0; char detail[1000] = {0}; - sprintf(detail, "%s:%d, app:%s", ip, pConn->port, connReq.app); + sprintf(detail, "app:%s", connReq.app); auditRecord(pReq, pMnode->clusterId, "login", "", "", detail, strlen(detail)); diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 135aab285b..7c3c54537d 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -43,7 +43,7 @@ typedef struct SNodeEntry { } SNodeEntry; typedef struct SStreamExecInfo { - SArray * pNodeEntryList; + SArray *pNodeList; int64_t ts; // snapshot ts int64_t activeCheckpoint; // active check point id SHashObj * 
pTaskMap; @@ -850,7 +850,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { mndTransDrop(pTrans); taosThreadMutexLock(&execInfo.lock); - mDebug("register to stream task node list"); + mDebug("stream tasks register into node list"); keepStreamTasksInBuf(&streamObj, &execInfo); taosThreadMutexUnlock(&execInfo.lock); @@ -1141,6 +1141,15 @@ static const char *mndGetStreamDB(SMnode *pMnode) { return p; } +static int32_t initStreamNodeList(SMnode* pMnode) { + if (execInfo.pNodeList == NULL || (taosArrayGetSize(execInfo.pNodeList) == 0)) { + execInfo.pNodeList = taosArrayDestroy(execInfo.pNodeList); + execInfo.pNodeList = extractNodeListFromStream(pMnode); + } + + return taosArrayGetSize(execInfo.pNodeList); +} + static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { SMnode * pMnode = pReq->info.node; SSdb * pSdb = pMnode->pSdb; @@ -1151,22 +1160,18 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { { // check if the node update happens or not int64_t ts = taosGetTimestampSec(); - if (execInfo.pNodeEntryList == NULL || (taosArrayGetSize(execInfo.pNodeEntryList) == 0)) { - if (execInfo.pNodeEntryList != NULL) { - execInfo.pNodeEntryList = taosArrayDestroy(execInfo.pNodeEntryList); - } + taosThreadMutexLock(&execInfo.lock); + int32_t numOfNodes = initStreamNodeList(pMnode); + taosThreadMutexUnlock(&execInfo.lock); - execInfo.pNodeEntryList = extractNodeListFromStream(pMnode); - } - - if (taosArrayGetSize(execInfo.pNodeEntryList) == 0) { + if (numOfNodes == 0) { mDebug("stream task node change checking done, no vgroups exist, do nothing"); execInfo.ts = ts; return 0; } - for (int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeEntryList); ++i) { - SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, i); + for(int32_t i = 0; i < numOfNodes; ++i) { + SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, i); if (pNodeEntry->stageUpdated) { mDebug("stream task not ready due to node update detected, checkpoint not issued"); return 0; @@ 
-1180,7 +1185,7 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { return 0; } - SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeEntryList, pNodeSnapshot); + SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot); bool nodeUpdated = (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0); taosArrayDestroy(changeInfo.pUpdateNodeList); taosHashCleanup(changeInfo.pDBMap); @@ -2095,20 +2100,21 @@ static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { break; } - SNodeEntry entry = {0}; + SNodeEntry entry = {.nodeId = pVgroup->vgId, .hbTimestamp = pVgroup->updateTime}; entry.epset = mndGetVgroupEpset(pMnode, pVgroup); - entry.nodeId = pVgroup->vgId; - entry.hbTimestamp = pVgroup->updateTime; + // if not all ready till now, no need to check the remaining vgroups. if (*allReady) { for (int32_t i = 0; i < pVgroup->replica; ++i) { if (!pVgroup->vnodeGid[i].syncRestore) { + mInfo("vgId:%d not restored, not ready for checkpoint or other operations", pVgroup->vgId); *allReady = false; break; } ESyncState state = pVgroup->vnodeGid[i].syncState; if (state == TAOS_SYNC_STATE_OFFLINE || state == TAOS_SYNC_STATE_ERROR) { + mInfo("vgId:%d offline/err, not ready for checkpoint or other operations", pVgroup->vgId); *allReady = false; break; } @@ -2314,9 +2320,9 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { (int32_t)taosArrayGetSize(execInfo.pTaskList)); int32_t size = taosArrayGetSize(pNodeSnapshot); - SArray *pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry)); - for (int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeEntryList); ++i) { - SNodeEntry *p = taosArrayGet(execInfo.pNodeEntryList, i); + SArray* pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry)); + for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) { + SNodeEntry* p = taosArrayGet(execInfo.pNodeList, i); for (int32_t j = 0; j < size; ++j) { SNodeEntry *pEntry = 
taosArrayGet(pNodeSnapshot, j); @@ -2327,8 +2333,8 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { } } - execInfo.pNodeEntryList = taosArrayDestroy(execInfo.pNodeEntryList); - execInfo.pNodeEntryList = pValidNodeEntryList; + taosArrayDestroy(execInfo.pNodeList); + execInfo.pNodeList = pValidNodeEntryList; mDebug("remain %d valid node entries", (int32_t)taosArrayGetSize(pValidNodeEntryList)); taosArrayDestroy(pRemovedTasks); @@ -2338,6 +2344,7 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { // this function runs by only one thread, so it is not multi-thread safe static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { int32_t code = 0; + int32_t old = atomic_val_compare_exchange_32(&mndNodeCheckSentinel, 0, 1); if (old != 0) { mDebug("still in checking node change"); @@ -2348,23 +2355,21 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { int64_t ts = taosGetTimestampSec(); SMnode *pMnode = pMsg->info.node; - if (execInfo.pNodeEntryList == NULL || (taosArrayGetSize(execInfo.pNodeEntryList) == 0)) { - if (execInfo.pNodeEntryList != NULL) { - execInfo.pNodeEntryList = taosArrayDestroy(execInfo.pNodeEntryList); - } - execInfo.pNodeEntryList = extractNodeListFromStream(pMnode); - } - if (taosArrayGetSize(execInfo.pNodeEntryList) == 0) { + taosThreadMutexLock(&execInfo.lock); + int32_t numOfNodes = initStreamNodeList(pMnode); + taosThreadMutexUnlock(&execInfo.lock); + + if (numOfNodes == 0) { mDebug("end to do stream task node change checking, no vgroup exists, do nothing"); execInfo.ts = ts; atomic_store_32(&mndNodeCheckSentinel, 0); return 0; } - bool allVnodeReady = true; - SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allVnodeReady); - if (!allVnodeReady) { + bool allVgroupsReady = true; + SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allVgroupsReady); + if (!allVgroupsReady) { taosArrayDestroy(pNodeSnapshot); atomic_store_32(&mndNodeCheckSentinel, 0); mWarn("not all vnodes are ready, ignore the exec 
nodeUpdate check"); @@ -2374,7 +2379,7 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { taosThreadMutexLock(&execInfo.lock); removeExpirednodeEntryAndTask(pNodeSnapshot); - SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeEntryList, pNodeSnapshot); + SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot); if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) { // kill current active checkpoint transaction, since the transaction is vnode wide. doKillActiveCheckpointTrans(pMnode); @@ -2383,8 +2388,8 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { // keep the new vnode snapshot if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) { mDebug("create trans successfully, update cached node list"); - taosArrayDestroy(execInfo.pNodeEntryList); - execInfo.pNodeEntryList = pNodeSnapshot; + taosArrayDestroy(execInfo.pNodeList); + execInfo.pNodeList = pNodeSnapshot; execInfo.ts = ts; } else { mDebug("unexpect code during create nodeUpdate trans, code:%s", tstrerror(code)); @@ -2584,7 +2589,7 @@ int32_t doKillActiveCheckpointTrans(SMnode *pMnode) { } if (transId == 0) { - mError("failed to find the checkpoint trans, reset not executed"); + mDebug("failed to find the checkpoint trans, reset not executed"); return TSDB_CODE_SUCCESS; } @@ -2623,16 +2628,18 @@ int32_t mndResetFromCheckpoint(SMnode *pMnode) { int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { int32_t num = taosArrayGetSize(pNodeList); + mInfo("set node expired for %d nodes", num); for (int k = 0; k < num; ++k) { - int32_t *pVgId = taosArrayGet(pNodeList, k); + int32_t* pVgId = taosArrayGet(pNodeList, k); + mInfo("set node expired for nodeId:%d, total:%d", *pVgId, num); - int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeEntryList); + int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList); for (int i = 0; i < numOfNodes; ++i) { - SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, i); + 
SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, i); if (pNodeEntry->nodeId == *pVgId) { - mInfo("vgId:%d expired in stream task, needs update nodeEp", *pVgId); + mInfo("vgId:%d expired for some stream tasks, needs update nodeEp", *pVgId); pNodeEntry->stageUpdated = true; break; } @@ -2642,10 +2649,10 @@ int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { return TSDB_CODE_SUCCESS; } -static void updateStageInfo(STaskStatusEntry *pTaskEntry, int32_t stage) { - int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeEntryList); - for (int32_t j = 0; j < numOfNodes; ++j) { - SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, j); +static void updateStageInfo(STaskStatusEntry* pTaskEntry, int32_t stage) { + int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList); + for(int32_t j = 0; j < numOfNodes; ++j) { + SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, j); if (pNodeEntry->nodeId == pTaskEntry->nodeId) { mInfo("vgId:%d stage updated from %d to %d, nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId, pTaskEntry->stage, stage, pTaskEntry->id.taskId); @@ -2677,12 +2684,20 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { mTrace("receive stream-meta hb from vgId:%d, active numOfTasks:%d", req.vgId, req.numOfTasks); taosThreadMutexLock(&execInfo.lock); + + // extract stream task list int32_t numOfExisted = taosHashGetSize(execInfo.pTaskMap); if (numOfExisted == 0) { doExtractTasksFromStream(pMnode); } - setNodeEpsetExpiredFlag(req.pUpdateNodes); + initStreamNodeList(pMnode); + + int32_t numOfUpdated = taosArrayGetSize(req.pUpdateNodes); + if (numOfUpdated > 0) { + mDebug("%d stream node(s) need updated from report of hbMsg(vgId:%d)", numOfUpdated, req.vgId); + setNodeEpsetExpiredFlag(req.pUpdateNodes); + } for (int32_t i = 0; i < req.numOfTasks; ++i) { STaskStatusEntry *p = taosArrayGet(req.pTaskStatus, i); diff --git a/source/dnode/mnode/impl/src/mndUser.c b/source/dnode/mnode/impl/src/mndUser.c index 585263ef95..66abfd6bc1 100644 
--- a/source/dnode/mnode/impl/src/mndUser.c +++ b/source/dnode/mnode/impl/src/mndUser.c @@ -1561,7 +1561,11 @@ static int32_t mndProcessCreateUserReq(SRpcMsg *pReq) { code = mndCreateUser(pMnode, pOperUser->acct, &createReq, pReq); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; - auditRecord(pReq, pMnode->clusterId, "createUser", "", createReq.user, createReq.sql, createReq.sqlLen); + char detail[1000] = {0}; + sprintf(detail, "enable:%d, superUser:%d, sysInfo:%d, password:xxx", + createReq.enable, createReq.superUser, createReq.sysInfo); + + auditRecord(pReq, pMnode->clusterId, "createUser", "", createReq.user, detail, strlen(detail)); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 675bfa334a..fdd449bf36 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -147,7 +147,7 @@ int32_t tqOffsetDelete(STqOffsetStore* pStore, const char* subscribeKey) int32_t tqOffsetCommitFile(STqOffsetStore* pStore); // tqSink -int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, +int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, const char* pIdStr); void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data); @@ -160,7 +160,7 @@ int32_t tqResetStreamTaskStatus(STQ* pTq); int32_t tqStopStreamTasks(STQ* pTq); // tq util -int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock); +int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type); int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg); int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, int32_t type, int64_t sver, int64_t ever); diff --git 
a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 264c64e0fc..df1720d4a7 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -286,7 +286,8 @@ int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg); int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg); int32_t tdProcessRSmaCreate(SSma* pSma, SVCreateStbReq* pReq); -int32_t tdProcessRSmaSubmit(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len, int32_t inputType); +int32_t tdProcessRSmaSubmit(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len); +int32_t tdProcessRSmaDelete(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len); int32_t tdProcessRSmaDrop(SSma* pSma, SVDropStbReq* pReq); int32_t tdFetchTbUidList(SSma* pSma, STbUidStore** ppStore, tb_uid_t suid, tb_uid_t uid); int32_t tdUpdateTbUidList(SSma* pSma, STbUidStore* pUidStore, bool isAdd); diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 92b8c09fbc..3512f1476f 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -217,10 +217,7 @@ static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) { int32_t lino = 0; SVnode *pVnode = pSma->pVnode; - SSmaEnv *pSmaEnv = SMA_RSMA_ENV(pSma); - if (!pSmaEnv) { - goto _exit; - } + if (!SMA_RSMA_ENV(pSma)) goto _exit; code = tsdbCommitBegin(VND_RSMA1(pVnode), pInfo); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 252a3ade36..5dc29509a0 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -17,12 +17,17 @@ #include "tq.h" #include "tstream.h" -#define RSMA_QTASKEXEC_SMOOTH_SIZE (100) // cnt -#define RSMA_SUBMIT_BATCH_SIZE (1024) // cnt -#define RSMA_FETCH_DELAY_MAX (120000) // ms -#define RSMA_FETCH_ACTIVE_MAX (1000) // ms 
-#define RSMA_FETCH_INTERVAL (5000) // ms -#define RSMA_TASK_FLAG "rsma" +#define RSMA_EXEC_SMOOTH_SIZE (100) // cnt +#define RSMA_EXEC_BATCH_SIZE (1024) // cnt +#define RSMA_FETCH_DELAY_MAX (120000) // ms +#define RSMA_FETCH_ACTIVE_MAX (1000) // ms +#define RSMA_FETCH_INTERVAL (5000) // ms +#define RSMA_EXEC_TASK_FLAG "rsma" +#define RSMA_EXEC_MSG_HLEN (13) // type(int8_t) + len(int32_t) + version(int64_t) +#define RSMA_EXEC_MSG_TYPE(msg) (*(int8_t *)(msg)) +#define RSMA_EXEC_MSG_LEN(msg) (*(int32_t *)POINTER_SHIFT((msg), sizeof(int8_t))) +#define RSMA_EXEC_MSG_VER(msg) (*(int64_t *)POINTER_SHIFT((msg), sizeof(int8_t) + sizeof(int32_t))) +#define RSMA_EXEC_MSG_BODY(msg) (POINTER_SHIFT((msg), RSMA_EXEC_MSG_HLEN)) #define RSMA_NEED_FETCH(r) (RSMA_INFO_ITEM((r), 0)->fetchLevel || RSMA_INFO_ITEM((r), 1)->fetchLevel) @@ -38,11 +43,11 @@ static void tdUidStoreDestory(STbUidStore *pStore); static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids, bool isAdd); static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo, int8_t idx); -static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo, - ERsmaExecType type, int8_t level); +static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int64_t version, int32_t inputType, + SRSmaInfo *pInfo, ERsmaExecType type, int8_t level); static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid); static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); -static void tdFreeRSmaSubmitItems(SArray *pItems); +static void tdFreeRSmaSubmitItems(SArray *pItems, int32_t type); static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo); static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo, int32_t execType, int8_t *streamFlushed); @@ -288,8 +293,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam 
*param, SRSmaStat pStreamTask->id.streamId = pRSmaInfo->suid + idx; pStreamTask->chkInfo.startTs = taosGetTimestampMs(); pStreamTask->pMeta = pVnode->pTq->pStreamMeta; - pStreamTask->exec.qmsg = taosMemoryMalloc(strlen(RSMA_TASK_FLAG) + 1); - sprintf(pStreamTask->exec.qmsg, "%s", RSMA_TASK_FLAG); + pStreamTask->exec.qmsg = taosMemoryMalloc(strlen(RSMA_EXEC_TASK_FLAG) + 1); + sprintf(pStreamTask->exec.qmsg, "%s", RSMA_EXEC_TASK_FLAG); pStreamTask->chkInfo.checkpointId = streamMetaGetLatestCheckpointId(pStreamTask->pMeta); tdRSmaTaskInit(pStreamTask->pMeta, pItem, &pStreamTask->id); pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); @@ -624,6 +629,45 @@ _end: return code; } +static int32_t tdRSmaProcessDelReq(SSma *pSma, int64_t suid, int8_t level, SBatchDeleteReq *pDelReq) { + int32_t code = 0; + int32_t lino = 0; + + if (taosArrayGetSize(pDelReq->deleteReqs) > 0) { + int32_t len = 0; + tEncodeSize(tEncodeSBatchDeleteReq, pDelReq, len, code); + TSDB_CHECK_CODE(code, lino, _exit); + + void *pBuf = rpcMallocCont(len + sizeof(SMsgHead)); + if (!pBuf) { + code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + } + + SEncoder encoder; + tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SMsgHead)), len); + tEncodeSBatchDeleteReq(&encoder, pDelReq); + tEncoderClear(&encoder); + + ((SMsgHead *)pBuf)->vgId = TD_VID(pSma->pVnode); + + SRpcMsg delMsg = {.msgType = TDMT_VND_BATCH_DEL, + .pCont = pBuf, + .contLen = len + sizeof(SMsgHead)}; + code = tmsgPutToQueue(&pSma->pVnode->msgCb, WRITE_QUEUE, &delMsg); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + taosArrayDestroy(pDelReq->deleteReqs); + if (code) { + smaError("vgId:%d, failed at line %d to process delete req for table:%" PRIi64 ", level:%" PRIi8 " since %s", + SMA_VID(pSma), lino, suid, level, tstrerror(code)); + } + + return code; +} + static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo, int32_t execType, int8_t *streamFlushed) { 
int32_t code = 0; @@ -652,9 +696,42 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma if (output->info.type == STREAM_CHECKPOINT) { if (streamFlushed) *streamFlushed = 1; continue; + } else if (output->info.type == STREAM_DELETE_RESULT) { + SBatchDeleteReq deleteReq = {.suid = suid, .level = pItem->level}; + deleteReq.deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq)); + if (!deleteReq.deleteReqs) { + code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + } + code = tqBuildDeleteReq(pSma->pVnode->pTq, NULL, output, &deleteReq, ""); + TSDB_CHECK_CODE(code, lino, _exit); + code = tdRSmaProcessDelReq(pSma, suid, pItem->level, &deleteReq); + TSDB_CHECK_CODE(code, lino, _exit); + continue; + } + + smaDebug("vgId:%d, result block, execType:%d, ver:%" PRIi64 ", submitReqVer:%" PRIi64 ", fetchResultVer:%" PRIi64 + ", suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, + SMA_VID(pSma), execType, output->info.version, pItem->submitReqVer, pItem->fetchResultVer, suid, + pItem->level, output->info.id.uid, output->info.id.groupId, output->info.rows); + + if (STREAM_GET_ALL == execType) { + /** + * 1. reset the output version when reboot + * 2. 
delete msg version not updated from the result + */ + if (output->info.version < pItem->submitReqVer) { + // submitReqVer keeps unchanged since tdExecuteRSmaImpl and tdRSmaFetchAllResult are executed synchronously + output->info.version = pItem->submitReqVer; + } else if (output->info.version == pItem->fetchResultVer) { + smaWarn("vgId:%d, result block, skip dup version, execType:%d, ver:%" PRIi64 ", submitReqVer:%" PRIi64 + ", fetchResultVer:%" PRIi64 ", suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIu64 ", groupid:%" PRIu64 + ", rows:%" PRIi64, + SMA_VID(pSma), execType, output->info.version, pItem->submitReqVer, pItem->fetchResultVer, suid, + pItem->level, output->info.id.uid, output->info.id.groupId, output->info.rows); + continue; + } } - smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma), - output->info.id.uid, output->info.id.groupId, output->info.rows); STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); SSubmitReq2 *pReq = NULL; @@ -664,12 +741,6 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma TSDB_CHECK_CODE(code, lino, _exit); } - // reset the output version to handle reboot - if (STREAM_GET_ALL == execType && output->info.version == 0) { - // the submitReqVer keeps unchanged since tdExecuteRSmaImpl and tdRSmaFetchAllResult are executed synchronously - output->info.version = pItem->submitReqVer; - } - if (pReq && tdProcessSubmitReq(sinkTsdb, output->info.version, pReq) < 0) { if (terrno == TSDB_CODE_TDB_TIMESTAMP_OUT_OF_RANGE) { // TODO: reconfigure SSubmitReq2 @@ -686,8 +757,9 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma atomic_store_64(&pItem->fetchResultVer, output->info.version); } - smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level %" PRIi8 " ver %" PRIi64, - SMA_VID(pSma), suid, output->info.id.groupId, pItem->level, output->info.version); + 
smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level:%" PRIi8 + ", execType:%d, ver:%" PRIi64, + SMA_VID(pSma), suid, output->info.id.groupId, pItem->level, execType, output->info.version); if (pReq) { tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); @@ -722,7 +794,7 @@ _exit: */ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *pMsg, int32_t len, int32_t inputType, SRSmaInfo *pInfo, tb_uid_t suid) { - int32_t size = sizeof(int32_t) + sizeof(int64_t) + len; + int32_t size = RSMA_EXEC_MSG_HLEN + len; // header + payload void *qItem = taosAllocateQitem(size, DEF_QITEM, 0); if (!qItem) { @@ -731,6 +803,8 @@ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *p void *pItem = qItem; + *(int8_t *)pItem = (int8_t)inputType; + pItem = POINTER_SHIFT(pItem, sizeof(int8_t)); *(int32_t *)pItem = len; pItem = POINTER_SHIFT(pItem, sizeof(int32_t)); *(int64_t *)pItem = version; @@ -749,7 +823,7 @@ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *p } // smoothing consume - int32_t n = nItems / RSMA_QTASKEXEC_SMOOTH_SIZE; + int32_t n = nItems / RSMA_EXEC_SMOOTH_SIZE; if (n > 1) { if (n > 10) { n = 10; @@ -796,7 +870,7 @@ static int32_t tdRsmaPrintSubmitReq(SSma *pSma, SSubmitReq *pReq) { * @param level * @return int32_t */ -static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo, +static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int64_t version, int32_t inputType, SRSmaInfo *pInfo, ERsmaExecType type, int8_t level) { int32_t idx = level - 1; void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); @@ -813,25 +887,15 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, return TSDB_CODE_FAILED; } - smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p suid:%" PRIu64 " nMsg:%d", SMA_VID(pSma), level, - RSMA_INFO_QTASK(pInfo, idx), pInfo->suid, 
msgSize); + smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p, suid:%" PRIu64 ", nMsg:%d, submitReqVer:%" PRIi64 ", inputType:%d", SMA_VID(pSma), level, + RSMA_INFO_QTASK(pInfo, idx), pInfo->suid, msgSize, version, inputType); -#if 0 - for (int32_t i = 0; i < msgSize; ++i) { - SSubmitReq *pReq = *(SSubmitReq **)((char *)pMsg + i * sizeof(void *)); - smaDebug("vgId:%d, [%d][%d] version %" PRIi64, SMA_VID(pSma), msgSize, i, pReq->version); - tdRsmaPrintSubmitReq(pSma, pReq); - } -#endif if ((terrno = qSetSMAInput(qTaskInfo, pMsg, msgSize, inputType)) < 0) { smaError("vgId:%d, rsma %" PRIi8 " qSetStreamInput failed since %s", SMA_VID(pSma), level, tstrerror(terrno)); return TSDB_CODE_FAILED; } - if (STREAM_INPUT__MERGED_SUBMIT == inputType) { - SPackedData *packData = POINTER_SHIFT(pMsg, sizeof(SPackedData) * (msgSize - 1)); - atomic_store_64(&pItem->submitReqVer, packData->ver); - } + atomic_store_64(&pItem->submitReqVer, version); terrno = tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo, STREAM_NORMAL, NULL); @@ -910,7 +974,7 @@ static int32_t tdExecuteRSmaAsync(SSma *pSma, int64_t version, const void *pMsg, return TSDB_CODE_SUCCESS; } - if (inputType == STREAM_INPUT__DATA_SUBMIT) { + if (inputType == STREAM_INPUT__DATA_SUBMIT || inputType == STREAM_INPUT__REF_DATA_BLOCK) { if (tdExecuteRSmaImplAsync(pSma, version, pMsg, len, inputType, pRSmaInfo, suid) < 0) { tdReleaseRSmaInfo(pSma, pRSmaInfo); return TSDB_CODE_FAILED; @@ -937,12 +1001,8 @@ static int32_t tdExecuteRSmaAsync(SSma *pSma, int64_t version, const void *pMsg, return TSDB_CODE_SUCCESS; } -int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, int32_t len, int32_t inputType) { - SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); - if (!pEnv) { - // only applicable when rsma env exists - return TDB_CODE_SUCCESS; - } +int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, int32_t len) { + if (!SMA_RSMA_ENV(pSma)) return TSDB_CODE_SUCCESS; if 
((terrno = atomic_load_32(&SMA_RSMA_STAT(pSma)->execStat))) { smaError("vgId:%d, failed to process rsma submit since invalid exec code: %s", SMA_VID(pSma), terrstr()); @@ -951,27 +1011,25 @@ int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, STbUidStore uidStore = {0}; - if (inputType == STREAM_INPUT__DATA_SUBMIT) { - if (tdFetchSubmitReqSuids(pReq, &uidStore) < 0) { - smaError("vgId:%d, failed to process rsma submit fetch suid since: %s", SMA_VID(pSma), terrstr()); + if (tdFetchSubmitReqSuids(pReq, &uidStore) < 0) { + smaError("vgId:%d, failed to process rsma submit fetch suid since: %s", SMA_VID(pSma), terrstr()); + goto _err; + } + + if (uidStore.suid != 0) { + if (tdExecuteRSmaAsync(pSma, version, pMsg, len, STREAM_INPUT__DATA_SUBMIT, uidStore.suid) < 0) { + smaError("vgId:%d, failed to process rsma submit exec 1 since: %s", SMA_VID(pSma), terrstr()); goto _err; } - if (uidStore.suid != 0) { - if (tdExecuteRSmaAsync(pSma, version, pMsg, len, inputType, uidStore.suid) < 0) { - smaError("vgId:%d, failed to process rsma submit exec 1 since: %s", SMA_VID(pSma), terrstr()); + void *pIter = NULL; + while ((pIter = taosHashIterate(uidStore.uidHash, pIter))) { + tb_uid_t *pTbSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL); + if (tdExecuteRSmaAsync(pSma, version, pMsg, len, STREAM_INPUT__DATA_SUBMIT, *pTbSuid) < 0) { + smaError("vgId:%d, failed to process rsma submit exec 2 since: %s", SMA_VID(pSma), terrstr()); + taosHashCancelIterate(uidStore.uidHash, pIter); goto _err; } - - void *pIter = NULL; - while ((pIter = taosHashIterate(uidStore.uidHash, pIter))) { - tb_uid_t *pTbSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL); - if (tdExecuteRSmaAsync(pSma, version, pMsg, len, inputType, *pTbSuid) < 0) { - smaError("vgId:%d, failed to process rsma submit exec 2 since: %s", SMA_VID(pSma), terrstr()); - taosHashCancelIterate(uidStore.uidHash, pIter); - goto _err; - } - } } } tdUidStoreDestory(&uidStore); @@ -981,6 +1039,24 @@ _err: return terrno; } 
+int32_t tdProcessRSmaDelete(SSma *pSma, int64_t version, void *pReq, void *pMsg, int32_t len) { + if (!SMA_RSMA_ENV(pSma)) return TSDB_CODE_SUCCESS; + + if ((terrno = atomic_load_32(&SMA_RSMA_STAT(pSma)->execStat))) { + smaError("vgId:%d, failed to process rsma delete since invalid exec code: %s", SMA_VID(pSma), terrstr()); + goto _err; + } + + SDeleteRes *pDelRes = pReq; + if (tdExecuteRSmaAsync(pSma, version, pMsg, len, STREAM_INPUT__REF_DATA_BLOCK, pDelRes->suid) < 0) { + smaError("vgId:%d, failed to process rsma submit exec 1 since: %s", SMA_VID(pSma), terrstr()); + goto _err; + } + return TSDB_CODE_SUCCESS; +_err: + return terrno; +} + /** * @brief retrieve rsma meta and init * @@ -1359,10 +1435,20 @@ _end: tdReleaseSmaRef(smaMgmt.rsetId, pRSmaRef->refId); } -static void tdFreeRSmaSubmitItems(SArray *pItems) { - for (int32_t i = 0; i < taosArrayGetSize(pItems); ++i) { - SPackedData *packData = taosArrayGet(pItems, i); - taosFreeQitem(POINTER_SHIFT(packData->msgStr, -sizeof(int32_t) - sizeof(int64_t))); +static void tdFreeRSmaSubmitItems(SArray *pItems, int32_t type) { + int32_t arrSize = taosArrayGetSize(pItems); + if (type == STREAM_INPUT__MERGED_SUBMIT) { + for (int32_t i = 0; i < arrSize; ++i) { + SPackedData *packData = TARRAY_GET_ELEM(pItems, i); + taosFreeQitem(POINTER_SHIFT(packData->msgStr, -RSMA_EXEC_MSG_HLEN)); + } + } else if (type == STREAM_INPUT__REF_DATA_BLOCK) { + for (int32_t i = 0; i < arrSize; ++i) { + SPackedData *packData = TARRAY_GET_ELEM(pItems, i); + blockDataDestroy(packData->pDataBlock); + } + } else { + ASSERTS(0, "unknown type:%d", type); } taosArrayClear(pItems); } @@ -1427,40 +1513,98 @@ _err: } static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SArray *pSubmitArr, ERsmaExecType type) { + void *msg = NULL; + int8_t resume = 0; + int32_t nSubmit = 0; + int32_t nDelete = 0; + int64_t version = 0; + + SPackedData packData; + taosArrayClear(pSubmitArr); + + // the submitReq/deleteReq msg may exsit 
alternately in the msg queue, consume them sequentially in batch mode while (1) { - void *msg = NULL; taosGetQitem(qall, (void **)&msg); if (msg) { - SPackedData packData = {.msgLen = *(int32_t *)msg, - .ver = *(int64_t *)POINTER_SHIFT(msg, sizeof(int32_t)), - .msgStr = POINTER_SHIFT(msg, sizeof(int32_t) + sizeof(int64_t))}; + int8_t inputType = RSMA_EXEC_MSG_TYPE(msg); + if (inputType == STREAM_INPUT__DATA_SUBMIT) { + if (nDelete > 0) { + resume = 1; + break; + } + _resume_submit: + packData.msgLen = RSMA_EXEC_MSG_LEN(msg); + packData.ver = RSMA_EXEC_MSG_VER(msg); + packData.msgStr = RSMA_EXEC_MSG_BODY(msg); + version = packData.ver; + if (!taosArrayPush(pSubmitArr, &packData)) { + taosFreeQitem(msg); + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + ++nSubmit; + } else if (inputType == STREAM_INPUT__REF_DATA_BLOCK) { + if (nSubmit > 0) { + resume = 2; + break; + } + _resume_delete: + version = RSMA_EXEC_MSG_VER(msg); + if ((terrno = extractDelDataBlock(RSMA_EXEC_MSG_BODY(msg), RSMA_EXEC_MSG_LEN(msg), version, + &packData.pDataBlock, 1))) { + taosFreeQitem(msg); + goto _err; + } - if (!taosArrayPush(pSubmitArr, &packData)) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - tdFreeRSmaSubmitItems(pSubmitArr); - goto _err; + if (packData.pDataBlock && !taosArrayPush(pSubmitArr, &packData)) { + taosFreeQitem(msg); + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + taosFreeQitem(msg); + if (packData.pDataBlock) { + // packData.pDataBlock is NULL if delete affects 0 row + ++nDelete; + } + } else { + ASSERTS(0, "unknown msg type:%d", inputType); + break; } + } + + if (nSubmit > 0 || nDelete > 0) { + int32_t size = TARRAY_SIZE(pSubmitArr); + ASSERTS(size > 0, "size is %d", size); + int32_t inputType = nSubmit > 0 ? 
STREAM_INPUT__MERGED_SUBMIT : STREAM_INPUT__REF_DATA_BLOCK; + for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { + if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, version, inputType, pInfo, type, i) < 0) { + goto _err; + } + } + tdFreeRSmaSubmitItems(pSubmitArr, inputType); + nSubmit = 0; + nDelete = 0; } else { - break; + goto _rtn; + } + + if (resume == 1) { + resume = 0; + goto _resume_submit; + } else if (resume == 2) { + resume = 0; + goto _resume_delete; } } - int32_t size = taosArrayGetSize(pSubmitArr); - if (size > 0) { - for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { - if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, STREAM_INPUT__MERGED_SUBMIT, pInfo, type, i) < 0) { - goto _err; - } - } - tdFreeRSmaSubmitItems(pSubmitArr); - } +_rtn: return TSDB_CODE_SUCCESS; _err: atomic_store_32(&SMA_RSMA_STAT(pSma)->execStat, terrno); smaError("vgId:%d, batch exec for suid:%" PRIi64 " execType:%d size:%d failed since %s", SMA_VID(pSma), pInfo->suid, type, (int32_t)taosArrayGetSize(pSubmitArr), terrstr()); - tdFreeRSmaSubmitItems(pSubmitArr); + tdFreeRSmaSubmitItems(pSubmitArr, nSubmit ? 
STREAM_INPUT__MERGED_SUBMIT : STREAM_INPUT__REF_DATA_BLOCK); while (1) { void *msg = NULL; taosGetQitem(qall, (void **)&msg); @@ -1497,7 +1641,7 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type) { } if (!(pSubmitArr = - taosArrayInit(TMIN(RSMA_SUBMIT_BATCH_SIZE, atomic_load_64(&pRSmaStat->nBufItems)), sizeof(SPackedData)))) { + taosArrayInit(TMIN(RSMA_EXEC_BATCH_SIZE, atomic_load_64(&pRSmaStat->nBufItems)), sizeof(SPackedData)))) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); } diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c index 08ddc4bd7b..289986e01f 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ b/source/dnode/vnode/src/sma/smaTimeRange.c @@ -188,7 +188,8 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema * if (pDataBlock->info.type == STREAM_DELETE_RESULT) { pDeleteReq->suid = suid; pDeleteReq->deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq)); - tqBuildDeleteReq(stbFullName, pDataBlock, pDeleteReq, ""); + code = tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, pDeleteReq, ""); + TSDB_CHECK_CODE(code, lino, _exit); continue; } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 1c1a4a192c..3ae0eb1ddf 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1668,7 +1668,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) SStreamCheckpointSourceReq req = {0}; if (!vnodeIsRoleLeader(pTq->pVnode)) { - tqDebug("vgId:%d not leader, ignore checkpoint-source msg", vgId); + tqDebug("vgId:%d not leader, ignore checkpoint-source msg, s-task:0x%x", vgId, req.taskId); SRpcMsg rsp = {0}; buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); tmsgSendRsp(&rsp); // error occurs @@ -1676,7 +1676,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) } if (!pTq->pVnode->restored) { - tqDebug("vgId:%d 
checkpoint-source msg received during restoring, ignore it", vgId); + tqDebug("vgId:%d checkpoint-source msg received during restoring, s-task:0x%x ignore it", vgId, req.taskId); SRpcMsg rsp = {0}; buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); tmsgSendRsp(&rsp); // error occurs @@ -1696,7 +1696,6 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) } tDecoderClear(&decoder); - // todo handle failure to reset from checkpoint procedure SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.taskId); if (pTask == NULL) { tqError("vgId:%d failed to find s-task:0x%x, ignore checkpoint msg. it may have been destroyed already", vgId, @@ -1707,7 +1706,6 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) return TSDB_CODE_SUCCESS; } - // todo handle failure to reset from checkpoint procedure // downstream not ready, current the stream tasks are not all ready. Ignore this checkpoint req. if (pTask->status.downstreamReady != 1) { pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id @@ -1728,17 +1726,32 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) ETaskStatus status = streamTaskGetStatus(pTask, NULL); if (status == TASK_STATUS__HALT || status == TASK_STATUS__PAUSE) { - qError("s-task:%s not ready for checkpoint, since it is halt, ignore this checkpoint:%" PRId64 ", set it failure", + tqError("s-task:%s not ready for checkpoint, since it is halt, ignore this checkpoint:%" PRId64 ", set it failure", pTask->id.idStr, req.checkpointId); - taosThreadMutexUnlock(&pTask->lock); + taosThreadMutexUnlock(&pTask->lock); streamMetaReleaseTask(pMeta, pTask); SRpcMsg rsp = {0}; buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); tmsgSendRsp(&rsp); // error occurs + return TSDB_CODE_SUCCESS; } + + // check if the checkpoint msg already sent or not. 
+ if (status == TASK_STATUS__CK) { + ASSERT(pTask->checkpointingId == req.checkpointId); + tqWarn("s-task:%s recv checkpoint-source msg again checkpointId:%" PRId64 + " already received, ignore this msg and continue process checkpoint", + pTask->id.idStr, pTask->checkpointingId); + + taosThreadMutexUnlock(&pTask->lock); + streamMetaReleaseTask(pMeta, pTask); + + return TSDB_CODE_SUCCESS; + } + streamProcessCheckpointSourceReq(pTask, &req); taosThreadMutexUnlock(&pTask->lock); @@ -1924,8 +1937,59 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { pMeta->startInfo.tasksWillRestart = 0; streamMetaWUnLock(pMeta); } else { - streamMetaWUnLock(pMeta); + tqDebug("vgId:%d all %d task(s) nodeEp updated and closed", vgId, numOfTasks); + +#if 1 tqStartStreamTaskAsync(pTq, true); + streamMetaWUnLock(pMeta); +#else + streamMetaWUnLock(pMeta); + + // For debug purpose. + // the following procedure consume many CPU resource, result in the re-election of leader + // with high probability. So we employ it as a test case for the stream processing framework, with + // checkpoint/restart/nodeUpdate etc. 
+ while(1) { + int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1); + if (startVal == 0) { + break; + } + + tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId); + taosMsleep(500); + } + + while (streamMetaTaskInTimer(pMeta)) { + tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); + taosMsleep(100); + } + + streamMetaWLock(pMeta); + + int32_t code = streamMetaReopen(pMeta); + if (code != 0) { + tqError("vgId:%d failed to reopen stream meta", vgId); + streamMetaWUnLock(pMeta); + taosArrayDestroy(req.pNodeList); + return -1; + } + + if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) { + tqError("vgId:%d failed to load stream tasks", vgId); + streamMetaWUnLock(pMeta); + taosArrayDestroy(req.pNodeList); + return -1; + } + + if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { + tqInfo("vgId:%d start all stream tasks after all being updated", vgId); + tqResetStreamTaskStatus(pTq); + tqStartStreamTaskAsync(pTq, false); + } else { + tqInfo("vgId:%d, follower node not start stream tasks", vgId); + } + streamMetaWUnLock(pMeta); +#endif } } diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index bd2a591a98..41b1aa7bd1 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -343,7 +343,7 @@ int32_t extractMsgFromWal(SWalReader* pReader, void** pItem, int64_t maxVer, con void* pBody = POINTER_SHIFT(pCont->body, sizeof(SMsgHead)); int32_t len = pCont->bodyLen - sizeof(SMsgHead); - code = extractDelDataBlock(pBody, len, ver, (SStreamRefDataBlock**)pItem); + code = extractDelDataBlock(pBody, len, ver, (void**)pItem, 0); if (code == TSDB_CODE_SUCCESS) { if (*pItem == NULL) { tqDebug("s-task:%s empty delete msg, discard it, len:%d, ver:%" PRId64, id, len, ver); diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 4b64737936..c2e48d5d92 100644 --- 
a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -43,7 +43,7 @@ static SArray* createDefaultTagColName(); static void setCreateTableMsgTableName(SVCreateTbReq* pCreateTableReq, SSDataBlock* pDataBlock, const char* stbFullName, int64_t gid); -int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, +int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, const char* pIdStr) { int32_t totalRows = pDataBlock->info.rows; SColumnInfoData* pStartTsCol = taosArrayGet(pDataBlock->pDataBlock, START_TS_COLUMN_INDEX); @@ -58,8 +58,9 @@ int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, int64_t ekey = *(int64_t*)colDataGetData(pEndTsCol, row); int64_t groupId = *(int64_t*)colDataGetData(pGidCol, row); - char* name; - void* varTbName = NULL; + char* name = NULL; + char* originName = NULL; + void* varTbName = NULL; if (!colDataIsNull(pTbNameCol, totalRows, row, NULL)) { varTbName = colDataGetVarData(pTbNameCol, row); } @@ -67,18 +68,29 @@ int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, if (varTbName != NULL && varTbName != (void*)-1) { name = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN); memcpy(name, varDataVal(varTbName), varDataLen(varTbName)); - } else { + } else if (stbFullName) { name = buildCtbNameByGroupId(stbFullName, groupId); + } else { + originName = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE); + if (metaGetTableNameByUid(pTq->pVnode, groupId, originName) == 0) { + name = varDataVal(originName); + } } - tqDebug("s-task:%s build delete msg groupId:%" PRId64 ", name:%s, skey:%" PRId64 " ekey:%" PRId64, - pIdStr, groupId, name, skey, ekey); + if (!name || *name == '\0') { + tqWarn("s-task:%s failed to build delete msg groupId:%" PRId64 ", skey:%" PRId64 " ekey:%" PRId64 + " since invalid tbname:%s", + pIdStr, groupId, skey, ekey, name ? 
name : "NULL"); + } else { + tqDebug("s-task:%s build delete msg groupId:%" PRId64 ", name:%s, skey:%" PRId64 " ekey:%" PRId64, pIdStr, + groupId, name, skey, ekey); - SSingleDeleteReq req = { .startTs = skey, .endTs = ekey}; - strncpy(req.tbname, name, TSDB_TABLE_NAME_LEN - 1); - taosMemoryFree(name); - - taosArrayPush(deleteReq->deleteReqs, &req); + SSingleDeleteReq req = {.startTs = skey, .endTs = ekey}; + strncpy(req.tbname, name, TSDB_TABLE_NAME_LEN - 1); + taosArrayPush(deleteReq->deleteReqs, &req); + } + if (originName) name = originName; + taosMemoryFreeClear(name); } return 0; @@ -345,7 +357,7 @@ int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSDataBlock* int64_t suid) { SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))}; - int32_t code = tqBuildDeleteReq(stbFullName, pDataBlock, &deleteReq, pTask->id.idStr); + int32_t code = tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, &deleteReq, pTask->id.idStr); if (code != TSDB_CODE_SUCCESS) { return code; } diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 26849f8578..e578638e9d 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -299,7 +299,7 @@ int32_t tqResetStreamTaskStatus(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - tqDebug("vgId:%d start all %d stream task(s)", vgId, numOfTasks); + tqDebug("vgId:%d reset all %d stream task(s) status to be uninit", vgId, numOfTasks); if (numOfTasks == 0) { return TSDB_CODE_SUCCESS; } @@ -448,7 +448,7 @@ bool doPutDataIntoInputQFromWal(SStreamTask* pTask, int64_t maxVer, int32_t* num numOfNewItems += 1; int64_t ver = walReaderGetCurrentVer(pTask->exec.pWalReader); pTask->chkInfo.nextProcessVer = ver; - tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", id, ver); + tqDebug("s-task:%s set ver:%" 
PRId64 " for reader after extract data from WAL", id, ver); bool itemInFillhistory = handleFillhistoryScanComplete(pTask, ver); if (itemInFillhistory) { diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index d8fe899bf6..8f62928d22 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -399,7 +399,7 @@ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* return 0; } -int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock) { +int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type) { SDecoder* pCoder = &(SDecoder){0}; SDeleteRes* pRes = &(SDeleteRes){0}; @@ -442,14 +442,21 @@ int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStream } taosArrayDestroy(pRes->uidList); - *pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); - if (*pRefBlock == NULL) { - blockDataCleanup(pDelBlock); - taosMemoryFree(pDelBlock); - return TSDB_CODE_OUT_OF_MEMORY; + if (type == 0) { + *pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); + if (*pRefBlock == NULL) { + blockDataCleanup(pDelBlock); + taosMemoryFree(pDelBlock); + return TSDB_CODE_OUT_OF_MEMORY; + } + + ((SStreamRefDataBlock*)(*pRefBlock))->type = STREAM_INPUT__REF_DATA_BLOCK; + ((SStreamRefDataBlock*)(*pRefBlock))->pBlock = pDelBlock; + } else if (type == 1) { + *pRefBlock = pDelBlock; + } else { + ASSERTS(0, "unknown type:%d", type); } - (*pRefBlock)->type = STREAM_INPUT__REF_DATA_BLOCK; - (*pRefBlock)->pBlock = pDelBlock; return TSDB_CODE_SUCCESS; } diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index adb72821e4..8b9cae42fc 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -338,7 +338,7 @@ static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t 
offset, uint8_t *pBuf, int64 // 2, retrieve pgs from s3 uint8_t *pBlock = NULL; int64_t retrieve_offset = PAGE_OFFSET(pgno, pFD->szPage); - int64_t pgnoEnd = pgno - 1 + (size - n + szPgCont - 1) / szPgCont; + int64_t pgnoEnd = pgno - 1 + (bOffset + size - n + szPgCont - 1) / szPgCont; int64_t retrieve_size = (pgnoEnd - pgno + 1) * pFD->szPage; code = s3GetObjectBlock(pFD->objName, retrieve_offset, retrieve_size, 1, &pBlock); if (code != TSDB_CODE_SUCCESS) { diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index c6c93e3d3f..899efc8e70 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -25,9 +25,11 @@ static int32_t vnodeProcessCreateStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessDropStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); -static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); +static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, + SRpcMsg *pOriginRpc); static int32_t vnodeProcessAlterTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); -static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); +static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, + SRpcMsg *pOriginRpc); static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessCreateTSmaReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); @@ -380,7 +382,7 @@ static int32_t 
vnodePreProcessDeleteMsg(SVnode *pVnode, SRpcMsg *pMsg) { SEncoder *pCoder = &(SEncoder){0}; SDeleteRes res = {0}; - SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb}; + SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb, .skipRollup = 1}; initStorageAPI(&handle.api); code = qWorkerProcessDeleteMsg(&handle, pVnode->pQuery, pMsg, &res); @@ -509,13 +511,13 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg if (vnodeProcessDropStbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; break; case TDMT_VND_CREATE_TABLE: - if (vnodeProcessCreateTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; + if (vnodeProcessCreateTbReq(pVnode, ver, pReq, len, pRsp, pMsg) < 0) goto _err; break; case TDMT_VND_ALTER_TABLE: if (vnodeProcessAlterTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; break; case TDMT_VND_DROP_TABLE: - if (vnodeProcessDropTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; + if (vnodeProcessDropTbReq(pVnode, ver, pReq, len, pRsp, pMsg) < 0) goto _err; break; case TDMT_VND_DROP_TTL_TABLE: if (vnodeProcessDropTtlTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; @@ -878,7 +880,8 @@ _err: return -1; } -static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { +static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, + SRpcMsg *pOriginRpc) { SDecoder decoder = {0}; SEncoder encoder = {0}; int32_t rcode = 0; @@ -928,6 +931,17 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, goto _exit; } + if(tsEnableAudit && tsEnableAuditCreateTable){ + char* str = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN); + if (str == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + rcode = -1; + goto _exit; + } + strcpy(str, pCreateReq->name); + taosArrayPush(tbNames, &str); + } + // validate hash sprintf(tbName, "%s.%s", pVnode->config.dbname, pCreateReq->name); if (vnodeValidateTableHash(pVnode, tbName) 
< 0) { @@ -951,12 +965,6 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, } taosArrayPush(rsp.pArray, &cRsp); - - if (tsEnableAuditCreateTable) { - char *str = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN); - strcpy(str, pCreateReq->name); - taosArrayPush(tbNames, &str); - } } vDebug("vgId:%d, add %d new created tables into query table list", TD_VID(pVnode), (int32_t)taosArrayGetSize(tbUids)); @@ -978,17 +986,17 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, tEncoderInit(&encoder, pRsp->pCont, pRsp->contLen); tEncodeSVCreateTbBatchRsp(&encoder, &rsp); - if (tsEnableAuditCreateTable) { + if(tsEnableAudit && tsEnableAuditCreateTable){ int64_t clusterId = pVnode->config.syncCfg.nodeInfo[0].clusterId; SName name = {0}; tNameFromString(&name, pVnode->config.dbname, T_NAME_ACCT | T_NAME_DB); SStringBuilder sb = {0}; - for (int32_t iReq = 0; iReq < req.nReqs; iReq++) { - char **key = (char **)taosArrayGet(tbNames, iReq); + for(int32_t i = 0; i < tbNames->size; i++){ + char** key = (char**)taosArrayGet(tbNames, i); taosStringBuilderAppendStringLen(&sb, *key, strlen(*key)); - if (iReq < req.nReqs - 1) { + if(i < tbNames->size - 1){ taosStringBuilderAppendChar(&sb, ','); } taosMemoryFreeClear(*key); @@ -997,7 +1005,7 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, size_t len = 0; char *keyJoined = taosStringBuilderGetResult(&sb, &len); - auditRecord(NULL, clusterId, "createTable", name.dbname, "", keyJoined, len); + auditRecord(pOriginRpc, clusterId, "createTable", name.dbname, "", keyJoined, len); taosStringBuilderDestroy(&sb); } @@ -1139,7 +1147,8 @@ _exit: return 0; } -static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { +static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, + SRpcMsg *pOriginRpc) { SVDropTbBatchReq req = {0}; SVDropTbBatchRsp rsp = {0}; SDecoder 
decoder = {0}; @@ -1218,7 +1227,7 @@ static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, in size_t len = 0; char *keyJoined = taosStringBuilderGetResult(&sb, &len); - auditRecord(NULL, clusterId, "dropTable", name.dbname, "", keyJoined, len); + auditRecord(pOriginRpc, clusterId, "dropTable", name.dbname, "", keyJoined, len); taosStringBuilderDestroy(&sb); } @@ -1669,7 +1678,7 @@ _exit: atomic_add_fetch_64(&pVnode->statis.nBatchInsert, 1); if (code == 0) { atomic_add_fetch_64(&pVnode->statis.nBatchInsertSuccess, 1); - code = tdProcessRSmaSubmit(pVnode->pSma, ver, pSubmitReq, pReq, len, STREAM_INPUT__DATA_SUBMIT); + code = tdProcessRSmaSubmit(pVnode->pSma, ver, pSubmitReq, pReq, len); } // clear @@ -1886,6 +1895,11 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe SMetaReader mr = {0}; metaReaderDoInit(&mr, pVnode->pMeta, META_READER_NOLOCK); + STsdb *pTsdb = pVnode->pTsdb; + + if (deleteReq.level) { + pTsdb = deleteReq.level == 1 ? 
VND_RSMA1(pVnode) : VND_RSMA2(pVnode); + } int32_t sz = taosArrayGetSize(deleteReq.deleteReqs); for (int32_t i = 0; i < sz; i++) { @@ -1898,21 +1912,22 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe int64_t uid = mr.me.uid; - int32_t code = tsdbDeleteTableData(pVnode->pTsdb, ver, deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); + int32_t code = tsdbDeleteTableData(pTsdb, ver, deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); if (code < 0) { terrno = code; vError("vgId:%d, delete error since %s, suid:%" PRId64 ", uid:%" PRId64 ", start ts:%" PRId64 ", end ts:%" PRId64, TD_VID(pVnode), terrstr(), deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); } - code = metaUpdateChangeTimeWithLock(pVnode->pMeta, uid, deleteReq.ctimeMs); - if (code < 0) { - terrno = code; - vError("vgId:%d, update change time error since %s, suid:%" PRId64 ", uid:%" PRId64 ", start ts:%" PRId64 - ", end ts:%" PRId64, - TD_VID(pVnode), terrstr(), deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); + if (deleteReq.level == 0) { + code = metaUpdateChangeTimeWithLock(pVnode->pMeta, uid, deleteReq.ctimeMs); + if (code < 0) { + terrno = code; + vError("vgId:%d, update change time error since %s, suid:%" PRId64 ", uid:%" PRId64 ", start ts:%" PRId64 + ", end ts:%" PRId64, + TD_VID(pVnode), terrstr(), deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); + } } - tDecoderClear(&mr.coder); } metaReaderClear(&mr); @@ -1947,6 +1962,8 @@ static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, in if (code) goto _err; } + code = tdProcessRSmaDelete(pVnode->pSma, ver, pRes, pReq, len); + tDecoderClear(pCoder); taosArrayDestroy(pRes->uidList); diff --git a/source/libs/command/inc/commandInt.h b/source/libs/command/inc/commandInt.h index 535167e65c..d7ded9d6f1 100644 --- a/source/libs/command/inc/commandInt.h +++ b/source/libs/command/inc/commandInt.h @@ -37,7 +37,7 @@ extern "C" { #define EXPLAIN_TABLE_COUNT_SCAN_FORMAT "Table 
Count Row Scan on %s" #define EXPLAIN_PROJECTION_FORMAT "Projection" #define EXPLAIN_JOIN_FORMAT "%s" -#define EXPLAIN_AGG_FORMAT "Aggragate" +#define EXPLAIN_AGG_FORMAT "%s" #define EXPLAIN_INDEF_ROWS_FORMAT "Indefinite Rows Function" #define EXPLAIN_EXCHANGE_FORMAT "Data Exchange %d:1" #define EXPLAIN_SORT_FORMAT "Sort" @@ -59,7 +59,7 @@ extern "C" { #define EXPLAIN_TIME_WINDOWS_FORMAT "Time Window: interval=%" PRId64 "%c offset=%" PRId64 "%c sliding=%" PRId64 "%c" #define EXPLAIN_WINDOW_FORMAT "Window: gap=%" PRId64 #define EXPLAIN_RATIO_TIME_FORMAT "Ratio: %f" -#define EXPLAIN_MERGE_FORMAT "SortMerge" +#define EXPLAIN_MERGE_FORMAT "Merge" #define EXPLAIN_MERGE_KEYS_FORMAT "Merge Key: " #define EXPLAIN_IGNORE_GROUPID_FORMAT "Ignore Group Id: %s" #define EXPLAIN_PARTITION_KETS_FORMAT "Partition Key: " @@ -85,7 +85,9 @@ extern "C" { #define EXPLAIN_COLUMNS_FORMAT "columns=%d" #define EXPLAIN_PSEUDO_COLUMNS_FORMAT "pseudo_columns=%d" #define EXPLAIN_WIDTH_FORMAT "width=%d" -#define EXPLAIN_TABLE_SCAN_FORMAT "order=[asc|%d desc|%d]" +#define EXPLAIN_SCAN_ORDER_FORMAT "order=[asc|%d desc|%d]" +#define EXPLAIN_SCAN_MODE_FORMAT "mode=%s" +#define EXPLAIN_SCAN_DATA_LOAD_FORMAT "data_load=%s" #define EXPLAIN_GROUPS_FORMAT "groups=%d" #define EXPLAIN_WIDTH_FORMAT "width=%d" #define EXPLAIN_INTERVAL_VALUE_FORMAT "interval=%" PRId64 "%c" @@ -105,6 +107,7 @@ extern "C" { #define EXPLAIN_UID_SLOT_FORMAT "uid_slot=%d,%d" #define EXPLAIN_SRC_SCAN_FORMAT "src_scan=%d,%d" #define EXPLAIN_PLAN_BLOCKING "blocking=%d" +#define EXPLAIN_MERGE_MODE_FORMAT "mode=%s" #define COMMAND_RESET_LOG "resetLog" #define COMMAND_SCHEDULE_POLICY "schedulePolicy" @@ -156,6 +159,7 @@ typedef struct SExplainCtx { #define EXPLAIN_ORDER_STRING(_order) ((ORDER_ASC == _order) ? "asc" : ORDER_DESC == _order ? "desc" : "unknown") #define EXPLAIN_JOIN_STRING(_type) ((JOIN_TYPE_INNER == _type) ? "Inner join" : "Join") +#define EXPLAIN_MERGE_MODE_STRING(_mode) ((_mode) == MERGE_TYPE_SORT ? 
"sort" : ((_mode) == MERGE_TYPE_NON_SORT ? "merge" : "column")) #define INVERAL_TIME_FROM_PRECISION_TO_UNIT(_t, _u, _p) (((_u) == 'n' || (_u) == 'y') ? (_t) : (convertTimeFromPrecisionToUnit(_t, _p, _u))) diff --git a/source/libs/command/src/explain.c b/source/libs/command/src/explain.c index 27cfaab3cf..66b50bcb47 100644 --- a/source/libs/command/src/explain.c +++ b/source/libs/command/src/explain.c @@ -20,6 +20,7 @@ #include "tcommon.h" #include "tdatablock.h" #include "systable.h" +#include "functionMgt.h" int32_t qExplainGenerateResNode(SPhysiNode *pNode, SExplainGroup *group, SExplainResNode **pRes); int32_t qExplainAppendGroupResRows(void *pCtx, int32_t groupId, int32_t level, bool singleChannel); @@ -284,10 +285,49 @@ int32_t qExplainResAppendRow(SExplainCtx *ctx, char *tbuf, int32_t len, int32_t return TSDB_CODE_SUCCESS; } -static uint8_t getIntervalPrecision(SIntervalPhysiNode *pIntNode) { +static uint8_t qExplainGetIntervalPrecision(SIntervalPhysiNode *pIntNode) { return ((SColumnNode *)pIntNode->window.pTspk)->node.resType.precision; } +static char* qExplainGetScanMode(STableScanPhysiNode* pScan) { + bool isGroupByTbname = false; + bool isGroupByTag = false; + bool seq = false; + bool groupOrder = false; + if (pScan->pGroupTags && LIST_LENGTH(pScan->pGroupTags) == 1) { + SNode* p = nodesListGetNode(pScan->pGroupTags, 0); + if (QUERY_NODE_FUNCTION == nodeType(p) && (strcmp(((struct SFunctionNode*)p)->functionName, "tbname") == 0)) { + isGroupByTbname = true; + } + } + + isGroupByTag = (NULL != pScan->pGroupTags) && !isGroupByTbname; + if ((((!isGroupByTag) || isGroupByTbname) && pScan->groupSort) || (isGroupByTag && (pScan->groupSort || pScan->scan.groupOrderScan))) { + return "seq_grp_order"; + } + + if ((isGroupByTbname && (pScan->groupSort || pScan->scan.groupOrderScan)) || (isGroupByTag && (pScan->groupSort || pScan->scan.groupOrderScan))) { + return "grp_order"; + } + + return "ts_order"; +} + +static char* 
qExplainGetScanDataLoad(STableScanPhysiNode* pScan) { + switch (pScan->dataRequired) { + case FUNC_DATA_REQUIRED_DATA_LOAD: + return "data"; + case FUNC_DATA_REQUIRED_SMA_LOAD: + return "sma"; + case FUNC_DATA_REQUIRED_NOT_LOAD: + return "no"; + default: + break; + } + + return "unknown"; +} + int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, int32_t level) { int32_t tlen = 0; bool isVerboseLine = false; @@ -360,7 +400,11 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i } EXPLAIN_ROW_APPEND(EXPLAIN_WIDTH_FORMAT, pTblScanNode->scan.node.pOutputDataBlockDesc->totalRowSize); EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); - EXPLAIN_ROW_APPEND(EXPLAIN_TABLE_SCAN_FORMAT, pTblScanNode->scanSeq[0], pTblScanNode->scanSeq[1]); + EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_ORDER_FORMAT, pTblScanNode->scanSeq[0], pTblScanNode->scanSeq[1]); + EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); + EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_MODE_FORMAT, qExplainGetScanMode(pTblScanNode)); + EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); + EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_DATA_LOAD_FORMAT, qExplainGetScanDataLoad(pTblScanNode)); EXPLAIN_ROW_APPEND(EXPLAIN_RIGHT_PARENTHESIS_FORMAT); EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); @@ -599,7 +643,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i } case QUERY_NODE_PHYSICAL_PLAN_HASH_AGG: { SAggPhysiNode *pAggNode = (SAggPhysiNode *)pNode; - EXPLAIN_ROW_NEW(level, EXPLAIN_AGG_FORMAT); + EXPLAIN_ROW_NEW(level, EXPLAIN_AGG_FORMAT, (pAggNode->pGroupKeys ? 
"GroupAggragate" : "Aggragate")); EXPLAIN_ROW_APPEND(EXPLAIN_LEFT_PARENTHESIS_FORMAT); if (pResNode->pExecInfo) { QRY_ERR_RET(qExplainBufAppendExecInfo(pResNode->pExecInfo, tbuf, &tlen)); @@ -841,7 +885,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit); EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); - uint8_t precision = getIntervalPrecision(pIntNode); + uint8_t precision = qExplainGetIntervalPrecision(pIntNode); EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT, INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision), pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision), @@ -893,7 +937,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit); EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); - uint8_t precision = getIntervalPrecision(pIntNode); + uint8_t precision = qExplainGetIntervalPrecision(pIntNode); EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT, INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision), pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision), @@ -1119,41 +1163,33 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_APPEND(EXPLAIN_INPUT_ORDER_FORMAT, EXPLAIN_ORDER_STRING(pMergeNode->node.inputTsOrder)); EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); EXPLAIN_ROW_APPEND(EXPLAIN_OUTPUT_ORDER_TYPE_FORMAT, EXPLAIN_ORDER_STRING(pMergeNode->node.outputTsOrder)); + EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); + EXPLAIN_ROW_APPEND(EXPLAIN_MERGE_MODE_FORMAT, EXPLAIN_MERGE_MODE_STRING(pMergeNode->type)); EXPLAIN_ROW_APPEND(EXPLAIN_RIGHT_PARENTHESIS_FORMAT); EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); if (EXPLAIN_MODE_ANALYZE == 
ctx->mode) { - // sort key - EXPLAIN_ROW_NEW(level + 1, "Merge Key: "); - if (pResNode->pExecInfo) { - for (int32_t i = 0; i < LIST_LENGTH(pMergeNode->pMergeKeys); ++i) { - SOrderByExprNode *ptn = (SOrderByExprNode *)nodesListGetNode(pMergeNode->pMergeKeys, i); - EXPLAIN_ROW_APPEND("%s ", nodesGetNameFromColumnNode(ptn->pExpr)); + if (MERGE_TYPE_SORT == pMergeNode->type) { + // sort method + EXPLAIN_ROW_NEW(level + 1, "Sort Method: "); + + int32_t nodeNum = taosArrayGetSize(pResNode->pExecInfo); + SExplainExecInfo *execInfo = taosArrayGet(pResNode->pExecInfo, 0); + SSortExecInfo *pExecInfo = (SSortExecInfo *)execInfo->verboseInfo; + EXPLAIN_ROW_APPEND("%s", pExecInfo->sortMethod == SORT_QSORT_T ? "quicksort" : "merge sort"); + if (pExecInfo->sortBuffer > 1024 * 1024) { + EXPLAIN_ROW_APPEND(" Buffers:%.2f Mb", pExecInfo->sortBuffer / (1024 * 1024.0)); + } else if (pExecInfo->sortBuffer > 1024) { + EXPLAIN_ROW_APPEND(" Buffers:%.2f Kb", pExecInfo->sortBuffer / (1024.0)); + } else { + EXPLAIN_ROW_APPEND(" Buffers:%d b", pExecInfo->sortBuffer); } + + EXPLAIN_ROW_APPEND(" loops:%d", pExecInfo->loops); + EXPLAIN_ROW_END(); + QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); } - - EXPLAIN_ROW_END(); - QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); - - // sort method - EXPLAIN_ROW_NEW(level + 1, "Sort Method: "); - - int32_t nodeNum = taosArrayGetSize(pResNode->pExecInfo); - SExplainExecInfo *execInfo = taosArrayGet(pResNode->pExecInfo, 0); - SSortExecInfo *pExecInfo = (SSortExecInfo *)execInfo->verboseInfo; - EXPLAIN_ROW_APPEND("%s", pExecInfo->sortMethod == SORT_QSORT_T ? 
"quicksort" : "merge sort"); - if (pExecInfo->sortBuffer > 1024 * 1024) { - EXPLAIN_ROW_APPEND(" Buffers:%.2f Mb", pExecInfo->sortBuffer / (1024 * 1024.0)); - } else if (pExecInfo->sortBuffer > 1024) { - EXPLAIN_ROW_APPEND(" Buffers:%.2f Kb", pExecInfo->sortBuffer / (1024.0)); - } else { - EXPLAIN_ROW_APPEND(" Buffers:%d b", pExecInfo->sortBuffer); - } - - EXPLAIN_ROW_APPEND(" loops:%d", pExecInfo->loops); - EXPLAIN_ROW_END(); - QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); } if (verbose) { @@ -1167,29 +1203,31 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); - EXPLAIN_ROW_NEW(level + 1, EXPLAIN_OUTPUT_FORMAT); - EXPLAIN_ROW_APPEND(EXPLAIN_IGNORE_GROUPID_FORMAT, pMergeNode->ignoreGroupId ? "true" : "false"); - EXPLAIN_ROW_END(); - QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); + if (MERGE_TYPE_SORT == pMergeNode->type) { + EXPLAIN_ROW_NEW(level + 1, EXPLAIN_OUTPUT_FORMAT); + EXPLAIN_ROW_APPEND(EXPLAIN_IGNORE_GROUPID_FORMAT, pMergeNode->ignoreGroupId ? 
"true" : "false"); + EXPLAIN_ROW_END(); + QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); - EXPLAIN_ROW_NEW(level + 1, EXPLAIN_MERGE_KEYS_FORMAT); - if (pMergeNode->groupSort) { - EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, "_group_id asc"); - if (LIST_LENGTH(pMergeNode->pMergeKeys) > 0) { - EXPLAIN_ROW_APPEND(EXPLAIN_COMMA_FORMAT); + EXPLAIN_ROW_NEW(level + 1, EXPLAIN_MERGE_KEYS_FORMAT); + if (pMergeNode->groupSort) { + EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, "_group_id asc"); + if (LIST_LENGTH(pMergeNode->pMergeKeys) > 0) { + EXPLAIN_ROW_APPEND(EXPLAIN_COMMA_FORMAT); + } } - } - for (int32_t i = 0; i < LIST_LENGTH(pMergeNode->pMergeKeys); ++i) { - SOrderByExprNode *ptn = (SOrderByExprNode *)nodesListGetNode(pMergeNode->pMergeKeys, i); - EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, nodesGetNameFromColumnNode(ptn->pExpr)); - EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); - EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, EXPLAIN_ORDER_STRING(ptn->order)); - if (i != LIST_LENGTH(pMergeNode->pMergeKeys) - 1) { - EXPLAIN_ROW_APPEND(EXPLAIN_COMMA_FORMAT); + for (int32_t i = 0; i < LIST_LENGTH(pMergeNode->pMergeKeys); ++i) { + SOrderByExprNode *ptn = (SOrderByExprNode *)nodesListGetNode(pMergeNode->pMergeKeys, i); + EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, nodesGetNameFromColumnNode(ptn->pExpr)); + EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); + EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, EXPLAIN_ORDER_STRING(ptn->order)); + if (i != LIST_LENGTH(pMergeNode->pMergeKeys) - 1) { + EXPLAIN_ROW_APPEND(EXPLAIN_COMMA_FORMAT); + } } + EXPLAIN_ROW_END(); + QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); } - EXPLAIN_ROW_END(); - QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); if (pMergeNode->node.pConditions) { EXPLAIN_ROW_NEW(level + 1, EXPLAIN_FILTER_FORMAT); @@ -1419,7 +1457,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit); 
EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); - uint8_t precision = getIntervalPrecision(pIntNode); + uint8_t precision = qExplainGetIntervalPrecision(pIntNode); EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT, INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision), pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision), diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index a0e35fff87..1f82a9477b 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -77,6 +77,12 @@ static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOf SPackedData tmp = {.pDataBlock = input}; taosArrayPush(pInfo->pBlockLists, &tmp); pInfo->blockType = STREAM_INPUT__CHECKPOINT; + } else if (type == STREAM_INPUT__REF_DATA_BLOCK) { + for (int32_t i = 0; i < numOfBlocks; ++i) { + SPackedData* pReq = POINTER_SHIFT(input, i * sizeof(SPackedData)); + taosArrayPush(pInfo->pBlockLists, pReq); + } + pInfo->blockType = STREAM_INPUT__DATA_BLOCK; } return TSDB_CODE_SUCCESS; diff --git a/source/libs/executor/src/mergejoinoperator.c b/source/libs/executor/src/mergejoinoperator.c index 2348a3c97b..b4461f20b1 100644 --- a/source/libs/executor/src/mergejoinoperator.c +++ b/source/libs/executor/src/mergejoinoperator.c @@ -239,7 +239,7 @@ SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream = 2; } else { pInfo->downstreamResBlkId[0] = getOperatorResultBlockId(pDownstream[0], 0); - pInfo->downstreamResBlkId[1] = getOperatorResultBlockId(pDownstream[1], 1); + pInfo->downstreamResBlkId[1] = getOperatorResultBlockId(pDownstream[1], 0); } int32_t numOfCols = 0; diff --git a/source/libs/executor/src/mergeoperator.c b/source/libs/executor/src/mergeoperator.c new file mode 100755 index 0000000000..093b6ab11e --- /dev/null +++ b/source/libs/executor/src/mergeoperator.c @@ -0,0 +1,531 @@ 
+/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "executorInt.h" +#include "filter.h" +#include "operator.h" +#include "querytask.h" +#include "tdatablock.h" + +typedef struct SSortMergeInfo { + SArray* pSortInfo; + SSortHandle* pSortHandle; + STupleHandle* prefetchedTuple; + int32_t bufPageSize; + uint32_t sortBufSize; // max buffer size for in-memory sort + SSDataBlock* pIntermediateBlock; // to hold the intermediate result + SSDataBlock* pInputBlock; + SColMatchInfo matchInfo; +} SSortMergeInfo; + +typedef struct SNonSortMergeInfo { + int32_t lastSourceIdx; + int32_t sourceWorkIdx; + int32_t sourceNum; + int32_t* pSourceStatus; +} SNonSortMergeInfo; + +typedef struct SColsMergeInfo { + SNodeList* pTargets; + uint64_t srcBlkIds[2]; +} SColsMergeInfo; + +typedef struct SMultiwayMergeOperatorInfo { + SOptrBasicInfo binfo; + EMergeType type; + union { + SSortMergeInfo sortMergeInfo; + SNonSortMergeInfo nsortMergeInfo; + SColsMergeInfo colsMergeInfo; + }; + SLimitInfo limitInfo; + bool groupMerge; + bool ignoreGroupId; + uint64_t groupId; + bool inputWithGroupId; +} SMultiwayMergeOperatorInfo; + +SSDataBlock* sortMergeloadNextDataBlock(void* param) { + SOperatorInfo* pOperator = (SOperatorInfo*)param; + SSDataBlock* pBlock = pOperator->fpSet.getNextFn(pOperator); + return pBlock; +} + +int32_t openSortMergeOperator(SOperatorInfo* pOperator) { + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + SExecTaskInfo* pTaskInfo 
= pOperator->pTaskInfo; + SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo; + + int32_t numOfBufPage = pSortMergeInfo->sortBufSize / pSortMergeInfo->bufPageSize; + + pSortMergeInfo->pSortHandle = tsortCreateSortHandle(pSortMergeInfo->pSortInfo, SORT_MULTISOURCE_MERGE, pSortMergeInfo->bufPageSize, numOfBufPage, + pSortMergeInfo->pInputBlock, pTaskInfo->id.str, 0, 0, 0); + + tsortSetFetchRawDataFp(pSortMergeInfo->pSortHandle, sortMergeloadNextDataBlock, NULL, NULL); + tsortSetCompareGroupId(pSortMergeInfo->pSortHandle, pInfo->groupMerge); + + for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) { + SOperatorInfo* pDownstream = pOperator->pDownstream[i]; + if (pDownstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_EXCHANGE) { + pDownstream->fpSet._openFn(pDownstream); + } + + SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); + ps->param = pDownstream; + ps->onlyRef = true; + + tsortAddSource(pSortMergeInfo->pSortHandle, ps); + } + + return tsortOpen(pSortMergeInfo->pSortHandle); +} + +static void doGetSortedBlockData(SMultiwayMergeOperatorInfo* pInfo, SSortHandle* pHandle, int32_t capacity, + SSDataBlock* p, bool* newgroup) { + SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo; + *newgroup = false; + + while (1) { + STupleHandle* pTupleHandle = NULL; + if (pInfo->groupMerge || pInfo->inputWithGroupId) { + if (pSortMergeInfo->prefetchedTuple == NULL) { + pTupleHandle = tsortNextTuple(pHandle); + } else { + pTupleHandle = pSortMergeInfo->prefetchedTuple; + pSortMergeInfo->prefetchedTuple = NULL; + uint64_t gid = tsortGetGroupId(pTupleHandle); + if (gid != pInfo->groupId) { + *newgroup = true; + pInfo->groupId = gid; + } + } + } else { + pTupleHandle = tsortNextTuple(pHandle); + pInfo->groupId = 0; + } + + if (pTupleHandle == NULL) { + break; + } + + if (pInfo->groupMerge || pInfo->inputWithGroupId) { + uint64_t tupleGroupId = tsortGetGroupId(pTupleHandle); + if (pInfo->groupId == 0 || pInfo->groupId == tupleGroupId) { + appendOneRowToDataBlock(p, 
pTupleHandle); + p->info.id.groupId = tupleGroupId; + pInfo->groupId = tupleGroupId; + } else { + if (p->info.rows == 0) { + appendOneRowToDataBlock(p, pTupleHandle); + p->info.id.groupId = pInfo->groupId = tupleGroupId; + } else { + pSortMergeInfo->prefetchedTuple = pTupleHandle; + break; + } + } + } else { + appendOneRowToDataBlock(p, pTupleHandle); + } + + if (p->info.rows >= capacity) { + break; + } + } +} + +SSDataBlock* doSortMerge(SOperatorInfo* pOperator) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo; + SSortHandle* pHandle = pSortMergeInfo->pSortHandle; + SSDataBlock* pDataBlock = pInfo->binfo.pRes; + SArray* pColMatchInfo = pSortMergeInfo->matchInfo.pList; + int32_t capacity = pOperator->resultInfo.capacity; + + qDebug("start to merge final sorted rows, %s", GET_TASKID(pTaskInfo)); + + blockDataCleanup(pDataBlock); + + if (pSortMergeInfo->pIntermediateBlock == NULL) { + pSortMergeInfo->pIntermediateBlock = tsortGetSortedDataBlock(pHandle); + if (pSortMergeInfo->pIntermediateBlock == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + blockDataEnsureCapacity(pSortMergeInfo->pIntermediateBlock, capacity); + } else { + blockDataCleanup(pSortMergeInfo->pIntermediateBlock); + } + + SSDataBlock* p = pSortMergeInfo->pIntermediateBlock; + bool newgroup = false; + + while (1) { + doGetSortedBlockData(pInfo, pHandle, capacity, p, &newgroup); + if (p->info.rows == 0) { + break; + } + + if (newgroup) { + resetLimitInfoForNextGroup(&pInfo->limitInfo); + } + + applyLimitOffset(&pInfo->limitInfo, p, pTaskInfo); + + if (p->info.rows > 0) { + break; + } + } + + if (p->info.rows > 0) { + int32_t numOfCols = taosArrayGetSize(pColMatchInfo); + for (int32_t i = 0; i < numOfCols; ++i) { + SColMatchItem* pmInfo = taosArrayGet(pColMatchInfo, i); + + SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, pmInfo->srcSlotId); + SColumnInfoData* pDst = 
taosArrayGet(pDataBlock->pDataBlock, pmInfo->dstSlotId); + colDataAssign(pDst, pSrc, p->info.rows, &pDataBlock->info); + } + + pDataBlock->info.rows = p->info.rows; + pDataBlock->info.scanFlag = p->info.scanFlag; + if (pInfo->ignoreGroupId) { + pDataBlock->info.id.groupId = 0; + } else { + pDataBlock->info.id.groupId = pInfo->groupId; + } + pDataBlock->info.dataLoad = 1; + } + + qDebug("%s get sorted block, groupId:0x%" PRIx64 " rows:%" PRId64 , GET_TASKID(pTaskInfo), pDataBlock->info.id.groupId, + pDataBlock->info.rows); + + return (pDataBlock->info.rows > 0) ? pDataBlock : NULL; +} + + +int32_t getSortMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { + SSortExecInfo* pSortExecInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo)); + + SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info; + SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo; + + *pSortExecInfo = tsortGetSortExecInfo(pSortMergeInfo->pSortHandle); + *pOptrExplain = pSortExecInfo; + + *len = sizeof(SSortExecInfo); + return TSDB_CODE_SUCCESS; +} + + +void destroySortMergeOperatorInfo(void* param) { + SSortMergeInfo* pSortMergeInfo = param; + pSortMergeInfo->pInputBlock = blockDataDestroy(pSortMergeInfo->pInputBlock); + pSortMergeInfo->pIntermediateBlock = blockDataDestroy(pSortMergeInfo->pIntermediateBlock); + + taosArrayDestroy(pSortMergeInfo->matchInfo.pList); + + tsortDestroySortHandle(pSortMergeInfo->pSortHandle); + taosArrayDestroy(pSortMergeInfo->pSortInfo); +} + +#define NON_SORT_NEXT_SRC(_info, _idx) ((++(_idx) >= (_info)->sourceNum) ? 
((_info)->sourceWorkIdx) : (_idx)) + +int32_t openNonSortMergeOperator(SOperatorInfo* pOperator) { + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SNonSortMergeInfo* pNonSortMergeInfo = &pInfo->nsortMergeInfo; + + pNonSortMergeInfo->sourceWorkIdx = 0; + pNonSortMergeInfo->sourceNum = pOperator->numOfDownstream; + pNonSortMergeInfo->lastSourceIdx = -1; + pNonSortMergeInfo->pSourceStatus = taosMemoryCalloc(pOperator->numOfDownstream, sizeof(*pNonSortMergeInfo->pSourceStatus)); + if (NULL == pNonSortMergeInfo->pSourceStatus) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) { + pNonSortMergeInfo->pSourceStatus[i] = i; + } + + return TSDB_CODE_SUCCESS; +} + +SSDataBlock* doNonSortMerge(SOperatorInfo* pOperator) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + SNonSortMergeInfo* pNonSortMerge = &pInfo->nsortMergeInfo; + SSDataBlock* pBlock = NULL; + + qDebug("start to merge no sorted rows, %s", GET_TASKID(pTaskInfo)); + + int32_t idx = NON_SORT_NEXT_SRC(pNonSortMerge, pNonSortMerge->lastSourceIdx); + while (idx < pNonSortMerge->sourceNum) { + pBlock = getNextBlockFromDownstream(pOperator, pNonSortMerge->pSourceStatus[idx]); + if (NULL == pBlock) { + TSWAP(pNonSortMerge->pSourceStatus[pNonSortMerge->sourceWorkIdx], pNonSortMerge->pSourceStatus[idx]); + pNonSortMerge->sourceWorkIdx++; + idx = NON_SORT_NEXT_SRC(pNonSortMerge, idx); + continue; + } + break; + } + + return pBlock; +} + +void destroyNonSortMergeOperatorInfo(void* param) { + SNonSortMergeInfo* pNonSortMerge = param; + taosMemoryFree(pNonSortMerge->pSourceStatus); +} + +int32_t getNonSortMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { + return TSDB_CODE_SUCCESS; +} + + +int32_t openColsMergeOperator(SOperatorInfo* pOperator) { + return TSDB_CODE_SUCCESS; +} + +int32_t copyColumnsValue(SNodeList* 
pNodeList, uint64_t targetBlkId, SSDataBlock* pDst, SSDataBlock* pSrc) { + bool isNull = (NULL == pSrc || pSrc->info.rows <= 0); + size_t numOfCols = LIST_LENGTH(pNodeList); + for (int32_t i = 0; i < numOfCols; ++i) { + STargetNode* pNode = (STargetNode*)nodesListGetNode(pNodeList, i); + if (nodeType(pNode->pExpr) == QUERY_NODE_COLUMN && ((SColumnNode*)pNode->pExpr)->dataBlockId == targetBlkId) { + SColumnInfoData* pDstCol = taosArrayGet(pDst->pDataBlock, pNode->slotId); + if (isNull) { + colDataSetVal(pDstCol, 0, NULL, true); + } else { + SColumnInfoData* pSrcCol = taosArrayGet(pSrc->pDataBlock, ((SColumnNode*)pNode->pExpr)->slotId); + colDataAssign(pDstCol, pSrcCol, 1, &pDst->info); + } + } + } + + return TSDB_CODE_SUCCESS; +} + +SSDataBlock* doColsMerge(SOperatorInfo* pOperator) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + SSDataBlock* pBlock = NULL; + SColsMergeInfo* pColsMerge = &pInfo->colsMergeInfo; + int32_t nullBlkNum = 0; + + qDebug("start to merge columns, %s", GET_TASKID(pTaskInfo)); + + for (int32_t i = 0; i < 2; ++i) { + pBlock = getNextBlockFromDownstream(pOperator, i); + if (pBlock && pBlock->info.rows > 1) { + qError("more than 1 row returned from downstream, rows:%" PRId64, pBlock->info.rows); + T_LONG_JMP(pTaskInfo->env, TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR); + } else if (NULL == pBlock) { + nullBlkNum++; + } + + copyColumnsValue(pColsMerge->pTargets, pColsMerge->srcBlkIds[i], pInfo->binfo.pRes, pBlock); + } + + setOperatorCompleted(pOperator); + + if (2 == nullBlkNum) { + return NULL; + } + + pInfo->binfo.pRes->info.rows = 1; + + return pInfo->binfo.pRes; +} + +void destroyColsMergeOperatorInfo(void* param) { +} + +int32_t getColsMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { + return TSDB_CODE_SUCCESS; +} + + +SOperatorFpSet gMultiwayMergeFps[MERGE_TYPE_MAX_VALUE] = { + {0}, + {._openFn = openSortMergeOperator, .getNextFn = doSortMerge, .closeFn 
= destroySortMergeOperatorInfo, .getExplainFn = getSortMergeExplainExecInfo}, + {._openFn = openNonSortMergeOperator, .getNextFn = doNonSortMerge, .closeFn = destroyNonSortMergeOperatorInfo, .getExplainFn = getNonSortMergeExplainExecInfo}, + {._openFn = openColsMergeOperator, .getNextFn = doColsMerge, .closeFn = destroyColsMergeOperatorInfo, .getExplainFn = getColsMergeExplainExecInfo}, +}; + + +int32_t openMultiwayMergeOperator(SOperatorInfo* pOperator) { + int32_t code = 0; + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + + if (OPTR_IS_OPENED(pOperator)) { + return TSDB_CODE_SUCCESS; + } + + int64_t startTs = taosGetTimestampUs(); + + if (NULL != gMultiwayMergeFps[pInfo->type]._openFn) { + code = (*gMultiwayMergeFps[pInfo->type]._openFn)(pOperator); + } + + pOperator->cost.openCost = (taosGetTimestampUs() - startTs) / 1000.0; + pOperator->status = OP_RES_TO_RETURN; + + if (code != TSDB_CODE_SUCCESS) { + T_LONG_JMP(pTaskInfo->env, terrno); + } + + OPTR_SET_OPENED(pOperator); + return code; +} + +SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) { + if (pOperator->status == OP_EXEC_DONE) { + return NULL; + } + + SSDataBlock* pBlock = NULL; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + + int32_t code = pOperator->fpSet._openFn(pOperator); + if (code != TSDB_CODE_SUCCESS) { + T_LONG_JMP(pTaskInfo->env, code); + } + + if (NULL != gMultiwayMergeFps[pInfo->type].getNextFn) { + pBlock = (*gMultiwayMergeFps[pInfo->type].getNextFn)(pOperator); + } + if (pBlock != NULL) { + pOperator->resultInfo.totalRows += pBlock->info.rows; + } else { + setOperatorCompleted(pOperator); + } + + return pBlock; +} + +void destroyMultiwayMergeOperatorInfo(void* param) { + SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)param; + pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes); + + if (NULL != gMultiwayMergeFps[pInfo->type].closeFn) { + 
(*gMultiwayMergeFps[pInfo->type].closeFn)(&pInfo->sortMergeInfo); + } + + taosMemoryFreeClear(param); +} + +int32_t getMultiwayMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { + int32_t code = 0; + SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info; + + if (NULL != gMultiwayMergeFps[pInfo->type].getExplainFn) { + code = (*gMultiwayMergeFps[pInfo->type].getExplainFn)(pOptr, pOptrExplain, len); + } + + return code; +} + +SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** downStreams, size_t numStreams, + SMergePhysiNode* pMergePhyNode, SExecTaskInfo* pTaskInfo) { + SPhysiNode* pPhyNode = (SPhysiNode*)pMergePhyNode; + + SMultiwayMergeOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SMultiwayMergeOperatorInfo)); + SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + SDataBlockDescNode* pDescNode = pPhyNode->pOutputDataBlockDesc; + + int32_t code = TSDB_CODE_SUCCESS; + if (pInfo == NULL || pOperator == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _error; + } + + pInfo->groupMerge = pMergePhyNode->groupSort; + pInfo->ignoreGroupId = pMergePhyNode->ignoreGroupId; + pInfo->binfo.inputTsOrder = pMergePhyNode->node.inputTsOrder; + pInfo->binfo.outputTsOrder = pMergePhyNode->node.outputTsOrder; + pInfo->inputWithGroupId = pMergePhyNode->inputWithGroupId; + + pInfo->type = pMergePhyNode->type; + switch (pInfo->type) { + case MERGE_TYPE_SORT: { + SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo; + initLimitInfo(pMergePhyNode->node.pLimit, pMergePhyNode->node.pSlimit, &pInfo->limitInfo); + pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode); + + SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0); + SSDataBlock* pInputBlock = createDataBlockFromDescNode(pChildNode->pOutputDataBlockDesc); + + initResultSizeInfo(&pOperator->resultInfo, 1024); + blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); + + size_t numOfCols = 
taosArrayGetSize(pInfo->binfo.pRes->pDataBlock); + int32_t rowSize = pInfo->binfo.pRes->info.rowSize; + int32_t numOfOutputCols = 0; + pSortMergeInfo->pSortInfo = createSortInfo(pMergePhyNode->pMergeKeys); + pSortMergeInfo->bufPageSize = getProperSortPageSize(rowSize, numOfCols); + pSortMergeInfo->sortBufSize = pSortMergeInfo->bufPageSize * (numStreams + 1); // one additional is reserved for merged result. + pSortMergeInfo->pInputBlock = pInputBlock; + code = extractColMatchInfo(pMergePhyNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID, + &pSortMergeInfo->matchInfo); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + break; + } + case MERGE_TYPE_NON_SORT: { + SNonSortMergeInfo* pNonSortMerge = &pInfo->nsortMergeInfo; + break; + } + case MERGE_TYPE_COLUMNS: { + SColsMergeInfo* pColsMerge = &pInfo->colsMergeInfo; + pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode); + initResultSizeInfo(&pOperator->resultInfo, 1); + blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); + + pColsMerge->pTargets = pMergePhyNode->pTargets; + pColsMerge->srcBlkIds[0] = getOperatorResultBlockId(downStreams[0], 0); + pColsMerge->srcBlkIds[1] = getOperatorResultBlockId(downStreams[1], 0); + break; + } + default: + qError("Invalid merge type: %d", pInfo->type); + code = TSDB_CODE_INVALID_PARA; + goto _error; + } + + setOperatorInfo(pOperator, "MultiwayMergeOperator", QUERY_NODE_PHYSICAL_PLAN_MERGE, false, OP_NOT_OPENED, pInfo, pTaskInfo); + pOperator->fpSet = createOperatorFpSet(openMultiwayMergeOperator, doMultiwayMerge, NULL, + destroyMultiwayMergeOperatorInfo, optrDefaultBufFn, getMultiwayMergeExplainExecInfo, optrDefaultGetNextExtFn, NULL); + + code = appendDownstream(pOperator, downStreams, numStreams); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + return pOperator; + +_error: + if (pInfo != NULL) { + destroyMultiwayMergeOperatorInfo(pInfo); + } + + pTaskInfo->code = code; + taosMemoryFree(pOperator); + return NULL; +} diff 
--git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index ccef6640be..507dbe7ee2 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -675,293 +675,5 @@ _error: return NULL; } -//===================================================================================== -// Multiway Sort Merge operator -typedef struct SMultiwayMergeOperatorInfo { - SOptrBasicInfo binfo; - int32_t bufPageSize; - uint32_t sortBufSize; // max buffer size for in-memory sort - SLimitInfo limitInfo; - SArray* pSortInfo; - SSortHandle* pSortHandle; - SColMatchInfo matchInfo; - SSDataBlock* pInputBlock; - SSDataBlock* pIntermediateBlock; // to hold the intermediate result - int64_t startTs; // sort start time - bool groupSort; - bool ignoreGroupId; - uint64_t groupId; - STupleHandle* prefetchedTuple; - bool inputWithGroupId; -} SMultiwayMergeOperatorInfo; -int32_t openMultiwayMergeOperator(SOperatorInfo* pOperator) { - SMultiwayMergeOperatorInfo* pInfo = pOperator->info; - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - if (OPTR_IS_OPENED(pOperator)) { - return TSDB_CODE_SUCCESS; - } - - pInfo->startTs = taosGetTimestampUs(); - int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; - - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_MULTISOURCE_MERGE, pInfo->bufPageSize, numOfBufPage, - pInfo->pInputBlock, pTaskInfo->id.str, 0, 0, 0); - - tsortSetFetchRawDataFp(pInfo->pSortHandle, loadNextDataBlock, NULL, NULL); - tsortSetCompareGroupId(pInfo->pSortHandle, pInfo->groupSort); - - for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) { - SOperatorInfo* pDownstream = pOperator->pDownstream[i]; - if (pDownstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_EXCHANGE) { - pDownstream->fpSet._openFn(pDownstream); - } - - SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); - ps->param = pDownstream; - ps->onlyRef = true; - - tsortAddSource(pInfo->pSortHandle, ps); - } - - int32_t 
code = tsortOpen(pInfo->pSortHandle); - if (code != TSDB_CODE_SUCCESS) { - T_LONG_JMP(pTaskInfo->env, terrno); - } - - pOperator->cost.openCost = (taosGetTimestampUs() - pInfo->startTs) / 1000.0; - pOperator->status = OP_RES_TO_RETURN; - - OPTR_SET_OPENED(pOperator); - return TSDB_CODE_SUCCESS; -} - -static void doGetSortedBlockData(SMultiwayMergeOperatorInfo* pInfo, SSortHandle* pHandle, int32_t capacity, - SSDataBlock* p, bool* newgroup) { - *newgroup = false; - - while (1) { - STupleHandle* pTupleHandle = NULL; - if (pInfo->groupSort || pInfo->inputWithGroupId) { - if (pInfo->prefetchedTuple == NULL) { - pTupleHandle = tsortNextTuple(pHandle); - } else { - pTupleHandle = pInfo->prefetchedTuple; - pInfo->prefetchedTuple = NULL; - uint64_t gid = tsortGetGroupId(pTupleHandle); - if (gid != pInfo->groupId) { - *newgroup = true; - pInfo->groupId = gid; - } - } - } else { - pTupleHandle = tsortNextTuple(pHandle); - pInfo->groupId = 0; - } - - if (pTupleHandle == NULL) { - break; - } - - if (pInfo->groupSort || pInfo->inputWithGroupId) { - uint64_t tupleGroupId = tsortGetGroupId(pTupleHandle); - if (pInfo->groupId == 0 || pInfo->groupId == tupleGroupId) { - appendOneRowToDataBlock(p, pTupleHandle); - p->info.id.groupId = tupleGroupId; - pInfo->groupId = tupleGroupId; - } else { - if (p->info.rows == 0) { - appendOneRowToDataBlock(p, pTupleHandle); - p->info.id.groupId = pInfo->groupId = tupleGroupId; - } else { - pInfo->prefetchedTuple = pTupleHandle; - break; - } - } - } else { - appendOneRowToDataBlock(p, pTupleHandle); - } - - if (p->info.rows >= capacity) { - break; - } - } -} - -SSDataBlock* getMultiwaySortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, SArray* pColMatchInfo, - SOperatorInfo* pOperator) { - SMultiwayMergeOperatorInfo* pInfo = pOperator->info; - - int32_t capacity = pOperator->resultInfo.capacity; - - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - blockDataCleanup(pDataBlock); - - if (pInfo->pIntermediateBlock == NULL) { - 
pInfo->pIntermediateBlock = tsortGetSortedDataBlock(pHandle); - if (pInfo->pIntermediateBlock == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - blockDataEnsureCapacity(pInfo->pIntermediateBlock, capacity); - } else { - blockDataCleanup(pInfo->pIntermediateBlock); - } - - SSDataBlock* p = pInfo->pIntermediateBlock; - bool newgroup = false; - - while (1) { - doGetSortedBlockData(pInfo, pHandle, capacity, p, &newgroup); - if (p->info.rows == 0) { - break; - } - - if (newgroup) { - resetLimitInfoForNextGroup(&pInfo->limitInfo); - } - - applyLimitOffset(&pInfo->limitInfo, p, pTaskInfo); - - if (p->info.rows > 0) { - break; - } - } - - if (p->info.rows > 0) { - int32_t numOfCols = taosArrayGetSize(pColMatchInfo); - for (int32_t i = 0; i < numOfCols; ++i) { - SColMatchItem* pmInfo = taosArrayGet(pColMatchInfo, i); - - SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, pmInfo->srcSlotId); - SColumnInfoData* pDst = taosArrayGet(pDataBlock->pDataBlock, pmInfo->dstSlotId); - colDataAssign(pDst, pSrc, p->info.rows, &pDataBlock->info); - } - - pDataBlock->info.rows = p->info.rows; - pDataBlock->info.scanFlag = p->info.scanFlag; - if (pInfo->ignoreGroupId) { - pDataBlock->info.id.groupId = 0; - } else { - pDataBlock->info.id.groupId = pInfo->groupId; - } - pDataBlock->info.dataLoad = 1; - } - - qDebug("%s get sorted block, groupId:0x%" PRIx64 " rows:%" PRId64 , GET_TASKID(pTaskInfo), pDataBlock->info.id.groupId, - pDataBlock->info.rows); - - return (pDataBlock->info.rows > 0) ? 
pDataBlock : NULL; -} - -SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) { - if (pOperator->status == OP_EXEC_DONE) { - return NULL; - } - - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SMultiwayMergeOperatorInfo* pInfo = pOperator->info; - - int32_t code = pOperator->fpSet._openFn(pOperator); - if (code != TSDB_CODE_SUCCESS) { - T_LONG_JMP(pTaskInfo->env, code); - } - - qDebug("start to merge final sorted rows, %s", GET_TASKID(pTaskInfo)); - SSDataBlock* pBlock = getMultiwaySortedBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, pInfo->matchInfo.pList, pOperator); - if (pBlock != NULL) { - pOperator->resultInfo.totalRows += pBlock->info.rows; - } else { - setOperatorCompleted(pOperator); - } - - return pBlock; -} - -void destroyMultiwayMergeOperatorInfo(void* param) { - SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)param; - pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes); - pInfo->pInputBlock = blockDataDestroy(pInfo->pInputBlock); - pInfo->pIntermediateBlock = blockDataDestroy(pInfo->pIntermediateBlock); - - tsortDestroySortHandle(pInfo->pSortHandle); - taosArrayDestroy(pInfo->pSortInfo); - taosArrayDestroy(pInfo->matchInfo.pList); - - taosMemoryFreeClear(param); -} - -int32_t getMultiwayMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { - SSortExecInfo* pSortExecInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo)); - - SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info; - - *pSortExecInfo = tsortGetSortExecInfo(pInfo->pSortHandle); - *pOptrExplain = pSortExecInfo; - - *len = sizeof(SSortExecInfo); - return TSDB_CODE_SUCCESS; -} - -SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** downStreams, size_t numStreams, - SMergePhysiNode* pMergePhyNode, SExecTaskInfo* pTaskInfo) { - SPhysiNode* pPhyNode = (SPhysiNode*)pMergePhyNode; - - SMultiwayMergeOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SMultiwayMergeOperatorInfo)); - SOperatorInfo* pOperator = 
taosMemoryCalloc(1, sizeof(SOperatorInfo)); - SDataBlockDescNode* pDescNode = pPhyNode->pOutputDataBlockDesc; - - int32_t code = TSDB_CODE_SUCCESS; - if (pInfo == NULL || pOperator == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _error; - } - - initLimitInfo(pMergePhyNode->node.pLimit, pMergePhyNode->node.pSlimit, &pInfo->limitInfo); - pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode); - - int32_t rowSize = pInfo->binfo.pRes->info.rowSize; - int32_t numOfOutputCols = 0; - code = extractColMatchInfo(pMergePhyNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID, - &pInfo->matchInfo); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0); - SSDataBlock* pInputBlock = createDataBlockFromDescNode(pChildNode->pOutputDataBlockDesc); - - initResultSizeInfo(&pOperator->resultInfo, 1024); - blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); - - pInfo->groupSort = pMergePhyNode->groupSort; - pInfo->ignoreGroupId = pMergePhyNode->ignoreGroupId; - pInfo->pSortInfo = createSortInfo(pMergePhyNode->pMergeKeys); - pInfo->pInputBlock = pInputBlock; - size_t numOfCols = taosArrayGetSize(pInfo->binfo.pRes->pDataBlock); - pInfo->bufPageSize = getProperSortPageSize(rowSize, numOfCols); - pInfo->sortBufSize = pInfo->bufPageSize * (numStreams + 1); // one additional is reserved for merged result. 
- pInfo->binfo.inputTsOrder = pMergePhyNode->node.inputTsOrder; - pInfo->binfo.outputTsOrder = pMergePhyNode->node.outputTsOrder; - pInfo->inputWithGroupId = pMergePhyNode->inputWithGroupId; - - setOperatorInfo(pOperator, "MultiwayMergeOperator", QUERY_NODE_PHYSICAL_PLAN_MERGE, false, OP_NOT_OPENED, pInfo, pTaskInfo); - pOperator->fpSet = createOperatorFpSet(openMultiwayMergeOperator, doMultiwayMerge, NULL, - destroyMultiwayMergeOperatorInfo, optrDefaultBufFn, getMultiwayMergeExplainExecInfo, optrDefaultGetNextExtFn, NULL); - - code = appendDownstream(pOperator, downStreams, numStreams); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - return pOperator; - -_error: - if (pInfo != NULL) { - destroyMultiwayMergeOperatorInfo(pInfo); - } - - pTaskInfo->code = code; - taosMemoryFree(pOperator); - return NULL; -} diff --git a/source/libs/function/src/detail/tavgfunction.c b/source/libs/function/src/detail/tavgfunction.c index 50df1b5067..e626c937da 100644 --- a/source/libs/function/src/detail/tavgfunction.c +++ b/source/libs/function/src/detail/tavgfunction.c @@ -565,7 +565,7 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { numOfElem = pInput->numOfRows; pAvgRes->count += pInput->numOfRows; - bool simdAvailable = tsAVXEnable && tsSIMDBuiltins && (numOfRows > THRESHOLD_SIZE); + bool simdAvailable = tsAVXEnable && tsSIMDEnable && (numOfRows > THRESHOLD_SIZE); switch(type) { case TSDB_DATA_TYPE_UTINYINT: diff --git a/source/libs/function/src/detail/tminmax.c b/source/libs/function/src/detail/tminmax.c index 3ca1c06303..a6c91a57ce 100644 --- a/source/libs/function/src/detail/tminmax.c +++ b/source/libs/function/src/detail/tminmax.c @@ -370,7 +370,7 @@ static int32_t findFirstValPosition(const SColumnInfoData* pCol, int32_t start, static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, bool signVal) { // AVX2 version to speedup the loop - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { 
pBuf->v = i8VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); } else { if (!pBuf->assign) { @@ -404,7 +404,7 @@ static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SM static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, bool signVal) { // AVX2 version to speedup the loop - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { pBuf->v = i16VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); } else { if (!pBuf->assign) { @@ -438,7 +438,7 @@ static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, S static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, bool signVal) { // AVX2 version to speedup the loop - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { pBuf->v = i32VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); } else { if (!pBuf->assign) { @@ -502,7 +502,7 @@ static void handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRo float* val = (float*)&pBuf->v; // AVX version to speedup the loop - if (tsAVXEnable && tsSIMDBuiltins) { + if (tsAVXEnable && tsSIMDEnable) { *val = floatVectorCmpAVX(pData, numOfRows, isMinFunc); } else { if (!pBuf->assign) { @@ -533,7 +533,7 @@ static void handleDoubleCol(SColumnInfoData* pCol, int32_t start, int32_t numOfR double* val = (double*)&pBuf->v; // AVX version to speedup the loop - if (tsAVXEnable && tsSIMDBuiltins) { + if (tsAVXEnable && tsSIMDEnable) { *val = (double)doubleVectorCmpAVX(pData, numOfRows, isMinFunc); } else { if (!pBuf->assign) { diff --git a/source/libs/nodes/src/nodesCloneFuncs.c b/source/libs/nodes/src/nodesCloneFuncs.c index 7060af6459..ce23928268 100644 --- a/source/libs/nodes/src/nodesCloneFuncs.c +++ b/source/libs/nodes/src/nodesCloneFuncs.c @@ -419,6 +419,7 @@ static int32_t logicScanCopy(const SScanLogicNode* pSrc, SScanLogicNode* pDst) { 
COPY_SCALAR_FIELD(groupSort); CLONE_NODE_LIST_FIELD(pTags); CLONE_NODE_FIELD(pSubtable); + COPY_SCALAR_FIELD(cacheLastMode); COPY_SCALAR_FIELD(igLastNull); COPY_SCALAR_FIELD(groupOrderScan); COPY_SCALAR_FIELD(onlyMetaCtbIdx); @@ -443,8 +444,14 @@ static int32_t logicAggCopy(const SAggLogicNode* pSrc, SAggLogicNode* pDst) { COPY_BASE_OBJECT_FIELD(node, logicNodeCopy); CLONE_NODE_LIST_FIELD(pGroupKeys); CLONE_NODE_LIST_FIELD(pAggFuncs); + COPY_SCALAR_FIELD(hasLastRow); + COPY_SCALAR_FIELD(hasLast); + COPY_SCALAR_FIELD(hasTimeLineFunc); + COPY_SCALAR_FIELD(onlyHasKeepOrderFunc); COPY_SCALAR_FIELD(hasGroupKeyOptimized); + COPY_SCALAR_FIELD(isGroupTb); COPY_SCALAR_FIELD(isPartTb); + COPY_SCALAR_FIELD(hasGroup); return TSDB_CODE_SUCCESS; } @@ -488,6 +495,8 @@ static int32_t logicMergeCopy(const SMergeLogicNode* pSrc, SMergeLogicNode* pDst CLONE_NODE_LIST_FIELD(pInputs); COPY_SCALAR_FIELD(numOfChannels); COPY_SCALAR_FIELD(srcGroupId); + COPY_SCALAR_FIELD(colsMerge); + COPY_SCALAR_FIELD(needSort); COPY_SCALAR_FIELD(groupSort); COPY_SCALAR_FIELD(ignoreGroupId); COPY_SCALAR_FIELD(inputWithGroupId); diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index c9b49ee30f..f3087dd5d4 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -2285,6 +2285,7 @@ static const char* jkMergePhysiPlanSrcGroupId = "SrcGroupId"; static const char* jkMergePhysiPlanGroupSort = "GroupSort"; static const char* jkMergePhysiPlanIgnoreGroupID = "IgnoreGroupID"; static const char* jkMergePhysiPlanInputWithGroupId = "InputWithGroupId"; +static const char* jkMergePhysiPlanType = "Type"; static int32_t physiMergeNodeToJson(const void* pObj, SJson* pJson) { const SMergePhysiNode* pNode = (const SMergePhysiNode*)pObj; @@ -2311,6 +2312,9 @@ static int32_t physiMergeNodeToJson(const void* pObj, SJson* pJson) { if (TSDB_CODE_SUCCESS == code) { code = tjsonAddBoolToObject(pJson, jkMergePhysiPlanInputWithGroupId, 
pNode->inputWithGroupId); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddIntegerToObject(pJson, jkMergePhysiPlanType, pNode->type); + } return code; } @@ -2337,6 +2341,9 @@ static int32_t jsonToPhysiMergeNode(const SJson* pJson, void* pObj) { if (TSDB_CODE_SUCCESS == code) { code = tjsonGetBoolValue(pJson, jkMergePhysiPlanIgnoreGroupID, &pNode->ignoreGroupId); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonGetIntValue(pJson, jkMergePhysiPlanType, (int32_t*)&pNode->type); + } return code; } diff --git a/source/libs/nodes/src/nodesMsgFuncs.c b/source/libs/nodes/src/nodesMsgFuncs.c index ea59d93d7f..9804f2075b 100644 --- a/source/libs/nodes/src/nodesMsgFuncs.c +++ b/source/libs/nodes/src/nodesMsgFuncs.c @@ -2690,6 +2690,7 @@ enum { PHY_MERGE_CODE_GROUP_SORT, PHY_MERGE_CODE_IGNORE_GROUP_ID, PHY_MERGE_CODE_INPUT_WITH_GROUP_ID, + PHY_MERGE_CODE_TYPE, }; static int32_t physiMergeNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { @@ -2717,6 +2718,9 @@ static int32_t physiMergeNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { if (TSDB_CODE_SUCCESS == code) { code = tlvEncodeBool(pEncoder, PHY_MERGE_CODE_INPUT_WITH_GROUP_ID, pNode->inputWithGroupId); } + if (TSDB_CODE_SUCCESS == code) { + code = tlvEncodeI32(pEncoder, PHY_MERGE_CODE_TYPE, pNode->type); + } return code; } @@ -2752,6 +2756,9 @@ static int32_t msgToPhysiMergeNode(STlvDecoder* pDecoder, void* pObj) { case PHY_MERGE_CODE_INPUT_WITH_GROUP_ID: code = tlvDecodeBool(pTlv, &pNode->inputWithGroupId); break; + case PHY_MERGE_CODE_TYPE: + code = tlvDecodeI32(pTlv, (int32_t*)&pNode->type); + break; default: break; } diff --git a/source/libs/nodes/src/nodesUtilFuncs.c b/source/libs/nodes/src/nodesUtilFuncs.c index ee22caf574..d167d81c82 100644 --- a/source/libs/nodes/src/nodesUtilFuncs.c +++ b/source/libs/nodes/src/nodesUtilFuncs.c @@ -1572,6 +1572,19 @@ int32_t nodesListStrictAppendList(SNodeList* pTarget, SNodeList* pSrc) { return code; } + +int32_t nodesListMakeStrictAppendList(SNodeList** pTarget, 
SNodeList* pSrc) { + if (NULL == *pTarget) { + *pTarget = nodesMakeList(); + if (NULL == *pTarget) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return TSDB_CODE_OUT_OF_MEMORY; + } + } + return nodesListStrictAppendList(*pTarget, pSrc); +} + + int32_t nodesListPushFront(SNodeList* pList, SNode* pNode) { if (NULL == pList || NULL == pNode) { return TSDB_CODE_FAILED; diff --git a/source/libs/parser/src/parCalcConst.c b/source/libs/parser/src/parCalcConst.c index 0657f1a43d..441f4da3b1 100644 --- a/source/libs/parser/src/parCalcConst.c +++ b/source/libs/parser/src/parCalcConst.c @@ -176,12 +176,15 @@ static int32_t calcConstStmtCondition(SCalcConstContext* pCxt, SNode** pCond, bo static EDealRes doFindAndReplaceNode(SNode** pNode, void* pContext) { SCalcConstContext* pCxt = pContext; if (pCxt->replaceCxt.pTarget == *pNode) { + char aliasName[TSDB_COL_NAME_LEN] = {0}; + strcpy(aliasName, ((SExprNode*)*pNode)->aliasName); nodesDestroyNode(*pNode); *pNode = nodesCloneNode(pCxt->replaceCxt.pNew); if (NULL == *pNode) { pCxt->code = TSDB_CODE_OUT_OF_MEMORY; return DEAL_RES_ERROR; } + strcpy(((SExprNode*)*pNode)->aliasName, aliasName); pCxt->replaceCxt.replaced = true; return DEAL_RES_END; @@ -211,7 +214,6 @@ static int32_t calcConstProject(SCalcConstContext* pCxt, SNode* pProject, bool d } char aliasName[TSDB_COL_NAME_LEN] = {0}; - strcpy(aliasName, ((SExprNode*)pProject)->aliasName); int32_t code = TSDB_CODE_SUCCESS; if (dual) { code = scalarCalculateConstantsFromDual(pProject, pNew); @@ -219,15 +221,20 @@ static int32_t calcConstProject(SCalcConstContext* pCxt, SNode* pProject, bool d code = scalarCalculateConstants(pProject, pNew); } if (TSDB_CODE_SUCCESS == code) { - strcpy(((SExprNode*)*pNew)->aliasName, aliasName); if (QUERY_NODE_VALUE == nodeType(*pNew) && NULL != pAssociation) { int32_t size = taosArrayGetSize(pAssociation); for (int32_t i = 0; i < size; ++i) { SAssociationNode* pAssNode = taosArrayGet(pAssociation, i); SNode** pCol = pAssNode->pPlace; if (*pCol == 
pAssNode->pAssociationNode) { + strcpy(aliasName, ((SExprNode*)*pCol)->aliasName); + SArray* pOrigAss = NULL; + TSWAP(((SExprNode*)*pCol)->pAssociation, pOrigAss); nodesDestroyNode(*pCol); *pCol = nodesCloneNode(*pNew); + TSWAP(pOrigAss, ((SExprNode*)*pCol)->pAssociation); + taosArrayDestroy(pOrigAss); + strcpy(((SExprNode*)*pCol)->aliasName, aliasName); if (NULL == *pCol) { code = TSDB_CODE_OUT_OF_MEMORY; break; diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 3e43fef308..1a65a29259 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -3925,6 +3925,267 @@ static int32_t translatePartitionBy(STranslateContext* pCxt, SSelectStmt* pSelec return code; } +typedef struct SEqCondTbNameTableInfo { + SRealTableNode* pRealTable; + SArray* aTbnames; +} SEqCondTbNameTableInfo; + +//[tableAlias.]tbname = tbNamVal +static bool isOperatorEqTbnameCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray** ppTabNames) { + if (pOperator->opType != OP_TYPE_EQUAL) return false; + SFunctionNode* pTbnameFunc = NULL; + SValueNode* pValueNode = NULL; + if (nodeType(pOperator->pLeft) == QUERY_NODE_FUNCTION && + ((SFunctionNode*)(pOperator->pLeft))->funcType == FUNCTION_TYPE_TBNAME && + nodeType(pOperator->pRight) == QUERY_NODE_VALUE) { + pTbnameFunc = (SFunctionNode*)pOperator->pLeft; + pValueNode = (SValueNode*)pOperator->pRight; + } else if (nodeType(pOperator->pRight) == QUERY_NODE_FUNCTION && + ((SFunctionNode*)(pOperator->pRight))->funcType == FUNCTION_TYPE_TBNAME && + nodeType(pOperator->pLeft) == QUERY_NODE_VALUE) { + pTbnameFunc = (SFunctionNode*)pOperator->pRight; + pValueNode = (SValueNode*)pOperator->pLeft; + } else { + return false; + } + + if (LIST_LENGTH(pTbnameFunc->pParameterList) == 0) { + *ppTableAlias = NULL; + } else if (LIST_LENGTH(pTbnameFunc->pParameterList) == 1) { + SNode* pQualNode = nodesListGetNode(pTbnameFunc->pParameterList, 0); + if 
(nodeType(pQualNode) != QUERY_NODE_VALUE) return false; + SValueNode* pQualValNode = (SValueNode*)pQualNode; + *ppTableAlias = pQualValNode->literal; + } else { + return false; + } + *ppTabNames = taosArrayInit(1, sizeof(void*)); + taosArrayPush(*ppTabNames, &(pValueNode->literal)); + return true; +} + +//[tableAlias.]tbname in (value1, value2, ...) +static bool isOperatorTbnameInCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray** ppTbNames) { + if (pOperator->opType != OP_TYPE_IN) return false; + if (nodeType(pOperator->pLeft) != QUERY_NODE_FUNCTION || + ((SFunctionNode*)(pOperator->pLeft))->funcType != FUNCTION_TYPE_TBNAME || + nodeType(pOperator->pRight) != QUERY_NODE_NODE_LIST) { + return false; + } + + SFunctionNode* pTbnameFunc = (SFunctionNode*)pOperator->pLeft; + if (LIST_LENGTH(pTbnameFunc->pParameterList) == 0) { + *ppTableAlias = NULL; + } else if (LIST_LENGTH(pTbnameFunc->pParameterList) == 1) { + SNode* pQualNode = nodesListGetNode(pTbnameFunc->pParameterList, 0); + if (nodeType(pQualNode) != QUERY_NODE_VALUE) return false; + SValueNode* pQualValNode = (SValueNode*)pQualNode; + *ppTableAlias = pQualValNode->literal; + } else { + return false; + } + *ppTbNames = taosArrayInit(1, sizeof(void*)); + SNodeListNode* pValueListNode = (SNodeListNode*)pOperator->pRight; + SNodeList* pValueNodeList = pValueListNode->pNodeList; + SNode* pValNode = NULL; + FOREACH(pValNode, pValueNodeList) { + if (nodeType(pValNode) != QUERY_NODE_VALUE) { + return false; + } + taosArrayPush(*ppTbNames, &((SValueNode*)pValNode)->literal); + } + return true; + +} + +static bool findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWhere, SEqCondTbNameTableInfo* pInfo) { + int32_t code = TSDB_CODE_SUCCESS; + char* pTableAlias = NULL; + char* pTbNameVal = NULL; + if (isOperatorEqTbnameCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, &pInfo->aTbnames) || + isOperatorTbnameInCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, &pInfo->aTbnames)) { 
+ STableNode* pTable; + if (pTableAlias == NULL) { + pTable = (STableNode*)((SSelectStmt*)(pCxt->pCurrStmt))->pFromTable; + } else { + code = findTable(pCxt, pTableAlias, &pTable); + } + if (code == TSDB_CODE_SUCCESS && nodeType(pTable) == QUERY_NODE_REAL_TABLE && + ((SRealTableNode*)pTable)->pMeta && ((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) { + pInfo->pRealTable = (SRealTableNode*)pTable; + return true; + } + taosArrayDestroy(pInfo->aTbnames); + pInfo->aTbnames = NULL; + } + return false; +} + +static bool isTableExistInTableTbnames(SArray* aTableTbNames, SRealTableNode* pTable) { + for (int i = 0; i < taosArrayGetSize(aTableTbNames); ++i) { + SEqCondTbNameTableInfo* info = taosArrayGet(aTableTbNames, i); + if (info->pRealTable == pTable) { + return true; + } + } + return false; +} + +static void findEqualCondTbnameInLogicCondAnd(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) { + SNode* pTmpNode = NULL; + FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) { + if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) { + SEqCondTbNameTableInfo info = {0}; + bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); + if (bIsEqTbnameCond) { + if (!isTableExistInTableTbnames(aTableTbnames, info.pRealTable)) { + //TODO: intersect tbNames of same table? 
speed + taosArrayPush(aTableTbnames, &info); + } else { + taosArrayDestroy(info.aTbnames); + } + } + } + //TODO: logic cond + } +} + +static void unionEqualCondTbnamesOfSameTable(SArray* aTableTbnames, SEqCondTbNameTableInfo* pInfo) { + bool bFoundTable = false; + for (int i = 0; i < taosArrayGetSize(aTableTbnames); ++i) { + SEqCondTbNameTableInfo* info = taosArrayGet(aTableTbnames, i); + if (info->pRealTable == pInfo->pRealTable) { + taosArrayAddAll(info->aTbnames, pInfo->aTbnames); + taosArrayDestroy(pInfo->aTbnames); + pInfo->aTbnames = NULL; + bFoundTable = true; + break; + } + } + if (!bFoundTable) { + taosArrayPush(aTableTbnames, pInfo); + } +} + +static void findEqualCondTbnameInLogicCondOr(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) { + bool bAllTbName = true; + SNode* pTmpNode = NULL; + FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) { + //TODO: logic cond + if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) { + SEqCondTbNameTableInfo info = {0}; + bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); + if (!bIsEqTbnameCond) { + bAllTbName = false; + break; + } else { + unionEqualCondTbnamesOfSameTable(aTableTbnames, &info); + } + } else { + bAllTbName = false; + break; + } + } + if (!bAllTbName) { + for (int i = 0; i < taosArrayGetSize(aTableTbnames); ++i) { + SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTableTbnames, i); + taosArrayDestroy(pInfo->aTbnames); + pInfo->aTbnames = NULL; + } + taosArrayClear(aTableTbnames); + } +} + +static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) { + //TODO: optimize nested and/or condition. now only the fist level is processed. 
+ if (nodeType(pWhere) == QUERY_NODE_OPERATOR) { + SEqCondTbNameTableInfo info = {0}; + bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pWhere, &info); + if (bIsEqTbnameCond) { + taosArrayPush(aTableTbnames, &info); + } + } else if (nodeType(pWhere) == QUERY_NODE_LOGIC_CONDITION) { + if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_AND) { + findEqualCondTbnameInLogicCondAnd(pCxt, pWhere, aTableTbnames); + } else if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_OR) { + findEqualCondTbnameInLogicCondOr(pCxt, pWhere, aTableTbnames); + } + } + return TSDB_CODE_SUCCESS; +} + +static int32_t findVgroupsFromEqualTbname(STranslateContext* pCxt, SEqCondTbNameTableInfo* pInfo, SVgroupsInfo* vgsInfo) { + int32_t nVgroups = 0; + int32_t nTbls = taosArrayGetSize(pInfo->aTbnames); + + if (nTbls >= pInfo->pRealTable->pVgroupList->numOfVgroups) { + vgsInfo->numOfVgroups = 0; + return TSDB_CODE_SUCCESS; + } + + for (int j = 0; j < nTbls; ++j) { + char* dbName = pInfo->pRealTable->table.dbName; + SName snameTb; + char* tbName = taosArrayGetP(pInfo->aTbnames, j); + toName(pCxt->pParseCxt->acctId, dbName, tbName, &snameTb); + SVgroupInfo vgInfo; + bool bExists; + int32_t code = catalogGetCachedTableHashVgroup(pCxt->pParseCxt->pCatalog, &snameTb, &vgInfo, &bExists); + if (code == TSDB_CODE_SUCCESS && bExists) { + bool bFoundVg = false; + for (int32_t k = 0; k < nVgroups; ++k) { + if (vgsInfo->vgroups[k].vgId == vgInfo.vgId) { + bFoundVg = true; + break; + } + } + if (!bFoundVg) { + vgsInfo->vgroups[nVgroups] = vgInfo; + ++nVgroups; + vgsInfo->numOfVgroups = nVgroups; + } + } else { + vgsInfo->numOfVgroups = 0; + break; + } + } + return TSDB_CODE_SUCCESS; +} + +static int32_t setEqualTbnameTableVgroups(STranslateContext* pCxt, SSelectStmt* pSelect, SArray* aTables) { + int32_t code = TSDB_CODE_SUCCESS; + for (int i = 0; i < taosArrayGetSize(aTables); ++i) { + SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, i); + int32_t nTbls = 
taosArrayGetSize(pInfo->aTbnames); + + SVgroupsInfo* vgsInfo = taosMemoryMalloc(sizeof(SVgroupsInfo) + nTbls * sizeof(SVgroupInfo)); + int32_t nVgroups = 0; + findVgroupsFromEqualTbname(pCxt, pInfo, vgsInfo); + if (vgsInfo->numOfVgroups != 0) { + taosMemoryFree(pInfo->pRealTable->pVgroupList); + pInfo->pRealTable->pVgroupList = vgsInfo; + } else { + taosMemoryFree(vgsInfo); + } + } + return TSDB_CODE_SUCCESS; +} + +static int32_t setTableVgroupsFromEqualTbnameCond(STranslateContext* pCxt, SSelectStmt* pSelect) { + int32_t code = TSDB_CODE_SUCCESS; + SArray* aTables = taosArrayInit(1, sizeof(SEqCondTbNameTableInfo)); + code = findEqualCondTbname(pCxt, pSelect->pWhere, aTables); + if (code == TSDB_CODE_SUCCESS) { + code = setEqualTbnameTableVgroups(pCxt, pSelect, aTables); + } + for (int i = 0; i < taosArrayGetSize(aTables); ++i) { + SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, i); + taosArrayDestroy(pInfo->aTbnames); + } + taosArrayDestroy(aTables); + return code; +} + static int32_t translateWhere(STranslateContext* pCxt, SSelectStmt* pSelect) { pCxt->currClause = SQL_CLAUSE_WHERE; int32_t code = translateExpr(pCxt, &pSelect->pWhere); @@ -3934,6 +4195,9 @@ static int32_t translateWhere(STranslateContext* pCxt, SSelectStmt* pSelect) { if (TSDB_CODE_SUCCESS == code && pSelect->timeRange.skey > pSelect->timeRange.ekey) { pSelect->isEmptyResult = true; } + if (pSelect->pWhere != NULL) { + setTableVgroupsFromEqualTbnameCond(pCxt, pSelect); + } return code; } diff --git a/source/libs/planner/inc/planInt.h b/source/libs/planner/inc/planInt.h index 83a4e9ced8..e2a4ded5a9 100644 --- a/source/libs/planner/inc/planInt.h +++ b/source/libs/planner/inc/planInt.h @@ -43,6 +43,7 @@ int32_t optimizeLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan); int32_t splitLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan); int32_t scaleOutLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan, SQueryLogicPlan** pLogicPlan); int32_t createPhysiPlan(SPlanContext* 
pCxt, SQueryLogicPlan* pLogicPlan, SQueryPlan** pPlan, SArray* pExecNodeList); +int32_t validateQueryPlan(SPlanContext* pCxt, SQueryPlan* pPlan); bool getBatchScanOptionFromHint(SNodeList* pList); bool getSortForGroupOptHint(SNodeList* pList); diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index e0d154a130..2adc5b3072 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -747,7 +747,8 @@ static int32_t createAggLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect, pAgg->isGroupTb = pAgg->pGroupKeys ? keysHasTbname(pAgg->pGroupKeys) : 0; pAgg->isPartTb = pSelect->pPartitionByList ? keysHasTbname(pSelect->pPartitionByList) : 0; - + pAgg->hasGroup = pAgg->pGroupKeys || pSelect->pPartitionByList; + if (TSDB_CODE_SUCCESS == code) { *pLogicNode = (SLogicNode*)pAgg; } else { diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 706fada4e8..4eda11a6a4 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -26,6 +26,7 @@ #define OPTIMIZE_FLAG_PUSH_DOWN_CONDE OPTIMIZE_FLAG_MASK(1) #define OPTIMIZE_FLAG_SET_MASK(val, mask) (val) |= (mask) +#define OPTIMIZE_FLAG_CLEAR_MASK(val, mask) (val) &= (~(mask)) #define OPTIMIZE_FLAG_TEST_MASK(val, mask) (((val) & (mask)) != 0) typedef struct SOptimizeContext { @@ -2499,21 +2500,7 @@ static bool lastRowScanOptCheckColNum(int32_t lastColNum, col_id_t lastColId, return true; } -static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { - if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) || - QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) { - return false; - } - - SAggLogicNode* pAgg = (SAggLogicNode*)pNode; - SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pNode->pChildren, 0); - // Only one of LAST and LASTROW can appear - if (pAgg->hasLastRow == 
pAgg->hasLast || NULL != pAgg->pGroupKeys || NULL != pScan->node.pConditions || - !hasSuitableCache(pScan->cacheLastMode, pAgg->hasLastRow, pAgg->hasLast) || - IS_TSWINDOW_SPECIFIED(pScan->scanRange)) { - return false; - } - +static bool lastRowScanOptCheckFuncList(SLogicNode* pNode, bool* hasOtherFunc) { bool hasNonPKSelectFunc = false; SNode* pFunc = NULL; int32_t lastColNum = 0, selectNonPKColNum = 0; @@ -2559,16 +2546,52 @@ static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { return false; } } else if (FUNCTION_TYPE_LAST_ROW != pAggFunc->funcType) { - return false; + *hasOtherFunc = true; } } return true; } +static bool lastRowScanOptCheckLastCache(SAggLogicNode* pAgg, SScanLogicNode* pScan) { + // Only one of LAST and LASTROW can appear + if (pAgg->hasLastRow == pAgg->hasLast || (!pAgg->hasLast && !pAgg->hasLastRow) || NULL != pAgg->pGroupKeys || NULL != pScan->node.pConditions || + !hasSuitableCache(pScan->cacheLastMode, pAgg->hasLastRow, pAgg->hasLast) || + IS_TSWINDOW_SPECIFIED(pScan->scanRange)) { + return false; + } + + return true; +} + +static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { + if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) || + QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) { + return false; + } + + SAggLogicNode* pAgg = (SAggLogicNode*)pNode; + SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pNode->pChildren, 0); + if (!lastRowScanOptCheckLastCache(pAgg, pScan)) { + return false; + } + + bool hasOtherFunc = false; + if (!lastRowScanOptCheckFuncList(pNode, &hasOtherFunc)) { + return false; + } + + if (hasOtherFunc) { + return false; + } + + return true; +} + typedef struct SLastRowScanOptSetColDataTypeCxt { bool doAgg; SNodeList* pLastCols; + SNodeList* pOtherCols; } SLastRowScanOptSetColDataTypeCxt; static EDealRes lastRowScanOptSetColDataType(SNode* pNode, void* pContext) { @@ -2611,6 +2634,33 @@ static void 
lastRowScanOptSetLastTargets(SNodeList* pTargets, SNodeList* pLastCo } } +static void lastRowScanOptRemoveUslessTargets(SNodeList* pTargets, SNodeList* pList1, SNodeList* pList2) { + SNode* pTarget = NULL; + WHERE_EACH(pTarget, pTargets) { + bool found = false; + SNode* pCol = NULL; + FOREACH(pCol, pList1) { + if (nodesEqualNode(pCol, pTarget)) { + found = true; + break; + } + } + if (!found) { + FOREACH(pCol, pList2) { + if (nodesEqualNode(pCol, pTarget)) { + found = true; + break; + } + } + } + if (!found) { + ERASE_NODE(pTargets); + continue; + } + WHERE_NEXT; + } +} + static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) { SAggLogicNode* pAgg = (SAggLogicNode*)optFindPossibleNode(pLogicSubplan->pNode, lastRowScanOptMayBeOptimized); @@ -2618,7 +2668,7 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic return TSDB_CODE_SUCCESS; } - SLastRowScanOptSetColDataTypeCxt cxt = {.doAgg = true, .pLastCols = NULL}; + SLastRowScanOptSetColDataTypeCxt cxt = {.doAgg = true, .pLastCols = NULL, .pOtherCols = NULL}; SNode* pNode = NULL; SColumnNode* pPKTsCol = NULL; SColumnNode* pNonPKCol = NULL; @@ -2639,14 +2689,18 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic nodesWalkExpr(nodesListGetNode(pFunc->pParameterList, 0), lastRowScanOptSetColDataType, &cxt); nodesListErase(pFunc->pParameterList, nodesListGetCell(pFunc->pParameterList, 1)); } - } else if (FUNCTION_TYPE_SELECT_VALUE == funcType) { + } else { pNode = nodesListGetNode(pFunc->pParameterList, 0); - if (nodeType(pNode) == QUERY_NODE_COLUMN) { - SColumnNode* pCol = (SColumnNode*)pNode; - if (pCol->colId == PRIMARYKEY_TIMESTAMP_COL_ID) { - pPKTsCol = pCol; - } else { - pNonPKCol = pCol; + nodesListMakeAppend(&cxt.pOtherCols, pNode); + + if (FUNCTION_TYPE_SELECT_VALUE == funcType) { + if (nodeType(pNode) == QUERY_NODE_COLUMN) { + SColumnNode* pCol = (SColumnNode*)pNode; + if (pCol->colId == PRIMARYKEY_TIMESTAMP_COL_ID) { 
+ pPKTsCol = pCol; + } else { + pNonPKCol = pCol; + } } } } @@ -2660,6 +2714,7 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic lastRowScanOptSetLastTargets(pScan->pScanCols, cxt.pLastCols, true); nodesWalkExprs(pScan->pScanPseudoCols, lastRowScanOptSetColDataType, &cxt); lastRowScanOptSetLastTargets(pScan->node.pTargets, cxt.pLastCols, false); + lastRowScanOptRemoveUslessTargets(pScan->node.pTargets, cxt.pLastCols, cxt.pOtherCols); if (pPKTsCol && pScan->node.pTargets->length == 1) { // when select last(ts),ts from ..., we add another ts to targets sprintf(pPKTsCol->colName, "#sel_val.%p", pPKTsCol); @@ -2679,6 +2734,208 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic return TSDB_CODE_SUCCESS; } + +static bool splitCacheLastFuncOptMayBeOptimized(SLogicNode* pNode) { + if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) || + QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) { + return false; + } + + SAggLogicNode* pAgg = (SAggLogicNode*)pNode; + SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pNode->pChildren, 0); + if (!lastRowScanOptCheckLastCache(pAgg, pScan)) { + return false; + } + + bool hasOtherFunc = false; + if (!lastRowScanOptCheckFuncList(pNode, &hasOtherFunc)) { + return false; + } + + if (pAgg->hasGroup || !hasOtherFunc) { + return false; + } + + return true; +} + +static int32_t splitCacheLastFuncOptCreateAggLogicNode(SAggLogicNode** pNewAgg, SAggLogicNode* pAgg, SNodeList* pFunc, SNodeList* pTargets) { + SAggLogicNode* pNew = (SAggLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_AGG); + if (NULL == pNew) { + nodesDestroyList(pFunc); + nodesDestroyList(pTargets); + return TSDB_CODE_OUT_OF_MEMORY; + } + + pNew->hasLastRow = false; + pNew->hasLast = false; + pNew->hasTimeLineFunc = pAgg->hasTimeLineFunc; + pNew->hasGroupKeyOptimized = false; + pNew->onlyHasKeepOrderFunc = pAgg->onlyHasKeepOrderFunc; + 
pNew->node.groupAction = pAgg->node.groupAction; + pNew->node.requireDataOrder = pAgg->node.requireDataOrder; + pNew->node.resultDataOrder = pAgg->node.resultDataOrder; + pNew->node.pTargets = pTargets; + pNew->pAggFuncs = pFunc; + pNew->pGroupKeys = nodesCloneList(pAgg->pGroupKeys); + pNew->node.pConditions = nodesCloneNode(pAgg->node.pConditions); + pNew->isGroupTb = pAgg->isGroupTb; + pNew->isPartTb = pAgg->isPartTb; + pNew->hasGroup = pAgg->hasGroup; + pNew->node.pChildren = nodesCloneList(pAgg->node.pChildren); + + SNode* pNode = NULL; + FOREACH(pNode, pNew->node.pChildren) { + if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { + OPTIMIZE_FLAG_CLEAR_MASK(((SScanLogicNode*)pNode)->node.optimizedFlag, OPTIMIZE_FLAG_SCAN_PATH); + } + } + + *pNewAgg = pNew; + + return TSDB_CODE_SUCCESS; +} + +static int32_t splitCacheLastFuncOptModifyAggLogicNode(SAggLogicNode* pAgg) { + pAgg->hasTimeLineFunc = false; + pAgg->onlyHasKeepOrderFunc = true; + + return TSDB_CODE_SUCCESS; +} + +static int32_t splitCacheLastFuncOptCreateMergeLogicNode(SMergeLogicNode** pNew, SAggLogicNode* pAgg1, SAggLogicNode* pAgg2) { + SMergeLogicNode* pMerge = (SMergeLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_MERGE); + if (NULL == pMerge) { + return TSDB_CODE_OUT_OF_MEMORY; + } + pMerge->colsMerge = true; + pMerge->numOfChannels = 2; + pMerge->srcGroupId = -1; + pMerge->node.precision = pAgg1->node.precision; + + SNode* pNewAgg1 = nodesCloneNode((SNode*)pAgg1); + SNode* pNewAgg2 = nodesCloneNode((SNode*)pAgg2); + if (NULL == pNewAgg1 || NULL == pNewAgg2) { + nodesDestroyNode(pNewAgg1); + nodesDestroyNode(pNewAgg2); + return TSDB_CODE_OUT_OF_MEMORY; + } + + ((SAggLogicNode*)pNewAgg1)->node.pParent = (SLogicNode*)pMerge; + ((SAggLogicNode*)pNewAgg2)->node.pParent = (SLogicNode*)pMerge; + + SNode* pNode = NULL; + FOREACH(pNode, ((SAggLogicNode*)pNewAgg1)->node.pChildren) { + ((SLogicNode*)pNode)->pParent = (SLogicNode*)pNewAgg1; + } + FOREACH(pNode, ((SAggLogicNode*)pNewAgg2)->node.pChildren) { + 
((SLogicNode*)pNode)->pParent = (SLogicNode*)pNewAgg2; + } + + int32_t code = nodesListMakeStrictAppendList(&pMerge->node.pTargets, nodesCloneList(pAgg1->node.pTargets)); + if (TSDB_CODE_SUCCESS == code) { + code = nodesListMakeStrictAppendList(&pMerge->node.pTargets, nodesCloneList(pAgg2->node.pTargets)); + } + if (TSDB_CODE_SUCCESS == code) { + code = nodesListMakeStrictAppend(&pMerge->node.pChildren, pNewAgg1); + } + if (TSDB_CODE_SUCCESS == code) { + code = nodesListMakeStrictAppend(&pMerge->node.pChildren, pNewAgg2); + } + + if (TSDB_CODE_SUCCESS != code) { + nodesDestroyNode(pNewAgg1); + nodesDestroyNode(pNewAgg2); + nodesDestroyNode((SNode*)pMerge); + } else { + *pNew = pMerge; + } + + return code; +} + +static int32_t splitCacheLastFuncOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) { + SAggLogicNode* pAgg = (SAggLogicNode*)optFindPossibleNode(pLogicSubplan->pNode, splitCacheLastFuncOptMayBeOptimized); + + if (NULL == pAgg) { + return TSDB_CODE_SUCCESS; + } + + SNode* pNode = NULL; + SNodeList* pAggFuncList = NULL; + { + WHERE_EACH(pNode, pAgg->pAggFuncs) { + SFunctionNode* pFunc = (SFunctionNode*)pNode; + int32_t funcType = pFunc->funcType; + if (FUNCTION_TYPE_LAST_ROW != funcType && FUNCTION_TYPE_LAST != funcType && + FUNCTION_TYPE_SELECT_VALUE != funcType && FUNCTION_TYPE_GROUP_KEY != funcType) { + nodesListMakeStrictAppend(&pAggFuncList, nodesCloneNode(pNode)); + ERASE_NODE(pAgg->pAggFuncs); + continue; + } + WHERE_NEXT; + } + } + + if (NULL == pAggFuncList) { + planError("empty agg func list while splite projections, funcNum:%d", pAgg->pAggFuncs->length); + return TSDB_CODE_PLAN_INTERNAL_ERROR; + } + + SNodeList* pTargets = NULL; + { + WHERE_EACH(pNode, pAgg->node.pTargets) { + SColumnNode* pCol = (SColumnNode*)pNode; + SNode* pFuncNode = NULL; + bool found = false; + FOREACH(pFuncNode, pAggFuncList) { + SFunctionNode* pFunc = (SFunctionNode*)pFuncNode; + if (0 == strcmp(pFunc->node.aliasName, pCol->colName)) { + 
nodesListMakeStrictAppend(&pTargets, nodesCloneNode(pNode)); + found = true; + break; + } + } + if (found) { + ERASE_NODE(pAgg->node.pTargets); + continue; + } + WHERE_NEXT; + } + } + + if (NULL == pTargets) { + planError("empty target func list while splite projections, targetsNum:%d", pAgg->node.pTargets->length); + nodesDestroyList(pAggFuncList); + return TSDB_CODE_PLAN_INTERNAL_ERROR; + } + + SMergeLogicNode* pMerge = NULL; + SAggLogicNode* pNewAgg = NULL; + int32_t code = splitCacheLastFuncOptCreateAggLogicNode(&pNewAgg, pAgg, pAggFuncList, pTargets); + if (TSDB_CODE_SUCCESS == code) { + code = splitCacheLastFuncOptModifyAggLogicNode(pAgg); + } + if (TSDB_CODE_SUCCESS == code) { + code = splitCacheLastFuncOptCreateMergeLogicNode(&pMerge, pNewAgg, pAgg); + } + if (TSDB_CODE_SUCCESS == code) { + code = replaceLogicNode(pLogicSubplan, (SLogicNode*)pAgg, (SLogicNode*)pMerge); + } + + nodesDestroyNode((SNode *)pAgg); + nodesDestroyNode((SNode *)pNewAgg); + + if (TSDB_CODE_SUCCESS != code) { + nodesDestroyNode((SNode *)pMerge); + } + + pCxt->optimized = true; + return code; +} + + + // merge projects static bool mergeProjectsMayBeOptimized(SLogicNode* pNode) { if (QUERY_NODE_LOGIC_PLAN_PROJECT != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren)) { @@ -3762,6 +4019,7 @@ static const SOptimizeRule optimizeRuleSet[] = { {.pName = "MergeProjects", .optimizeFunc = mergeProjectsOptimize}, {.pName = "RewriteTail", .optimizeFunc = rewriteTailOptimize}, {.pName = "RewriteUnique", .optimizeFunc = rewriteUniqueOptimize}, + {.pName = "splitCacheLastFunc", .optimizeFunc = splitCacheLastFuncOptimize}, {.pName = "LastRowScan", .optimizeFunc = lastRowScanOptimize}, {.pName = "TagScan", .optimizeFunc = tagScanOptimize}, {.pName = "TableCountScan", .optimizeFunc = tableCountScanOptimize}, diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index 5cf3426e6f..598bce3133 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ 
b/source/libs/planner/src/planPhysiCreater.c @@ -1951,41 +1951,60 @@ static int32_t createExchangePhysiNodeByMerge(SMergePhysiNode* pMerge) { return nodesListMakeStrictAppend(&pMerge->node.pChildren, (SNode*)pExchange); } -static int32_t createMergePhysiNode(SPhysiPlanContext* pCxt, SMergeLogicNode* pMergeLogicNode, SPhysiNode** pPhyNode) { +static int32_t createMergePhysiNode(SPhysiPlanContext* pCxt, SNodeList* pChildren, SMergeLogicNode* pMergeLogicNode, SPhysiNode** pPhyNode) { + int32_t code = TSDB_CODE_SUCCESS; SMergePhysiNode* pMerge = (SMergePhysiNode*)makePhysiNode(pCxt, (SLogicNode*)pMergeLogicNode, QUERY_NODE_PHYSICAL_PLAN_MERGE); if (NULL == pMerge) { return TSDB_CODE_OUT_OF_MEMORY; } + if (pMergeLogicNode->colsMerge) { + pMerge->type = MERGE_TYPE_COLUMNS; + } else if (pMergeLogicNode->needSort) { + pMerge->type = MERGE_TYPE_SORT; + } else { + pMerge->type = MERGE_TYPE_NON_SORT; + } + pMerge->numOfChannels = pMergeLogicNode->numOfChannels; pMerge->srcGroupId = pMergeLogicNode->srcGroupId; pMerge->groupSort = pMergeLogicNode->groupSort; pMerge->ignoreGroupId = pMergeLogicNode->ignoreGroupId; pMerge->inputWithGroupId = pMergeLogicNode->inputWithGroupId; - int32_t code = addDataBlockSlots(pCxt, pMergeLogicNode->pInputs, pMerge->node.pOutputDataBlockDesc); + if (!pMergeLogicNode->colsMerge) { + code = addDataBlockSlots(pCxt, pMergeLogicNode->pInputs, pMerge->node.pOutputDataBlockDesc); - if (TSDB_CODE_SUCCESS == code) { - for (int32_t i = 0; i < pMerge->numOfChannels; ++i) { - code = createExchangePhysiNodeByMerge(pMerge); - if (TSDB_CODE_SUCCESS != code) { - break; + if (TSDB_CODE_SUCCESS == code) { + for (int32_t i = 0; i < pMerge->numOfChannels; ++i) { + code = createExchangePhysiNodeByMerge(pMerge); + if (TSDB_CODE_SUCCESS != code) { + break; + } } } - } - if (TSDB_CODE_SUCCESS == code && NULL != pMergeLogicNode->pMergeKeys) { - code = setListSlotId(pCxt, pMerge->node.pOutputDataBlockDesc->dataBlockId, -1, pMergeLogicNode->pMergeKeys, - 
&pMerge->pMergeKeys); - } + if (TSDB_CODE_SUCCESS == code && NULL != pMergeLogicNode->pMergeKeys) { + code = setListSlotId(pCxt, pMerge->node.pOutputDataBlockDesc->dataBlockId, -1, pMergeLogicNode->pMergeKeys, + &pMerge->pMergeKeys); + } - if (TSDB_CODE_SUCCESS == code) { - code = setListSlotId(pCxt, pMerge->node.pOutputDataBlockDesc->dataBlockId, -1, pMergeLogicNode->node.pTargets, - &pMerge->pTargets); - } - if (TSDB_CODE_SUCCESS == code) { - code = addDataBlockSlots(pCxt, pMerge->pTargets, pMerge->node.pOutputDataBlockDesc); + if (TSDB_CODE_SUCCESS == code) { + code = setListSlotId(pCxt, pMerge->node.pOutputDataBlockDesc->dataBlockId, -1, pMergeLogicNode->node.pTargets, + &pMerge->pTargets); + } + if (TSDB_CODE_SUCCESS == code) { + code = addDataBlockSlots(pCxt, pMerge->pTargets, pMerge->node.pOutputDataBlockDesc); + } + } else { + SDataBlockDescNode* pLeftDesc = ((SPhysiNode*)nodesListGetNode(pChildren, 0))->pOutputDataBlockDesc; + SDataBlockDescNode* pRightDesc = ((SPhysiNode*)nodesListGetNode(pChildren, 1))->pOutputDataBlockDesc; + + code = setListSlotId(pCxt, pLeftDesc->dataBlockId, pRightDesc->dataBlockId, pMergeLogicNode->node.pTargets, &pMerge->pTargets); + if (TSDB_CODE_SUCCESS == code) { + code = addDataBlockSlots(pCxt, pMerge->pTargets, pMerge->node.pOutputDataBlockDesc); + } } if (TSDB_CODE_SUCCESS == code) { @@ -2023,7 +2042,7 @@ static int32_t doCreatePhysiNode(SPhysiPlanContext* pCxt, SLogicNode* pLogicNode case QUERY_NODE_LOGIC_PLAN_INTERP_FUNC: return createInterpFuncPhysiNode(pCxt, pChildren, (SInterpFuncLogicNode*)pLogicNode, pPhyNode); case QUERY_NODE_LOGIC_PLAN_MERGE: - return createMergePhysiNode(pCxt, (SMergeLogicNode*)pLogicNode, pPhyNode); + return createMergePhysiNode(pCxt, pChildren, (SMergeLogicNode*)pLogicNode, pPhyNode); case QUERY_NODE_LOGIC_PLAN_GROUP_CACHE: return createGroupCachePhysiNode(pCxt, pChildren, (SGroupCacheLogicNode*)pLogicNode, pPhyNode); case QUERY_NODE_LOGIC_PLAN_DYN_QUERY_CTRL: diff --git 
a/source/libs/planner/src/planSpliter.c b/source/libs/planner/src/planSpliter.c index d7b3f51961..43bd8a5589 100644 --- a/source/libs/planner/src/planSpliter.c +++ b/source/libs/planner/src/planSpliter.c @@ -248,8 +248,6 @@ static bool stbSplHasMultiTbScan(bool streamQuery, SLogicNode* pNode) { } if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pChild) && stbSplIsMultiTbScan(streamQuery, (SScanLogicNode*)pChild)) { return true; - } else if (QUERY_NODE_LOGIC_PLAN_SORT == nodeType(pChild)) { - return stbSplHasMultiTbScan(streamQuery, (SLogicNode*)pChild); } return false; } @@ -540,11 +538,12 @@ static int32_t stbSplRewriteFromMergeNode(SMergeLogicNode* pMerge, SLogicNode* p } static int32_t stbSplCreateMergeNode(SSplitContext* pCxt, SLogicSubplan* pSubplan, SLogicNode* pSplitNode, - SNodeList* pMergeKeys, SLogicNode* pPartChild, bool groupSort) { + SNodeList* pMergeKeys, SLogicNode* pPartChild, bool groupSort, bool needSort) { SMergeLogicNode* pMerge = (SMergeLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_MERGE); if (NULL == pMerge) { return TSDB_CODE_OUT_OF_MEMORY; } + pMerge->needSort = needSort; pMerge->numOfChannels = stbSplGetNumOfVgroups(pPartChild); pMerge->srcGroupId = pCxt->groupId; pMerge->node.precision = pPartChild->precision; @@ -621,7 +620,7 @@ static int32_t stbSplSplitIntervalForBatch(SSplitContext* pCxt, SStableSplitInfo code = stbSplCreateMergeKeysByPrimaryKey(((SWindowLogicNode*)pInfo->pSplitNode)->pTspk, ((SWindowLogicNode*)pInfo->pSplitNode)->node.outputTsOrder, &pMergeKeys); if (TSDB_CODE_SUCCESS == code) { - code = stbSplCreateMergeNode(pCxt, NULL, pInfo->pSplitNode, pMergeKeys, pPartWindow, true); + code = stbSplCreateMergeNode(pCxt, NULL, pInfo->pSplitNode, pMergeKeys, pPartWindow, true, true); } if (TSDB_CODE_SUCCESS != code) { nodesDestroyList(pMergeKeys); @@ -712,7 +711,7 @@ static int32_t stbSplSplitSessionOrStateForBatch(SSplitContext* pCxt, SStableSpl ((SWindowLogicNode*)pWindow)->node.inputTsOrder, &pMergeKeys); if (TSDB_CODE_SUCCESS == 
code) { - code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pChild, pMergeKeys, (SLogicNode*)pChild, true); + code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pChild, pMergeKeys, (SLogicNode*)pChild, true, true); } if (TSDB_CODE_SUCCESS == code) { @@ -982,7 +981,7 @@ static int32_t stbSplAggNodeCreateMerge(SSplitContext* pCtx, SStableSplitInfo* p } } } - code = stbSplCreateMergeNode(pCtx, NULL, pInfo->pSplitNode, pMergeKeys, pChildAgg, groupSort); + code = stbSplCreateMergeNode(pCtx, NULL, pInfo->pSplitNode, pMergeKeys, pChildAgg, groupSort, true); if (TSDB_CODE_SUCCESS == code && sortForGroup) { SMergeLogicNode* pMerge = (SMergeLogicNode*)nodesListGetNode(pInfo->pSplitNode->pChildren, LIST_LENGTH(pInfo->pSplitNode->pChildren) - 1); @@ -1145,7 +1144,7 @@ static int32_t stbSplSplitSortNode(SSplitContext* pCxt, SStableSplitInfo* pInfo) bool groupSort = ((SSortLogicNode*)pInfo->pSplitNode)->groupSort; int32_t code = stbSplCreatePartSortNode((SSortLogicNode*)pInfo->pSplitNode, &pPartSort, &pMergeKeys); if (TSDB_CODE_SUCCESS == code) { - code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pPartSort, groupSort); + code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pPartSort, groupSort, true); } if (TSDB_CODE_SUCCESS == code) { nodesDestroyNode((SNode*)pInfo->pSplitNode); @@ -1195,7 +1194,7 @@ static int32_t stbSplSplitScanNodeWithPartTags(SSplitContext* pCxt, SStableSplit SLogicNode* pSplitNode = NULL; int32_t code = stbSplGetSplitNodeForScan(pInfo, &pSplitNode); if (TSDB_CODE_SUCCESS == code) { - code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pSplitNode, NULL, pSplitNode, true); + code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pSplitNode, NULL, pSplitNode, true, true); } if (TSDB_CODE_SUCCESS == code) { code = nodesListMakeStrictAppend(&pInfo->pSubplan->pChildren, @@ -1269,7 +1268,7 @@ static int32_t stbSplSplitMergeScanNode(SSplitContext* pCxt, SLogicSubplan* pSub 
((SLimitNode*)pMergeScan->pLimit)->limit += ((SLimitNode*)pMergeScan->pLimit)->offset; ((SLimitNode*)pMergeScan->pLimit)->offset = 0; } - code = stbSplCreateMergeNode(pCxt, pSubplan, (SLogicNode*)pScan, pMergeKeys, pMergeScan, groupSort); + code = stbSplCreateMergeNode(pCxt, pSubplan, (SLogicNode*)pScan, pMergeKeys, pMergeScan, groupSort, true); } if (TSDB_CODE_SUCCESS == code) { nodesDestroyNode((SNode*)pScan); @@ -1345,7 +1344,7 @@ static int32_t stbSplSplitPartitionNode(SSplitContext* pCxt, SStableSplitInfo* p code = stbSplCreateMergeKeysForPartitionNode(pInfo->pSplitNode, &pMergeKeys); } if (TSDB_CODE_SUCCESS == code) { - code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pInfo->pSplitNode, true); + code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pInfo->pSplitNode, true, true); } if (TSDB_CODE_SUCCESS == code) { code = nodesListMakeStrictAppend(&pInfo->pSubplan->pChildren, @@ -1587,9 +1586,12 @@ typedef struct SSmaIndexSplitInfo { static bool smaIdxSplFindSplitNode(SSplitContext* pCxt, SLogicSubplan* pSubplan, SLogicNode* pNode, SSmaIndexSplitInfo* pInfo) { if (QUERY_NODE_LOGIC_PLAN_MERGE == nodeType(pNode) && LIST_LENGTH(pNode->pChildren) > 1) { - pInfo->pMerge = (SMergeLogicNode*)pNode; - pInfo->pSubplan = pSubplan; - return true; + int32_t nodeType = nodeType(nodesListGetNode(pNode->pChildren, 0)); + if (nodeType == QUERY_NODE_LOGIC_PLAN_EXCHANGE || nodeType == QUERY_NODE_LOGIC_PLAN_MERGE) { + pInfo->pMerge = (SMergeLogicNode*)pNode; + pInfo->pSubplan = pSubplan; + return true; + } } return false; } diff --git a/source/libs/planner/src/planValidator.c b/source/libs/planner/src/planValidator.c new file mode 100755 index 0000000000..a5d729ab84 --- /dev/null +++ b/source/libs/planner/src/planValidator.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "planInt.h" + +#include "catalog.h" +#include "functionMgt.h" +#include "systable.h" +#include "tglobal.h" + +typedef struct SValidatePlanContext { + SPlanContext* pPlanCxt; + int32_t errCode; +} SValidatePlanContext; + +int32_t doValidatePhysiNode(SValidatePlanContext* pCxt, SNode* pNode); + +int32_t validateMergePhysiNode(SValidatePlanContext* pCxt, SMergePhysiNode* pMerge) { + if ((NULL != pMerge->node.pLimit || NULL != pMerge->node.pSlimit) && pMerge->type == MERGE_TYPE_NON_SORT) { + planError("no limit&slimit supported for non sort merge, pLimit:%p", pMerge->node.pLimit); + return TSDB_CODE_PLAN_INTERNAL_ERROR; + } + + return TSDB_CODE_SUCCESS; +} + +int32_t validateSubplanNode(SValidatePlanContext* pCxt, SSubplan* pSubPlan) { + if (SUBPLAN_TYPE_MODIFY == pSubPlan->subplanType) { + return TSDB_CODE_SUCCESS; + } + return doValidatePhysiNode(pCxt, (SNode*)pSubPlan->pNode); +} + +int32_t validateQueryPlanNode(SValidatePlanContext* pCxt, SQueryPlan* pPlan) { + int32_t code = TSDB_CODE_SUCCESS; + SNode* pNode = NULL; + FOREACH(pNode, pPlan->pSubplans) { + if (QUERY_NODE_NODE_LIST != nodeType(pNode)) { + code = TSDB_CODE_PLAN_INTERNAL_ERROR; + break; + } + + SNode* pSubNode = NULL; + SNodeListNode* pSubplans = (SNodeListNode*)pNode; + FOREACH(pSubNode, pSubplans->pNodeList) { + if (QUERY_NODE_PHYSICAL_SUBPLAN != nodeType(pNode)) { + code = TSDB_CODE_PLAN_INTERNAL_ERROR; + break; + } + + code = 
doValidatePhysiNode(pCxt, pSubNode); + if (code) { + break; + } + } + } + + return code; +} + +int32_t doValidatePhysiNode(SValidatePlanContext* pCxt, SNode* pNode) { + switch (nodeType(pNode)) { + case QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_TABLE_SEQ_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_BLOCK_DIST_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_LAST_ROW_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_PROJECT: + case QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN: + case QUERY_NODE_PHYSICAL_PLAN_HASH_AGG: + case QUERY_NODE_PHYSICAL_PLAN_EXCHANGE: + break; + case QUERY_NODE_PHYSICAL_PLAN_MERGE: + return validateMergePhysiNode(pCxt, (SMergePhysiNode*)pNode); + case QUERY_NODE_PHYSICAL_PLAN_SORT: + case QUERY_NODE_PHYSICAL_PLAN_GROUP_SORT: + case QUERY_NODE_PHYSICAL_PLAN_HASH_INTERVAL: + case QUERY_NODE_PHYSICAL_PLAN_MERGE_INTERVAL: + case QUERY_NODE_PHYSICAL_PLAN_MERGE_ALIGNED_INTERVAL: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL: + case QUERY_NODE_PHYSICAL_PLAN_FILL: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_FILL: + case QUERY_NODE_PHYSICAL_PLAN_MERGE_SESSION: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION: + case QUERY_NODE_PHYSICAL_PLAN_MERGE_STATE: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE: + case QUERY_NODE_PHYSICAL_PLAN_PARTITION: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION: + case QUERY_NODE_PHYSICAL_PLAN_INDEF_ROWS_FUNC: + case QUERY_NODE_PHYSICAL_PLAN_INTERP_FUNC: + case QUERY_NODE_PHYSICAL_PLAN_DISPATCH: + case QUERY_NODE_PHYSICAL_PLAN_INSERT: + case QUERY_NODE_PHYSICAL_PLAN_QUERY_INSERT: + case QUERY_NODE_PHYSICAL_PLAN_DELETE: + case 
QUERY_NODE_PHYSICAL_PLAN_TABLE_COUNT_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_MERGE_EVENT: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_EVENT: + case QUERY_NODE_PHYSICAL_PLAN_HASH_JOIN: + case QUERY_NODE_PHYSICAL_PLAN_GROUP_CACHE: + case QUERY_NODE_PHYSICAL_PLAN_DYN_QUERY_CTRL: + break; + case QUERY_NODE_PHYSICAL_SUBPLAN: + return validateSubplanNode(pCxt, (SSubplan*)pNode); + case QUERY_NODE_PHYSICAL_PLAN: + return validateQueryPlanNode(pCxt, (SQueryPlan *)pNode); + default: + break; + } + + return TSDB_CODE_SUCCESS; +} + +static void destoryValidatePlanContext(SValidatePlanContext* pCxt) { + +} + +int32_t validateQueryPlan(SPlanContext* pCxt, SQueryPlan* pPlan) { + SValidatePlanContext cxt = {.pPlanCxt = pCxt, + .errCode = TSDB_CODE_SUCCESS + }; + + int32_t code = TSDB_CODE_SUCCESS; + SNode* pNode = NULL; + FOREACH(pNode, pPlan->pSubplans) { + if (QUERY_NODE_NODE_LIST != nodeType(pNode)) { + code = TSDB_CODE_PLAN_INTERNAL_ERROR; + break; + } + + SNode* pSubNode = NULL; + SNodeListNode* pSubplans = (SNodeListNode*)pNode; + FOREACH(pSubNode, pSubplans->pNodeList) { + code = doValidatePhysiNode(&cxt, pSubNode); + if (code) { + break; + } + } + if (code) { + break; + } + } + + destoryValidatePlanContext(&cxt); + return code; +} diff --git a/source/libs/planner/src/planner.c b/source/libs/planner/src/planner.c index 6dd9c544cc..a4a33b30fd 100644 --- a/source/libs/planner/src/planner.c +++ b/source/libs/planner/src/planner.c @@ -57,6 +57,9 @@ int32_t qCreateQueryPlan(SPlanContext* pCxt, SQueryPlan** pPlan, SArray* pExecNo if (TSDB_CODE_SUCCESS == code) { code = createPhysiPlan(pCxt, pLogicPlan, pPlan, pExecNodeList); } + if (TSDB_CODE_SUCCESS == code) { + code = validateQueryPlan(pCxt, *pPlan); + } if (TSDB_CODE_SUCCESS == code) { dumpQueryPlan(*pPlan); } diff --git a/source/libs/scalar/src/sclfunc.c b/source/libs/scalar/src/sclfunc.c index dbdd79cc65..e7c6297f44 100644 --- a/source/libs/scalar/src/sclfunc.c +++ b/source/libs/scalar/src/sclfunc.c @@ -1230,7 +1230,6 @@ int32_t 
toTimestampFunction(SScalarParam* pInput, int32_t inputNum, SScalarParam code = taosChar2Ts(format, &formats, tsStr, &ts, precision, errMsg, 128); if (code) { qError("func to_timestamp failed %s", errMsg); - code = code == -1 ? TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR : TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR; break; } colDataSetVal(pOutput->columnData, i, (char *)&ts, false); diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index 95031505dc..3eadea3cdd 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -44,7 +44,7 @@ typedef struct { int64_t defaultCfInit; } SBackendWrapper; -void* streamBackendInit(const char* path, int64_t chkpId); +void* streamBackendInit(const char* path, int64_t chkpId, int32_t vgId); void streamBackendCleanup(void* arg); void streamBackendHandleCleanup(void* arg); int32_t streamBackendLoadCheckpointInfo(void* pMeta); diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 8f93f259ef..c23483fffb 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -469,11 +469,11 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { return 0; } -void* streamBackendInit(const char* streamPath, int64_t chkpId) { +void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) { char* backendPath = NULL; int32_t code = rebuildDirFromCheckpoint(streamPath, chkpId, &backendPath); - stDebug("start to init stream backend at %s, checkpointid: %" PRId64 "", backendPath, chkpId); + stDebug("start to init stream backend at %s, checkpointid: %" PRId64 " vgId:%d", backendPath, chkpId, vgId); uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20; SBackendWrapper* pHandle = taosMemoryCalloc(1, sizeof(SBackendWrapper)); @@ -534,7 +534,7 @@ void* streamBackendInit(const char* 
streamPath, int64_t chkpId) { if (cfs != NULL) { rocksdb_list_column_families_destroy(cfs, nCf); } - stDebug("succ to init stream backend at %s, backend:%p", backendPath, pHandle); + stDebug("succ to init stream backend at %s, backend:%p, vgId:%d", backendPath, pHandle, vgId); taosMemoryFreeClear(backendPath); return (void*)pHandle; diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 5479a2dab2..5540e3b6fd 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -299,9 +299,12 @@ int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) { continue; } - ASSERT(p->chkInfo.checkpointId < p->checkpointingId && p->checkpointingId == checkpointId); + ASSERT(p->chkInfo.checkpointId < p->checkpointingId && p->checkpointingId == checkpointId && + p->chkInfo.checkpointVer <= p->chkInfo.processedVer); p->chkInfo.checkpointId = p->checkpointingId; + p->chkInfo.checkpointVer = p->chkInfo.processedVer; + streamTaskClearCheckInfo(p); char* str = NULL; diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index 80927b36b9..f6ec6e9fdb 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -129,6 +129,7 @@ SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type) { void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit) { ASSERT(pDataSubmit->type == STREAM_INPUT__DATA_SUBMIT); taosMemoryFree(pDataSubmit->submit.msgStr); + taosFreeQitem(pDataSubmit); } SStreamMergedSubmit* streamMergedSubmitNew() { @@ -208,12 +209,10 @@ void streamFreeQitem(SStreamQueueItem* data) { if (type == STREAM_INPUT__GET_RES) { blockDataDestroy(((SStreamTrigger*)data)->pBlock); taosFreeQitem(data); - } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type == STREAM_INPUT__TRANS_STATE) { - taosArrayDestroyEx(((SStreamDataBlock*)data)->blocks, (FDelete)blockDataFreeRes); 
- taosFreeQitem(data); + } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE) { + destroyStreamDataBlock((SStreamDataBlock*)data); } else if (type == STREAM_INPUT__DATA_SUBMIT) { streamDataSubmitDestroy((SStreamDataSubmit*)data); - taosFreeQitem(data); } else if (type == STREAM_INPUT__MERGED_SUBMIT) { SStreamMergedSubmit* pMerge = (SStreamMergedSubmit*)data; @@ -228,7 +227,7 @@ void streamFreeQitem(SStreamQueueItem* data) { SStreamRefDataBlock* pRefBlock = (SStreamRefDataBlock*)data; blockDataDestroy(pRefBlock->pBlock); taosFreeQitem(pRefBlock); - } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER || type == STREAM_INPUT__TRANS_STATE) { SStreamDataBlock* pBlock = (SStreamDataBlock*) data; taosArrayDestroyEx(pBlock->blocks, freeItems); taosFreeQitem(pBlock); diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index a6101b0932..cae537a860 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -593,7 +593,7 @@ int32_t streamExecForAll(SStreamTask* pTask) { const SStreamQueueItem* pItem = pInput; stDebug("s-task:%s start to process batch of blocks, num:%d, type:%d", id, numOfBlocks, pItem->type); - int64_t ver = pTask->chkInfo.checkpointVer; + int64_t ver = pTask->chkInfo.processedVer; doSetStreamInputBlock(pTask, pInput, &ver, id); int64_t resSize = 0; @@ -604,13 +604,16 @@ int32_t streamExecForAll(SStreamTask* pTask) { stDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", id, el, SIZE_IN_MiB(resSize), totalBlocks); - // update the currentVer if processing the submit blocks. 
- ASSERT(pTask->chkInfo.checkpointVer <= pTask->chkInfo.nextProcessVer && ver >= pTask->chkInfo.checkpointVer); + SCheckpointInfo* pInfo = &pTask->chkInfo; - if (ver != pTask->chkInfo.checkpointVer) { - stDebug("s-task:%s update checkpointVer(unsaved) from %" PRId64 " to %" PRId64 ", nextProcessVer:%" PRId64, - pTask->id.idStr, pTask->chkInfo.checkpointVer, ver, pTask->chkInfo.nextProcessVer); - pTask->chkInfo.checkpointVer = ver; + // update the currentVer if processing the submit blocks. + ASSERT(pInfo->checkpointVer <= pInfo->nextProcessVer && ver >= pInfo->checkpointVer); + + if (ver != pInfo->processedVer) { + stDebug("s-task:%s update processedVer(unsaved) from %" PRId64 " to %" PRId64 " nextProcessVer:%" PRId64 + " ckpt:%" PRId64, + pTask->id.idStr, pInfo->processedVer, ver, pInfo->nextProcessVer, pInfo->checkpointVer); + pInfo->processedVer = ver; } streamFreeQitem(pInput); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 93dcd966b2..7013b43a6f 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -194,10 +194,10 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF taosInitRWLatch(&pMeta->chkpDirLock); pMeta->chkpId = streamMetaGetLatestCheckpointId(pMeta); - pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, pMeta->vgId); while (pMeta->streamBackend == NULL) { taosMsleep(100); - pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, vgId); if (pMeta->streamBackend == NULL) { stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId); } @@ -262,7 +262,8 @@ int32_t streamMetaReopen(SStreamMeta* pMeta) { } } - while ((pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId)) == NULL) { + // todo: not wait in a critical region + while 
((pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, pMeta->vgId)) == NULL) { stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId); taosMsleep(100); } @@ -852,6 +853,37 @@ static void clearHbMsg(SStreamHbMsg* pMsg, SArray* pIdList) { taosArrayDestroy(pIdList); } +static bool existInHbMsg(SStreamHbMsg* pMsg, SDownstreamTaskEpset* pTaskEpset) { + int32_t numOfExisted = taosArrayGetSize(pMsg->pUpdateNodes); + for (int k = 0; k < numOfExisted; ++k) { + if (pTaskEpset->nodeId == *(int32_t*)taosArrayGet(pMsg->pUpdateNodes, k)) { + return true; + } + } + return false; +} + +static void addUpdateNodeIntoHbMsg(SStreamTask* pTask, SStreamHbMsg* pMsg) { + SStreamMeta* pMeta = pTask->pMeta; + + taosThreadMutexLock(&pTask->lock); + + int32_t num = taosArrayGetSize(pTask->outputInfo.pDownstreamUpdateList); + for (int j = 0; j < num; ++j) { + SDownstreamTaskEpset* pTaskEpset = taosArrayGet(pTask->outputInfo.pDownstreamUpdateList, j); + + bool exist = existInHbMsg(pMsg, pTaskEpset); + if (!exist) { + taosArrayPush(pMsg->pUpdateNodes, &pTaskEpset->nodeId); + stDebug("vgId:%d nodeId:%d added into hb update list, total:%d", pMeta->vgId, pTaskEpset->nodeId, + (int32_t)taosArrayGetSize(pMsg->pUpdateNodes)); + } + } + + taosArrayClear(pTask->outputInfo.pDownstreamUpdateList); + taosThreadMutexUnlock(&pTask->lock); +} + void metaHbToMnode(void* param, void* tmrId) { int64_t rid = *(int64_t*)param; @@ -947,28 +979,7 @@ void metaHbToMnode(void* param, void* tmrId) { walReaderValidVersionRange((*pTask)->exec.pWalReader, &entry.verStart, &entry.verEnd); } - taosThreadMutexLock(&(*pTask)->lock); - int32_t num = taosArrayGetSize((*pTask)->outputInfo.pDownstreamUpdateList); - for (int j = 0; j < num; ++j) { - int32_t* pNodeId = taosArrayGet((*pTask)->outputInfo.pDownstreamUpdateList, j); - - bool exist = false; - int32_t numOfExisted = taosArrayGetSize(hbMsg.pUpdateNodes); - for (int k = 0; k < numOfExisted; ++k) { - if (*pNodeId == 
*(int32_t*)taosArrayGet(hbMsg.pUpdateNodes, k)) { - exist = true; - break; - } - } - - if (!exist) { - taosArrayPush(hbMsg.pUpdateNodes, pNodeId); - } - } - - taosArrayClear((*pTask)->outputInfo.pDownstreamUpdateList); - taosThreadMutexUnlock(&(*pTask)->lock); - + addUpdateNodeIntoHbMsg(*pTask, &hbMsg); taosArrayPush(hbMsg.pTaskStatus, &entry); if (!hasMnodeEpset) { epsetAssign(&epset, &(*pTask)->info.mnodeEpset); @@ -1008,7 +1019,7 @@ void metaHbToMnode(void* param, void* tmrId) { pMeta->pHbInfo->hbCount += 1; - stDebug("vgId:%d, build and send hb to mnode, numOfTasks:%d total:%d", pMeta->vgId, hbMsg.numOfTasks, + stDebug("vgId:%d build and send hb to mnode, numOfTasks:%d total:%d", pMeta->vgId, hbMsg.numOfTasks, pMeta->pHbInfo->hbCount); tmsgSendReq(&epset, &msg); } else { diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 63ee702ada..556de169b4 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -270,7 +270,6 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) "s-task:%s inputQ is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data", pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_QUEUE_CAPACITY_IN_SIZE, total, size); streamDataSubmitDestroy(px); - taosFreeQitem(pItem); return -1; } @@ -280,7 +279,6 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) int32_t code = taosWriteQitem(pQueue, pItem); if (code != TSDB_CODE_SUCCESS) { streamDataSubmitDestroy(px); - taosFreeQitem(pItem); return code; } @@ -296,13 +294,13 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) stTrace("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort", pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_QUEUE_CAPACITY_IN_SIZE, total, size); - destroyStreamDataBlock((SStreamDataBlock*)pItem); + streamFreeQitem(pItem); 
return -1; } int32_t code = taosWriteQitem(pQueue, pItem); if (code != TSDB_CODE_SUCCESS) { - destroyStreamDataBlock((SStreamDataBlock*)pItem); + streamFreeQitem(pItem); return code; } @@ -312,7 +310,7 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) type == STREAM_INPUT__TRANS_STATE) { int32_t code = taosWriteQitem(pQueue, pItem); if (code != TSDB_CODE_SUCCESS) { - taosFreeQitem(pItem); + streamFreeQitem(pItem); return code; } @@ -323,7 +321,7 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) // use the default memory limit, refactor later. int32_t code = taosWriteQitem(pQueue, pItem); if (code != TSDB_CODE_SUCCESS) { - taosFreeQitem(pItem); + streamFreeQitem(pItem); return code; } diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index e672b256da..0b2bf6b4ba 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -562,7 +562,6 @@ int32_t streamTaskPutTranstateIntoInputQ(SStreamTask* pTask) { taosMemoryFree(pBlock); if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pTranstate) < 0) { - taosFreeQitem(pTranstate); return TSDB_CODE_OUT_OF_MEMORY; } @@ -1084,7 +1083,7 @@ int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs int32_t numOfTotal = streamMetaGetNumOfTasks(pMeta); if (taosHashGetSize(pStartInfo->pReadyTaskSet) + taosHashGetSize(pStartInfo->pFailedTaskSet) == numOfTotal) { - pStartInfo->readyTs = pTask->execInfo.start; + pStartInfo->readyTs = taosGetTimestampMs(); pStartInfo->elapsedTime = (pStartInfo->startTs != 0) ? 
pStartInfo->readyTs - pStartInfo->startTs : 0; stDebug("vgId:%d all %d task(s) check downstream completed, last completed task:%s level:%d, startTs:%" PRId64 diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index a7fb590d1b..24228c0307 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -431,8 +431,10 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i pTask->outputq.status = TASK_OUTPUT_STATUS__NORMAL; pTask->pMeta = pMeta; - pTask->chkInfo.checkpointVer = ver - 1; - pTask->chkInfo.nextProcessVer = ver; + pTask->chkInfo.checkpointVer = ver - 1; // only update when generating checkpoint + pTask->chkInfo.processedVer = ver - 1; // already processed version + + pTask->chkInfo.nextProcessVer = ver; // next processed version pTask->dataRange.range.maxVer = ver; pTask->dataRange.range.minVer = ver; pTask->pMsgCb = pMsgCb; diff --git a/source/libs/stream/test/tstreamUpdateTest.cpp b/source/libs/stream/test/tstreamUpdateTest.cpp index f63939ac9e..1b999e5fb0 100644 --- a/source/libs/stream/test/tstreamUpdateTest.cpp +++ b/source/libs/stream/test/tstreamUpdateTest.cpp @@ -12,7 +12,7 @@ class StreamStateEnv : public ::testing::Test { protected: virtual void SetUp() { streamMetaInit(); - backend = streamBackendInit(path, 0); + backend = streamBackendInit(path, 0, 0); } virtual void TearDown() { streamMetaCleanup(); diff --git a/source/os/src/osEnv.c b/source/os/src/osEnv.c index 0fc136c693..54107db325 100644 --- a/source/os/src/osEnv.c +++ b/source/os/src/osEnv.c @@ -37,11 +37,12 @@ float tsNumOfCores = 0; int64_t tsTotalMemoryKB = 0; char *tsProcPath = NULL; -char tsSIMDBuiltins = 0; +char tsSIMDEnable = 0; char tsSSE42Enable = 0; char tsAVXEnable = 0; char tsAVX2Enable = 0; char tsFMAEnable = 0; +char tsAVX512Enable = 0; void osDefaultInit() { taosSeedRand(taosSafeRand()); diff --git a/source/os/src/osSysinfo.c b/source/os/src/osSysinfo.c index 
4816ec8f8b..fea7a4f63d 100644 --- a/source/os/src/osSysinfo.c +++ b/source/os/src/osSysinfo.c @@ -250,7 +250,7 @@ void taosGetSystemInfo() { taosGetCpuCores(&tsNumOfCores, false); taosGetTotalMemory(&tsTotalMemoryKB); taosGetCpuUsage(NULL, NULL); - taosGetCpuInstructions(&tsSSE42Enable, &tsAVXEnable, &tsAVX2Enable, &tsFMAEnable); + taosGetCpuInstructions(&tsSSE42Enable, &tsAVXEnable, &tsAVX2Enable, &tsFMAEnable, &tsAVX512Enable); #endif } @@ -602,7 +602,7 @@ void taosGetCpuUsage(double *cpu_system, double *cpu_engine) { : "0"(level)) // todo add for windows and mac -int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma) { +int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma, char* avx512) { #ifdef WINDOWS #elif defined(_TD_DARWIN_64) #else @@ -610,12 +610,6 @@ int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma) { #ifdef _TD_X86_ // Since the compiler is not support avx/avx2 instructions, the global variables always need to be // set to be false -//#if __AVX__ || __AVX2__ -// tsSIMDBuiltins = true; -//#else -// tsSIMDBuiltins = false; -//#endif - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; int32_t ret = __get_cpuid(1, &eax, &ebx, &ecx, &edx); @@ -631,6 +625,7 @@ int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma) { // Ref to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77756 __cpuid_fix(7u, eax, ebx, ecx, edx); *avx2 = (char) ((ebx & bit_AVX2) == bit_AVX2); + *avx512 = (char)((ebx & bit_AVX512F) == bit_AVX512F); #endif // _TD_X86_ #endif diff --git a/source/util/src/tcompression.c b/source/util/src/tcompression.c index 3fc3ef6be6..dc89a24180 100644 --- a/source/util/src/tcompression.c +++ b/source/util/src/tcompression.c @@ -283,7 +283,7 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha int32_t batch = num >> 2; int32_t remain = num & 0x03; if (selector == 0 || selector == 1) { - if (tsAVX2Enable && tsSIMDBuiltins) { + if 
(tsAVX2Enable && tsSIMDEnable) { for (int32_t i = 0; i < batch; ++i) { __m256i prev = _mm256_set1_epi64x(prev_value); _mm256_storeu_si256((__m256i *)&p[_pos], prev); @@ -300,7 +300,7 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha } } } else { - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { __m256i base = _mm256_set1_epi64x(w); __m256i maskVal = _mm256_set1_epi64x(mask); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 6c964c3da0..bcdbb3e3ac 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -613,8 +613,9 @@ TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_FUNTION_PARA_TYPE, "Invalid function par TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_FUNTION_PARA_VALUE, "Invalid function para value") TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_NOT_BUILTIN_FUNTION, "Not buildin function") TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_DUP_TIMESTAMP, "Duplicate timestamps not allowed in function") -TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR, "Func to_timestamp failed, format mismatch") -TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR, "Func to_timestamp failed, wrong timestamp") +TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR, "Func to_timestamp failed for format mismatch") +TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR, "Func to_timestamp failed for wrong timestamp") +TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED, "Func to_timestamp failed for unsupported timestamp format") //udf TAOS_DEFINE_ERROR(TSDB_CODE_UDF_STOPPING, "udf is stopping") diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 6704e01335..6a7c0b47ec 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -21,6 +21,7 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/partition_interval.py ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/pause_resume_test.py +,,y,system-test,./pytest.sh python3 
./test.py -f 2-query/tbname_vgroup.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py -Q 2 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py -Q 3 @@ -1182,6 +1183,7 @@ e ,,y,script,./test.sh -f tsim/sma/sma_leak.sim ,,y,script,./test.sh -f tsim/sma/tsmaCreateInsertQuery.sim ,,y,script,./test.sh -f tsim/sma/rsmaCreateInsertQuery.sim +,,y,script,./test.sh -f tsim/sma/rsmaCreateInsertQueryDelete.sim ,,y,script,./test.sh -f tsim/sma/rsmaPersistenceRecovery.sim ,,y,script,./test.sh -f tsim/sync/vnodesnapshot-rsma-test.sim ,,n,script,./test.sh -f tsim/valgrind/checkError1.sim @@ -1297,6 +1299,8 @@ e ,,y,script,./test.sh -f tsim/tagindex/add_index.sim ,,n,script,./test.sh -f tsim/tagindex/sma_and_tag_index.sim ,,y,script,./test.sh -f tsim/view/view.sim +,,y,script,./test.sh -f tsim/query/cache_last.sim +,,y,script,./test.sh -f tsim/query/const.sim #develop test diff --git a/tests/script/tsim/query/cache_last.sim b/tests/script/tsim/query/cache_last.sim new file mode 100644 index 0000000000..8247a2f723 --- /dev/null +++ b/tests/script/tsim/query/cache_last.sim @@ -0,0 +1,105 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +sql drop database if exists db1; +sql create database if not exists db1 cachemodel 'both' cachesize 10; +sql use db1; +sql create stable sta (ts timestamp, f1 double, f2 binary(200)) tags(t1 int); +sql create table tba1 using sta tags(1); +sql insert into tba1 values ('2022-04-26 15:15:01', 1.0, "a"); +sql insert into tba1 values ('2022-04-26 15:15:02', 2.0, "b"); +sql insert into tba1 values ('2022-04-26 15:15:04', 4.0, "b"); +sql insert into tba1 values ('2022-04-26 15:15:05', 5.0, "b"); +sql create table tba2 using sta tags(2); +sql insert into tba2 values ('2022-04-26 15:15:01', 1.2, "a"); +sql insert into tba2 values ('2022-04-26 15:15:02', 2.2, "b"); +sql create table tba3 
using sta tags(3); +sql insert into tba3 values ('2022-04-26 15:15:10', 1.3, "a"); +sql insert into tba3 values ('2022-04-26 15:15:11', 2.3, "b"); +sql select count(*), last(*) from sta; +if $rows != 1 then + return -1 +endi +if $data00 != 8 then + return -1 +endi +if $data01 != @22-04-26 15:15:11.000@ then + return -1 +endi +if $data02 != 2.300000000 then + return -1 +endi +if $data03 != b then + return -1 +endi +sql explain select count(*), last(*) from sta; +if $data00 != @-> Merge (columns=4 width=226 input_order=unknown output_order=unknown mode=column)@ then + return -1 +endi +sql explain select first(f1), last(*) from sta; +if $data00 != @-> Merge (columns=4 width=226 input_order=unknown output_order=unknown mode=column)@ then + return -1 +endi +sql select first(f1), last(*) from sta; +if $rows != 1 then + return -1 +endi +sql select last_row(f1), last(f1) from sta; +if $rows != 1 then + return -1 +endi +sql select count(*), last_row(f1), last(f1) from sta; +if $rows != 1 then + return -1 +endi +sql explain select count(*), last_row(f1), last(f1) from sta; +if $data00 != @-> Aggragate (functions=3 width=24 input_order=desc )@ then + return -1 +endi +sql_error select count(*), last_row(f1), min(f1), f1 from sta; +sql select count(*), last_row(f1), min(f1),tbname from sta partition by tbname; +if $rows != 3 then + return -1 +endi +sql explain select count(*), last_row(f1), min(f1),tbname from sta partition by tbname; +if $data00 != @-> Data Exchange 2:1 (width=296)@ then + return -1 +endi +sql explain select count(*), last_row(f1), min(f1) from sta; +if $data00 != @-> Merge (columns=3 width=24 input_order=unknown output_order=unknown mode=column)@ then + return -1 +endi +sql explain select count(*), last_row(f1), min(f1),tbname from sta group by tbname; +if $data00 != @-> Data Exchange 2:1 (width=296)@ then + return -1 +endi +sql explain select count(*), last_row(f1), min(f1),t1 from sta partition by t1; +if $data00 != @-> Aggragate (functions=4 width=28 
input_order=desc )@ then + return -1 +endi +sql explain select count(*), last_row(f1), min(f1),t1 from sta group by t1; +if $data00 != @-> Aggragate (functions=4 width=28 input_order=desc )@ then + return -1 +endi +sql explain select distinct count(*), last_row(f1), min(f1) from sta; +if $data10 != @ -> Merge (columns=3 width=24 input_order=unknown output_order=unknown mode=column)@ then + print $data10 + return -1 +endi +sql explain select count(*), last_row(f1), min(f1) from sta interval(1s); +if $data10 != @ -> Merge (columns=4 width=66 input_order=asc output_order=asc mode=sort)@ then + return -1 +endi +sql explain select distinct count(*), last_row(f1), min(f1) from tba1; +if $data10 != @ -> Merge (columns=3 width=24 input_order=unknown output_order=unknown mode=column)@ then + return -1 +endi +sql select distinct count(*), last_row(f1), min(f1) from tba1; +if $rows != 1 then + return -1 +endi + + +system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/tsim/query/const.sim b/tests/script/tsim/query/const.sim new file mode 100644 index 0000000000..08f2b909c1 --- /dev/null +++ b/tests/script/tsim/query/const.sim @@ -0,0 +1,11 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +sql select b.z from (select c.a as z from (select 'a' as a) c) b; +if $rows != 1 then + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim b/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim new file mode 100644 index 0000000000..b93f6f0c44 --- /dev/null +++ b/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim @@ -0,0 +1,540 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sleep 50 +sql connect + +print =============== create database with retentions +sql create database d0 retentions -:7d,10s:21d,15s:365d vgroups 1; +sql use d0 + +print =============== create super 
table and register rsma +sql create table if not exists stb (ts timestamp, c1 float, c2 double) tags (city binary(20),district binary(20)) rollup(sum) max_delay 1s,1s; + +sql show stables +if $rows != 1 then + return -1 +endi + +print =============== create child table +sql create table ct1 using stb tags("BeiJing", "ChaoYang"); + +sql show tables +if $rows != 1 then + return -1 +endi + +print =============== insert data and trigger rollup +sql insert into ct1 values(now, 10, NULL); +sql insert into ct1 values(now+60m, 1, NULL); +sql insert into ct1 values(now+120m, 100, NULL); + +print =============== wait 7 seconds for results +sleep 7000 + +print =============== select * from retention level 2 from memory +sql select * from ct1; +print $data00 $data01 $data02 +print $data10 $data11 $data12 +print $data20 $data21 $data22 +if $rows != 3 then + print retention level 2 file rows $rows != 3 + return -1 +endi + +if $data01 != 10.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 1.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi +if $data21 != 100.00000 then + return -1 +endi +if $data22 != NULL then + return -1 +endi + +print =============== select * from retention level 1 from memory +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 +print $data10 $data11 $data12 +print $data20 $data21 $data22 + +if $rows != 3 then + print retention level 2 file rows $rows != 3 + return -1 +endi + +if $data01 != 10.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 1.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi +if $data21 != 100.00000 then + return -1 +endi +if $data22 != NULL then + return -1 +endi + +print =============== select * from retention level 0 from memory +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 +print $data10 $data11 $data12 +print $data20 $data21 $data22 + +if $rows != 3 then + print retention 
level 2 file rows $rows != 3 + return -1 +endi + +if $data01 != 10.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 1.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi +if $data21 != 100.00000 then + return -1 +endi +if $data22 != NULL then + return -1 +endi + +print =============== delete row 0 +sql delete from ct1 where ts < now; +sql delete from ct1 where ts < now; +sql delete from ct1 where ts < now; +print =============== wait 7 seconds for results +sleep 7000 + +print =============== select * from retention level 2 from memory after delete row 0 +sql select * from ct1; +print $data00 $data01 $data02 +print $data10 $data11 $data12 + +if $rows != 2 then + print retention level 2 file rows $rows != 2 + return -1 +endi + +if $data01 != 1.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 100.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi + +print =============== select * from retention level 1 from memory after delete row 0 +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 +print $data10 $data11 $data12 + +if $rows != 2 then + print retention level 2 file rows $rows != 2 + return -1 +endi + +if $data01 != 1.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 100.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi + +print =============== select * from retention level 0 from memory after delete row 0 +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 +print $data10 $data11 $data12 + +if $rows != 2 then + print retention level 2 file rows $rows != 2 + return -1 +endi + +if $data01 != 1.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 100.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi + +print =============== delete row 1 +sql delete from ct1 where ts < now; +sql delete from ct1 where ts 
< now; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; + +print =============== wait 7 seconds for results +sleep 7000 + +print =============== select * from retention level 2 from memory after delete row 1 +sql select * from ct1; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print =============== select * from retention level 1 from memory after delete row 1 +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print =============== select * from retention level 0 from memory after delete row 1 +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +#=================================================================== +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start +print =============== wait 7 seconds for results after reboot +sleep 7000 + +print =============== select * from retention level 2 from memory after reboot +sql select * from ct1; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print =============== select * from retention level 1 from memory after reboot +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 + 
+if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print =============== select * from retention level 0 from memory after reboot +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +#==================== flush database to trigger commit data to file +sql flush database d0; +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start + +print =============== select * from retention level 2 from file +sql select * from ct1 where ts > now-365d; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print =============== select * from retention level 1 from file +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print =============== select * from retention level 0 from file +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print =============== delete row 2 +sql delete from ct1 where ts < now; +sql delete from ct1 where ts < now; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from 
ct1 where ts < now + 120m; +sql delete from ct1 where ts < now + 200m; +sql delete from ct1 where ts < now + 300m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now; + +print =============== wait 7 seconds for results +sleep 7000 + +print =============== select * from retention level 2 from memory after delete row 2 +sql select * from ct1; +print $data00 $data01 $data02 + +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +print =============== select * from retention level 1 from memory after delete row 2 +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 + +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +print =============== select * from retention level 0 from memory after delete row 2 +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 + +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +#=================================================================== +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start +print =============== wait 7 seconds for results after reboot +sleep 7000 + +print =============== select * from retention level 2 from memory after delete row 2 +sql select * from ct1; +print $data00 $data01 $data02 + +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +print =============== select * from retention level 1 from memory after delete row 2 +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 + +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +print =============== select * from retention level 0 from memory after delete row 2 +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 + +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +print =============== check delete multiple tables 
+sql create table ct2 using stb tags("BeiJing", "HaiDian"); +sql create table ct3 using stb tags("ShangHai", "PuDong"); + +sql insert into ct2 values(now, 10, NULL); +sql insert into ct2 values(now+60m, 1, NULL); +sql insert into ct2 values(now+120m, 100, NULL); +sql insert into ct3 values(now, 10, NULL); +sql insert into ct3 values(now+60m, 1, NULL); +sql insert into ct3 values(now+120m, 100, NULL); + +print =============== wait 7 seconds for results +sleep 7000 + +print =============== select * from retention level 2 from memory +sql select * from ct2; +print $data00 $data01 $data02 +print $data10 $data11 $data12 +print $data20 $data21 $data22 +if $rows != 3 then + print retention level 2 file rows $rows != 3 + return -1 +endi + +if $data01 != 10.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 1.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi +if $data21 != 100.00000 then + return -1 +endi +if $data22 != NULL then + return -1 +endi + +sql delete from ct1 where ts < now + 120m; +sql delete from ct3 where ts < now; +sql delete from ct2 where ts < now + 60m; +sql delete from ct2 where ts < now + 120m; +sql delete from ct3 where ts < now + 60m; +sql delete from ct3 where ts < now + 120m; +sql delete from ct3 where ts < now; + +print =============== wait 7 seconds for results +sleep 7000 + +print =============== select * from retention level 2 from memory after delete ct2 +sql select * from ct2; +print $data00 $data01 $data02 +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +print =============== select * from retention level 1 from memory after delete ct3 +sql select * from ct3 where ts > now - 8d; +print $data00 $data01 $data02 +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + + +#=================================================================== +system sh/exec.sh -n dnode1 -s stop -x SIGINT 
+#=================================================================== +system sh/exec.sh -n dnode1 -s start +print =============== wait 7 seconds for results after reboot +sleep 7000 + +print =============== select * from retention level 1 from memory after delete ct2 +sql select * from ct2 where ts > now - 8d; +print $data00 $data01 $data02 +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +print =============== select * from retention level 2 from memory after delete ct3 +sql select * from ct3 where ts > now - 365d; +print $data00 $data01 $data02 +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +#=================================================================== +system sh/exec.sh -n dnode1 -s stop -x SIGINT +#=================================================================== \ No newline at end of file diff --git a/tests/script/tsim/testsuit.sim b/tests/script/tsim/testsuit.sim index 0abe56ab3c..c208a07488 100644 --- a/tests/script/tsim/testsuit.sim +++ b/tests/script/tsim/testsuit.sim @@ -130,5 +130,6 @@ run tsim/sync/3Replica1VgElect.sim run tsim/sync/threeReplica1VgElectWihtInsert.sim run tsim/sma/tsmaCreateInsertQuery.sim run tsim/sma/rsmaCreateInsertQuery.sim +run tsim/sma/rsmaCreateInsertQueryDelete.sim run tsim/valgrind/basic.sim run tsim/valgrind/checkError.sim \ No newline at end of file diff --git a/tests/script/win-test-file b/tests/script/win-test-file index fe5f5c39e3..b2d50ade8a 100644 --- a/tests/script/win-test-file +++ b/tests/script/win-test-file @@ -319,6 +319,7 @@ ./test.sh -f tsim/sma/sma_leak.sim ./test.sh -f tsim/sma/tsmaCreateInsertQuery.sim ./test.sh -f tsim/sma/rsmaCreateInsertQuery.sim +./test.sh -f tsim/sma/rsmaCreateInsertQueryDelete.sim ./test.sh -f tsim/sma/rsmaPersistenceRecovery.sim ./test.sh -f tsim/sync/vnodesnapshot-rsma-test.sim ./test.sh -f tsim/valgrind/checkError1.sim diff --git a/tests/system-test/2-query/partition_by_col_agg.py 
b/tests/system-test/2-query/partition_by_col_agg.py index 011415867b..1bc7a2414a 100644 --- a/tests/system-test/2-query/partition_by_col_agg.py +++ b/tests/system-test/2-query/partition_by_col_agg.py @@ -210,7 +210,7 @@ class TDTestCase: #self.check_explain_res_has_row('SortMerge', explain_res) #self.check_explain_res_has_row("blocking=0", explain_res) explain_res = self.explain_sql(sql_hint) - self.check_explain_res_has_row('SortMerge', explain_res) + self.check_explain_res_has_row('Merge', explain_res) self.check_explain_res_has_row('blocking=0', explain_res) def test_pipelined_agg_plan_with_slimit(self): diff --git a/tests/system-test/2-query/tbname_vgroup.py b/tests/system-test/2-query/tbname_vgroup.py new file mode 100644 index 0000000000..564c78b47a --- /dev/null +++ b/tests/system-test/2-query/tbname_vgroup.py @@ -0,0 +1,210 @@ +import sys +from util.log import * +from util.cases import * +from util.sql import * +from util.dnodes import tdDnodes +from math import inf + +class TDTestCase: + def caseDescription(self): + ''' + case1: [TD-] + ''' + return + + def init(self, conn, logSql, replicaVer=1): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), True) + self.conn = conn + + def restartTaosd(self, index=1, dbname="db"): + tdDnodes.stop(index) + tdDnodes.startWithoutSleep(index) + tdSql.execute(f"use tbname_vgroup") + + def run(self): + print("running {}".format(__file__)) + tdSql.execute("drop database if exists tbname_vgroup") + tdSql.execute("create database if not exists tbname_vgroup") + tdSql.execute('use tbname_vgroup') + tdSql.execute('drop database if exists dbvg') + tdSql.execute('create database dbvg vgroups 8;') + + tdSql.execute('use dbvg;') + + tdSql.execute('create table st(ts timestamp, f int) tags (t int);') + + tdSql.execute("insert into ct1 using st tags(1) values('2021-04-19 00:00:01', 1)") + + tdSql.execute("insert into ct2 using st tags(2) values('2021-04-19 00:00:02', 2)") + + tdSql.execute("insert into ct3 
using st tags(3) values('2021-04-19 00:00:03', 3)") + + tdSql.execute("insert into ct4 using st tags(4) values('2021-04-19 00:00:04', 4)") + + tdSql.query("select * from st where tbname='ct1'") + tdSql.checkRows(1) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + + tdSql.query("select * from st where tbname='ct3'") + tdSql.checkRows(1) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(0, 1, 3) + tdSql.checkData(0, 2, 3) + + tdSql.query("select * from st where tbname='ct3' and f=2") + tdSql.checkRows(0) + + tdSql.query("select * from st where tbname='ct1' and tbname='ct4'") + tdSql.checkRows(0) + + tdSql.query("select * from st where tbname='ct1' or tbname='ct4' order by ts") + tdSql.checkRows(2) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 4)) + tdSql.checkData(1, 1, 4) + tdSql.checkData(1, 2, 4) + + tdSql.query("select * from st where tbname='ct2' or tbname='ct3' order by ts") + tdSql.checkRows(2) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(0, 1, 2) + tdSql.checkData(0, 2, 2) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(1, 1, 3) + tdSql.checkData(1, 2, 3) + + tdSql.query("select * from st where tbname='ct1' or tbname='ct4' or tbname='ct3' or tbname='ct2' order by ts") + tdSql.checkRows(4) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(1, 1, 2) + tdSql.checkData(1, 2, 2) + tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(2, 1, 3) + tdSql.checkData(2, 2, 3) + tdSql.checkData(3, 0, datetime.datetime(2021, 4, 19, 0, 0, 4)) + tdSql.checkData(3, 1, 4) + tdSql.checkData(3, 2, 
4) + + tdSql.query("select * from st where tbname='ct4' or 1=1 order by ts") + tdSql.checkRows(4) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(1, 1, 2) + tdSql.checkData(1, 2, 2) + tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(2, 1, 3) + tdSql.checkData(2, 2, 3) + tdSql.checkData(3, 0, datetime.datetime(2021, 4, 19, 0, 0, 4)) + tdSql.checkData(3, 1, 4) + tdSql.checkData(3, 2, 4) + + tdSql.query("select * from st where tbname in ('ct1') order by ts") + tdSql.checkRows(1) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + + tdSql.query("select * from st where tbname in ('ct1', 'ct2') order by ts") + tdSql.checkRows(2) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(1, 1, 2) + tdSql.checkData(1, 2, 2) + + tdSql.query("select * from st where tbname in ('ct1', 'ct2') or tbname in ('ct3', 'ct4') order by ts") + tdSql.checkRows(4) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(1, 1, 2) + tdSql.checkData(1, 2, 2) + tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(2, 1, 3) + tdSql.checkData(2, 2, 3) + tdSql.checkData(3, 0, datetime.datetime(2021, 4, 19, 0, 0, 4)) + tdSql.checkData(3, 1, 4) + tdSql.checkData(3, 2, 4) + + tdSql.query("select * from st where tbname in ('ct1', 'ct2') or tbname='ct3' order by ts") + tdSql.checkRows(3) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, 
datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(1, 1, 2) + tdSql.checkData(1, 2, 2) + tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(2, 1, 3) + tdSql.checkData(2, 2, 3) + + tdSql.query("select * from st where tbname in ('ct1', 'ct2') and tbname='ct3' order by ts") + tdSql.checkRows(0) + + tdSql.query("select * from st where tbname in ('ct1') or 1=1 order by ts") + tdSql.checkRows(4) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(1, 1, 2) + tdSql.checkData(1, 2, 2) + tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(2, 1, 3) + tdSql.checkData(2, 2, 3) + tdSql.checkData(3, 0, datetime.datetime(2021, 4, 19, 0, 0, 4)) + tdSql.checkData(3, 1, 4) + tdSql.checkData(3, 2, 4) + + tdSql.query("explain select * from st where tbname='ct1'") + tdSql.checkRows(2) + + tdSql.query("select table_name, vgroup_id from information_schema.ins_tables where db_name='dbvg' and type='CHILD_TABLE'"); + print(tdSql.queryResult); + + tdSql.query("explain select * from st where tbname in ('ct1', 'ct2')") + if tdSql.queryResult[0][0].count("Data Exchange 2:1") == 0: + tdLog.exit("failed, not two vgroups") + else: + tdLog.info("select * from st where tbname in ('ct1', 'ct2') involves two vgroups") + + tdSql.execute('create table st2(ts timestamp, f int) tags (t int);') + + tdSql.execute("insert into ct21 using st2 tags(1) values('2021-04-19 00:00:01', 1)") + + tdSql.execute("insert into ct22 using st2 tags(2) values('2021-04-19 00:00:02', 2)") + + tdSql.execute("insert into ct23 using st2 tags(3) values('2021-04-19 00:00:03', 3)") + + tdSql.execute("insert into ct24 using st2 tags(4) values('2021-04-19 00:00:04', 4)") + + tdSql.query("select * from st, st2 where st.ts=st2.ts and st.tbname in ('ct1', 'ct2') and st2.tbname in ('ct21', 'ct23')"); + tdSql.checkRows(1); + 
tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(0, 3, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 4, 1) + tdSql.checkData(0, 5, 1) + + #tdSql.execute('drop database dbvg;') + + tdSql.execute('drop database tbname_vgroup') + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase())