diff --git a/cmake/mxml_CMakeLists.txt.in b/cmake/mxml_CMakeLists.txt.in index 9dcb5df665..1ac90ebdd4 100644 --- a/cmake/mxml_CMakeLists.txt.in +++ b/cmake/mxml_CMakeLists.txt.in @@ -1,7 +1,7 @@ # cos ExternalProject_Add(mxml GIT_REPOSITORY https://github.com/michaelrsweet/mxml.git - GIT_TAG release-2.10 + GIT_TAG release-2.12 SOURCE_DIR "${TD_CONTRIB_DIR}/mxml" #BINARY_DIR "" BUILD_IN_SOURCE TRUE diff --git a/docs/en/12-taos-sql/01-data-type.md b/docs/en/12-taos-sql/01-data-type.md index f81aaceca3..020eb27cfe 100644 --- a/docs/en/12-taos-sql/01-data-type.md +++ b/docs/en/12-taos-sql/01-data-type.md @@ -43,6 +43,8 @@ In TDengine, the data types below can be used when specifying a column or tag. | 15 | JSON | | JSON type can only be used on tags. A tag of json type is excluded with any other tags of any other type. | | 16 | VARCHAR | User-defined | Alias of BINARY | | 17 | GEOMETRY | User-defined | Geometry | +| 18 | VARBINARY | User-defined | Binary data with variable length | + :::note - Each row of the table cannot be longer than 48KB (64KB since version 3.0.5.0) (note that each BINARY/NCHAR/GEOMETRY column takes up an additional 2 bytes of storage space). @@ -57,7 +59,7 @@ In TDengine, the data types below can be used when specifying a column or tag. | 3 | POLYGON((1.0 1.0, 2.0 2.0, 1.0 1.0)) | 13+3*16 | 13+4094*16 | +16 | - Numeric values in SQL statements will be determined as integer or float type according to whether there is decimal point or whether scientific notation is used, so attention must be paid to avoid overflow. For example, 9999999999999999999 will be considered as overflow because it exceeds the upper limit of long integer, but 9999999999999999999.0 will be considered as a legal float number. - +- VARBINARY is a data type that stores binary data, with a maximum length of 65,517 bytes for data columns and 16,382 bytes for tag columns. Binary data can be written through SQL or schemaless writing (where it must be converted to a string starting with \x), or through stmt (which can use binary directly). It is displayed as a hexadecimal string starting with \x. ::: ## Constants diff --git a/docs/en/12-taos-sql/10-function.md b/docs/en/12-taos-sql/10-function.md index b0c5f82985..9d2b54dab3 100644 --- a/docs/en/12-taos-sql/10-function.md +++ b/docs/en/12-taos-sql/10-function.md @@ -402,7 +402,7 @@ CAST(expr AS type_name) **Return value type**: The type specified by parameter `type_name` -**Applicable data types**: All data types except JSON +**Applicable data types**: All data types except JSON and VARBINARY. If type_name is VARBINARY, expr can only be VARCHAR. **Nested query**: It can be used in both the outer query and inner query in a nested query. diff --git a/docs/en/14-reference/13-schemaless/13-schemaless.md b/docs/en/14-reference/13-schemaless/13-schemaless.md index e17bc5f31e..d4b6606ac5 100644 --- a/docs/en/14-reference/13-schemaless/13-schemaless.md +++ b/docs/en/14-reference/13-schemaless/13-schemaless.md @@ -32,8 +32,10 @@ All data in tag_set is automatically converted to the NCHAR data type and does n In the schemaless writing data line protocol, each data item in the field_set needs to be described with its data type. Let's explain in detail: -- If there are English double quotes on both sides, it indicates the VARCHAR(N) type. For example, `"abc"`. -- If there are double quotes on both sides and an L prefix, it means NCHAR(N) type. For example, `L"error message"`. +- If there are English double quotes on both sides, it indicates the VARCHAR type.
For example, `"abc"`. +- If there are double quotes on both sides and an L/l prefix, it means NCHAR type. For example, `L"error message"`. +- If there are double quotes on both sides and a G/g prefix, it means GEOMETRY type. For example, `G"Point(4.343 89.342)"`. +- If there are double quotes on both sides and a B/b prefix, it means VARBINARY type. The double quotes may contain a hexadecimal value starting with \x or a plain string. For example, `B"\x98f46e"` or `B"hello"`. - Spaces, equals sign (=), comma (,), double quote ("), and backslash (\\) need to be escaped with a backslash (\\) in front. (All refer to the ASCII character). The rules are as follows: | **Serial number** | **Element** | **Escape characters** | @@ -110,7 +112,7 @@ You can configure smlChildTableName in taos.cfg to specify table names, for exam Note: TDengine 3.0.3.0 and later automatically detect whether order is consistent. This parameter is no longer used. 9. Due to the fact that SQL table names do not support period (.), schemaless has also processed period (.). If there is a period (.) in the table name automatically created by schemaless, it will be automatically replaced with an underscore (\_). If you manually specify a sub table name, if there is a dot (.) in the sub table name, it will also be converted to an underscore (\_) 10. Taos.cfg adds the configuration of smlTsDefaultName (with a string value), which only works on the client side. After configuration, the time column name of the schemaless automatic table creation can be set through this configuration. If not configured, defaults to _ts. - +11. Super table names and child table names are case sensitive. :::tip All processing logic of schemaless will still follow TDengine's underlying restrictions on data structures, such as the total length of each row of data cannot exceed 48 KB(64 KB since version 3.0.5.0) and the total length of a tag value cannot exceed 16 KB. See [TDengine SQL Boundary Limits](/taos-sql/limit) for specific constraints in this area.
::: diff --git a/docs/zh/12-taos-sql/01-data-type.md b/docs/zh/12-taos-sql/01-data-type.md index 1df07e7e7f..82ddb630a7 100644 --- a/docs/zh/12-taos-sql/01-data-type.md +++ b/docs/zh/12-taos-sql/01-data-type.md @@ -42,11 +42,12 @@ CREATE DATABASE db_name PRECISION 'ns'; | 14 | NCHAR | 自定义 | 记录包含多字节字符在内的字符串,如中文字符。每个 NCHAR 字符占用 4 字节的存储空间。字符串两端使用单引号引用,字符串内的单引号需用转义字符 `\'`。NCHAR 使用时须指定字符串大小,类型为 NCHAR(10) 的列表示此列的字符串最多存储 10 个 NCHAR 字符。如果用户字符串长度超出声明长度,将会报错。 | | 15 | JSON | | JSON 数据类型, 只有 Tag 可以是 JSON 格式 | | 16 | VARCHAR | 自定义 | BINARY 类型的别名 | -| 17 | GEOMETRY | 自定义 | 几何类型 | +| 17 | GEOMETRY | 自定义 | 几何类型 +| 18 | VARBINARY | 自定义 | 可变长的二进制数据| :::note -- 表的每行长度不能超过 48KB(从 3.0.5.0 版本开始为 64KB)(注意:每个 BINARY/NCHAR/GEOMETRY 类型的列还会额外占用 2 个字节的存储位置)。 +- 表的每行长度不能超过 48KB(从 3.0.5.0 版本开始为 64KB)(注意:每个 BINARY/NCHAR/GEOMETRY/VARBINARY 类型的列还会额外占用 2 个字节的存储位置)。 - 虽然 BINARY 类型在底层存储上支持字节型的二进制字符,但不同编程语言对二进制数据的处理方式并不保证一致,因此建议在 BINARY 类型中只存储 ASCII 可见字符,而避免存储不可见字符。多字节的数据,例如中文字符,则需要使用 NCHAR 类型进行保存。如果强行使用 BINARY 类型保存中文字符,虽然有时也能正常读写,但并不带有字符集信息,很容易出现数据乱码甚至数据损坏等情况。 - BINARY 类型理论上最长可以有 16,374(从 3.0.5.0 版本开始,数据列为 65,517,标签列为 16,382) 字节。BINARY 仅支持字符串输入,字符串两端需使用单引号引用。使用时须指定大小,如 BINARY(20) 定义了最长为 20 个单字节字符的字符串,每个字符占 1 字节的存储空间,总共固定占用 20 字节的空间,此时如果用户字符串超出 20 字节将会报错。对于字符串内的单引号,可以用转义字符反斜线加单引号来表示,即 `\'`。 - GEOMETRY 类型数据列为最大长度为 65,517 字节,标签列最大长度为 16,382 字节。支持 2D 的 POINT、LINESTRING 和 POLYGON 子类型数据。长度计算方式如下表所示: @@ -58,6 +59,7 @@ CREATE DATABASE db_name PRECISION 'ns'; | 3 | POLYGON((1.0 1.0, 2.0 2.0, 1.0 1.0)) | 13+3*16 | 13+4094*16 | +16 | - SQL 语句中的数值类型将依据是否存在小数点,或使用科学计数法表示,来判断数值类型是否为整型或者浮点型,因此在使用时要注意相应类型越界的情况。例如,9999999999999999999 会认为超过长整型的上边界而溢出,而 9999999999999999999.0 会被认为是有效的浮点数。 +- VARBINARY 是一种存储二进制数据的数据类型,最大长度为 65,517 字节,标签列最大长度为 16,382 字节。可以通过sql或schemaless方式写入二进制数据(需要转换为\x开头的字符串写入),也可以通过stmt方式写入(可以直接使用二进制)。显示时通过16进制\x开头。 ::: diff --git a/docs/zh/12-taos-sql/10-function.md b/docs/zh/12-taos-sql/10-function.md index f7e2a64ea7..cfec71934c 100644 --- a/docs/zh/12-taos-sql/10-function.md +++ b/docs/zh/12-taos-sql/10-function.md @@ -402,7 +402,7 @@ CAST(expr AS type_name) **返回结果类型**:CAST 中指定的类型(type_name)。 -**适用数据类型**:输入参数 expression 的类型可以是除JSON外的所有类型。 +**适用数据类型**:输入参数 expr 的类型可以是除JSON和VARBINARY外的所有类型。如果 type_name 为 VARBINARY,则 expr 只能是 VARCHAR 类型。 **嵌套子查询支持**:适用于内层查询和外层查询。 diff --git a/docs/zh/12-taos-sql/14-stream.md b/docs/zh/12-taos-sql/14-stream.md index 43bdc9b04a..929cf9ee4e 100644 --- a/docs/zh/12-taos-sql/14-stream.md +++ b/docs/zh/12-taos-sql/14-stream.md @@ -201,7 +201,6 @@ TDengine 对于修改数据提供两种处理方式,由 IGNORE UPDATE 选项 对于已经存在的超级表,检查列的schema信息 1. 检查列的schema信息是否匹配,对于不匹配的,则自动进行类型转换,当前只有数据长度大于4096byte时才报错,其余场景都能进行类型转换。 2. 检查列的个数是否相同,如果不同,需要显示的指定超级表与subquery的列的对应关系,否则报错;如果相同,可以指定对应关系,也可以不指定,不指定则按位置顺序对应。 -3. 
至少自定义一个tag,否则报错。详见 自定义TAG ## 自定义TAG diff --git a/docs/zh/14-reference/13-schemaless/13-schemaless.md b/docs/zh/14-reference/13-schemaless/13-schemaless.md index 7f9d24170e..969bc8c2ae 100644 --- a/docs/zh/14-reference/13-schemaless/13-schemaless.md +++ b/docs/zh/14-reference/13-schemaless/13-schemaless.md @@ -33,8 +33,10 @@ tag_set 中的所有的数据自动转化为 nchar 数据类型,并不需要 在无模式写入数据行协议中,field_set 中的每个数据项都需要对自身的数据类型进行描述。具体来说: -- 如果两边有英文双引号,表示 VARCHAR(N) 类型。例如 `"abc"`。 -- 如果两边有英文双引号而且带有 L 前缀,表示 NCHAR(N) 类型。例如 `L"报错信息"`。 +- 如果两边有英文双引号,表示 VARCHAR 类型。例如 `"abc"`。 +- 如果两边有英文双引号而且带有 L或l 前缀,表示 NCHAR 类型。例如 `L"报错信息"`。 +- 如果两边有英文双引号而且带有 G或g 前缀,表示 GEOMETRY 类型。例如 `G"Point(4.343 89.342)"`。 +- 如果两边有英文双引号而且带有 B或b 前缀,表示 VARBINARY 类型,双引号内可以为\x开头的16进制或者字符串。例如 `B"\x98f46e"` `B"hello"`。 - 对空格、等号(=)、逗号(,)、双引号(")、反斜杠(\),前面需要使用反斜杠(\)进行转义。(都指的是英文半角符号)。具体转义规则如下: | **序号** | **域** | **需转义字符** | @@ -106,6 +108,7 @@ st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000 8. 为了提高写入的效率,默认假设同一个超级表中 field_set 的顺序是一样的(第一条数据包含所有的 field,后面的数据按照这个顺序),如果顺序不一样,需要配置参数 smlDataFormat 为 false,否则,数据写入按照相同顺序写入,库中数据会异常,从3.0.3.0开始,自动检测顺序是否一致,该配置废弃。 9. 由于sql建表表名不支持点号(.),所以schemaless也对点号(.)做了处理,如果schemaless自动建表的表名如果有点号(.),会自动替换为下划线(\_)。如果手动指定子表名的话,子表名里有点号(.),同样转化为下划线(\_)。 10. taos.cfg 增加 smlTsDefaultName 配置(值为字符串),只在client端起作用,配置后,schemaless自动建表的时间列名字可以通过该配置设置。不配置的话,默认为 _ts +11. 无模式写入的数据超级表或子表名区分大小写 :::tip 无模式所有的处理逻辑,仍会遵循 TDengine 对数据结构的底层限制,例如每行数据的总长度不能超过 diff --git a/include/common/tglobal.h b/include/common/tglobal.h index aff5945f9f..d7a3d84424 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -191,6 +191,7 @@ extern int64_t tsWalFsyncDataSizeLimit; extern int32_t tsTransPullupInterval; extern int32_t tsMqRebalanceInterval; extern int32_t tsStreamCheckpointTickInterval; +extern int32_t tsStreamNodeCheckInterval; extern int32_t tsTtlUnit; extern int32_t tsTtlPushIntervalSec; extern int32_t tsTtlBatchDropNum; @@ -203,7 +204,6 @@ extern int32_t tsRpcRetryInterval; extern bool tsDisableStream; extern int64_t tsStreamBufferSize; -extern int64_t tsCheckpointInterval; extern bool tsFilterScalarMode; extern int32_t tsKeepTimeOffset; extern int32_t tsMaxStreamBackendCache; diff --git a/include/common/tmisce.h b/include/common/tmisce.h index bc6558900c..3d1afcd21f 100644 --- a/include/common/tmisce.h +++ b/include/common/tmisce.h @@ -28,6 +28,22 @@ typedef struct SCorEpSet { } SCorEpSet; #define GET_ACTIVE_EP(_eps) (&((_eps)->eps[(_eps)->inUse])) + +#define EPSET_TO_STR(_eps, tbuf) \ + do { \ + int len = snprintf((tbuf), sizeof(tbuf), "epset:{"); \ + for (int _i = 0; _i < (_eps)->numOfEps; _i++) { \ + if (_i == (_eps)->numOfEps - 1) { \ + len += \ + snprintf((tbuf) + len, sizeof(tbuf) - len, "%d. %s:%d", _i, (_eps)->eps[_i].fqdn, (_eps)->eps[_i].port); \ + } else { \ + len += \ + snprintf((tbuf) + len, sizeof(tbuf) - len, "%d. 
%s:%d, ", _i, (_eps)->eps[_i].fqdn, (_eps)->eps[_i].port); \ + } \ + } \ + len += snprintf((tbuf) + len, sizeof(tbuf) - len, "}, inUse:%d", (_eps)->inUse); \ + } while (0); + int32_t taosGetFqdnPortFromEp(const char* ep, SEp* pEp); void addEpIntoEpSet(SEpSet* pEpSet, const char* fqdn, uint16_t port); diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 29f0667dac..fb2c780724 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -157,6 +157,7 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_MND_TRANS_TIMER, "trans-tmr", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TTL_TIMER, "ttl-tmr", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_GRANT_HB_TIMER, "grant-hb-tmr", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_NODECHECK_TIMER, "node-check-tmr", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_KILL_TRANS, "kill-trans", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_KILL_QUERY, "kill-query", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_KILL_CONN, "kill-conn", NULL, NULL) @@ -175,13 +176,16 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_MND_SERVER_VERSION, "server-version", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_UPTIME_TIMER, "uptime-timer", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, "lost-consumer-clear", NULL, NULL) - // TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_TIMER, "stream-checkpoint-tmr", NULL, NULL) - // TD_DEF_MSG_TYPE(TDMT_MND_STREAM_BEGIN_CHECKPOINT, "stream-begin-checkpoint", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_HEARTBEAT, "stream-heartbeat", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_MAX_MSG, "mnd-max", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_BALANCE_VGROUP_LEADER, "balance-vgroup-leader", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_RESTORE_DNODE, "restore-dnode", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_PAUSE_STREAM, "pause-stream", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_RESUME_STREAM, "resume-stream", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_TIMER, "stream-checkpoint-tmr", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_BEGIN_CHECKPOINT, "stream-begin-checkpoint", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_NODECHANGE_CHECK, "stream-nodechange-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TRIM_DB_TIMER, "trim-db-tmr", NULL, NULL) TD_NEW_MSG_SEG(TDMT_VND_MSG) @@ -255,15 +259,13 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DISPATCH, "stream-task-dispatch", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_UNUSED1, "stream-unused1", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY, "stream-scan-history", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY_FINISH, "stream-scan-history-finish", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECK, "stream-task-check", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT, "stream-checkpoint", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT_READY, "stream-checkpoint-ready", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_REPORT_CHECKPOINT, "stream-report-checkpoint", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RESTORE_CHECKPOINT, "stream-restore-checkpoint", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_MAX_MSG, "stream-max", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_PAUSE, "stream-task-pause", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RESUME, "stream-task-resume", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_STOP, "stream-task-stop", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_MAX_MSG, "stream-max", NULL, NULL) 
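// Editor's note (not part of the patch): the reordering above keeps TDMT_STREAM_MAX_MSG as the last entry of the stream segment after the new message types (e.g. TDMT_STREAM_TASK_CHECKPOINT_READY, TDMT_STREAM_TASK_STOP) are added, presumably so the segment's *_MAX_MSG bound still covers them, in line with the "WARN: new msg should be appended to segment tail" note on this enum.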
TD_NEW_MSG_SEG(TDMT_MON_MSG) TD_DEF_MSG_TYPE(TDMT_MON_MAX_MSG, "monitor-max", NULL, NULL) @@ -300,9 +302,12 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_SYNC_FORCE_FOLLOWER, "sync-force-become-follower", NULL, NULL) TD_NEW_MSG_SEG(TDMT_VND_STREAM_MSG) - TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL) +// TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_SCAN_HISTORY, "vnode-stream-scan-history", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_VND_STREAM_SCAN_HISTORY_FINISH, "vnode-stream-scan-history-finish", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_CHECK_POINT_SOURCE, "vnode-stream-checkpoint-source", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TASK_UPDATE, "vnode-stream-update", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TASK_CHECK, "vnode-stream-task-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_MAX_MSG, "vnd-stream-max", NULL, NULL) TD_NEW_MSG_SEG(TDMT_VND_TMQ_MSG) diff --git a/include/libs/audit/audit.h b/include/libs/audit/audit.h index 1381b6e4a2..8465ec510e 100644 --- a/include/libs/audit/audit.h +++ b/include/libs/audit/audit.h @@ -29,6 +29,8 @@ extern "C" { #endif +#define AUDIT_DETAIL_MAX 16000 + typedef struct { const char *server; uint16_t port; diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index e2adcb12dc..5990ae1c9c 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -41,23 +41,21 @@ typedef struct { } SLocalFetch; typedef struct { - void* tqReader; - void* config; - void* vnode; - void* mnd; - SMsgCb* pMsgCb; - int64_t version; - bool initMetaReader; - bool initTableReader; - bool initTqReader; - int32_t numOfVgroups; - void* sContext; // SSnapContext* + void* tqReader; // todo remove it + void* vnode; + void* mnd; + SMsgCb* pMsgCb; + int64_t version; + uint64_t checkpointId; + bool initTableReader; + bool initTqReader; + int32_t numOfVgroups; + void* sContext; // SSnapContext* + void* pStateBackend; + int8_t fillHistory; + STimeWindow winRange; - void* pStateBackend; struct SStorageAPI api; - - int8_t fillHistory; - STimeWindow winRange; } SReadHandle; // in queue mode, data streams are seperated by msg @@ -97,9 +95,6 @@ void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId); int32_t qSetStreamOpOpen(qTaskInfo_t tinfo); -// todo refactor -void qGetCheckpointVersion(qTaskInfo_t tinfo, int64_t* dataVer, int64_t* ckId); - /** * Set multiple input data blocks for the stream scan. 
* @param tinfo diff --git a/include/libs/executor/storageapi.h b/include/libs/executor/storageapi.h index 873b95b29f..0a240dd8f5 100644 --- a/include/libs/executor/storageapi.h +++ b/include/libs/executor/storageapi.h @@ -379,7 +379,7 @@ typedef struct SStateStore { state_key_cmpr_fn fn, void** pVal, int32_t* pVLen); int32_t (*streamStateSessionGetKeyByRange)(SStreamState* pState, const SSessionKey* range, SSessionKey* curKey); - SUpdateInfo* (*updateInfoInit)(int64_t interval, int32_t precision, int64_t watermark); + SUpdateInfo* (*updateInfoInit)(int64_t interval, int32_t precision, int64_t watermark, bool igUp); TSKEY (*updateInfoFillBlockData)(SUpdateInfo* pInfo, SSDataBlock* pBlock, int32_t primaryTsCol); bool (*updateInfoIsUpdated)(SUpdateInfo* pInfo, uint64_t tableId, TSKEY ts); bool (*updateInfoIsTableInserted)(SUpdateInfo* pInfo, int64_t tbUid); @@ -387,7 +387,7 @@ typedef struct SStateStore { void (*windowSBfDelete)(SUpdateInfo *pInfo, uint64_t count); void (*windowSBfAdd)(SUpdateInfo *pInfo, uint64_t count); - SUpdateInfo* (*updateInfoInitP)(SInterval* pInterval, int64_t watermark); + SUpdateInfo* (*updateInfoInitP)(SInterval* pInterval, int64_t watermark, bool igUp); void (*updateInfoAddCloseWindowSBF)(SUpdateInfo* pInfo); void (*updateInfoDestoryColseWinSBF)(SUpdateInfo* pInfo); int32_t (*updateInfoSerialize)(void* buf, int32_t bufLen, const SUpdateInfo* pInfo); @@ -398,7 +398,8 @@ typedef struct SStateStore { SStreamStateCur* (*streamStateSessionSeekKeyCurrentNext)(SStreamState* pState, const SSessionKey* key); struct SStreamFileState* (*streamFileStateInit)(int64_t memSize, uint32_t keySize, uint32_t rowSize, - uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark, const char*id); + uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark, + const char* id, int64_t ckId); void (*streamFileStateDestroy)(struct SStreamFileState* pFileState); void (*streamFileStateClear)(struct SStreamFileState* pFileState); diff --git a/include/libs/function/functionMgt.h b/include/libs/function/functionMgt.h index 7347cc5a4d..1954f2a415 100644 --- a/include/libs/function/functionMgt.h +++ b/include/libs/function/functionMgt.h @@ -235,6 +235,7 @@ bool fmIsCumulativeFunc(int32_t funcId); bool fmIsInterpPseudoColumnFunc(int32_t funcId); bool fmIsGroupKeyFunc(int32_t funcId); bool fmIsBlockDistFunc(int32_t funcId); +bool fmIsConstantResFunc(SFunctionNode* pFunc); void getLastCacheDataType(SDataType* pType); SFunctionNode* createFunction(const char* pName, SNodeList* pParameterList); diff --git a/include/libs/stream/streamSnapshot.h b/include/libs/stream/streamSnapshot.h new file mode 100644 index 0000000000..15d5f56ffd --- /dev/null +++ b/include/libs/stream/streamSnapshot.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ +#ifndef _STREAM_BACKEDN_SNAPSHOT_H_ +#define _STREAM_BACKEDN_SNAPSHOT_H_ +#include "tcommon.h" + +#define STREAM_STATE_TRANSFER "stream-state-transfer" + +typedef struct SStreamSnapReader SStreamSnapReader; +typedef struct SStreamSnapWriter SStreamSnapWriter; + +typedef struct SStreamSnapHandle SStreamSnapHandle; +typedef struct SStreamSnapBlockHdr SStreamSnapBlockHdr; + +int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t ever, char* path, SStreamSnapReader** ppReader); +int32_t streamSnapReaderClose(SStreamSnapReader* pReader); +int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* size); + +// SMetaSnapWriter ======================================== +int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path, SStreamSnapWriter** ppWriter); +int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nData); +int32_t streamSnapWriterClose(SStreamSnapWriter* ppWriter, int8_t rollback); + +#endif \ No newline at end of file diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index b47288bf45..99160f1519 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -20,6 +20,7 @@ #include "tmsg.h" #include "tmsgcb.h" #include "tqueue.h" +#include "ttimer.h" #ifdef __cplusplus extern "C" { @@ -30,7 +31,7 @@ extern "C" { typedef struct SStreamTask SStreamTask; -#define SSTREAM_TASK_VER 1 +#define SSTREAM_TASK_VER 2 enum { STREAM_STATUS__NORMAL = 0, STREAM_STATUS__STOP, @@ -48,6 +49,7 @@ enum { TASK_STATUS__SCAN_HISTORY, // stream task scan history data by using tsdbread in the stream scanner TASK_STATUS__HALT, // pause, but not be manipulated by user command TASK_STATUS__PAUSE, // pause + TASK_STATUS__CK, // stream task is in checkpoint status, no data are allowed to put into inputQ anymore }; enum { @@ -61,15 +63,12 @@ enum { enum { TASK_INPUT_STATUS__NORMAL = 1, TASK_INPUT_STATUS__BLOCKED, - TASK_INPUT_STATUS__RECOVER, - TASK_INPUT_STATUS__STOP, TASK_INPUT_STATUS__FAILED, }; enum { TASK_OUTPUT_STATUS__NORMAL = 1, TASK_OUTPUT_STATUS__WAIT, - TASK_OUTPUT_STATUS__BLOCKED, }; enum { @@ -97,11 +96,16 @@ enum { STREAM_QUEUE__PROCESSING, }; +enum { + STREAM_META_WILL_STOP = 1, + STREAM_META_OK_TO_STOP = 2, +}; + typedef struct { int8_t type; } SStreamQueueItem; -typedef void FTbSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data); +typedef void FTbSink(SStreamTask* pTask, void* vnode, void* data); typedef int32_t FTaskExpand(void* ahandle, SStreamTask* pTask, int64_t ver); typedef struct { @@ -119,14 +123,13 @@ typedef struct { } SStreamMergedSubmit; typedef struct { - int8_t type; - + int8_t type; + int64_t nodeId; // nodeId, from SStreamMeta int32_t srcVgId; int32_t srcTaskId; int32_t childId; int64_t sourceVer; int64_t reqId; - SArray* blocks; // SArray } SStreamDataBlock; @@ -136,10 +139,6 @@ typedef struct { SSDataBlock* pBlock; } SStreamRefDataBlock; -typedef struct { - int8_t type; -} SStreamCheckpoint; - typedef struct { int8_t type; SSDataBlock* pBlock; @@ -179,7 +178,7 @@ SStreamQueueRes streamQueueGetRes(SStreamQueue1* pQueue); #endif typedef struct { - STaosQueue* queue; + STaosQueue* pQueue; STaosQall* qall; void* qItem; int8_t status; @@ -190,19 +189,9 @@ void streamCleanUp(); SStreamQueue* streamQueueOpen(int64_t cap); void streamQueueClose(SStreamQueue* pQueue, int32_t taskId); - -static FORCE_INLINE void streamQueueProcessSuccess(SStreamQueue* queue) { - ASSERT(atomic_load_8(&queue->status) == STREAM_QUEUE__PROCESSING); - queue->qItem = NULL; - 
atomic_store_8(&queue->status, STREAM_QUEUE__SUCESS); -} - -static FORCE_INLINE void streamQueueProcessFail(SStreamQueue* queue) { - ASSERT(atomic_load_8(&queue->status) == STREAM_QUEUE__PROCESSING); - atomic_store_8(&queue->status, STREAM_QUEUE__FAILED); -} - -void* streamQueueNextItem(SStreamQueue* pQueue); +void streamQueueProcessSuccess(SStreamQueue* queue); +void streamQueueProcessFail(SStreamQueue* queue); +void* streamQueueNextItem(SStreamQueue* pQueue); SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type); void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit); @@ -252,20 +241,26 @@ typedef struct SStreamChildEpInfo { int32_t nodeId; int32_t childId; int32_t taskId; - int8_t dataAllowed; SEpSet epSet; + bool dataAllowed; // denote if the data from this upstream task is allowed to put into inputQ, not serialize it + int64_t stage; // upstream task stage value, to denote if the upstream node has restart/replica changed/transfer } SStreamChildEpInfo; -typedef struct SStreamId { +typedef struct SStreamTaskKey { + int64_t streamId; + int32_t taskId; +} SStreamTaskKey; + +typedef struct SStreamTaskId { int64_t streamId; int32_t taskId; const char* idStr; -} SStreamId; +} SStreamTaskId; typedef struct SCheckpointInfo { - int64_t id; - int64_t version; // offset in WAL - int64_t currentVer; // current offset in WAL, not serialize it + int64_t checkpointId; + int64_t checkpointVer; // latest checkpointId version + int64_t currentVer; // current offset in WAL, not serialize it } SCheckpointInfo; typedef struct SStreamStatus { @@ -273,24 +268,25 @@ typedef struct SStreamStatus { int8_t downstreamReady; // downstream tasks are all ready now, if this flag is set int8_t schedStatus; int8_t keepTaskStatus; - bool transferState; bool appendTranstateBlock; // has append the transfer state data block already, todo: remove it - int8_t timerActive; // timer is active - int8_t pauseAllowed; // allowed task status to be set to be paused + int8_t timerActive; // timer is active + int8_t pauseAllowed; // allowed task status to be set to be paused } SStreamStatus; -typedef struct SHistDataRange { +typedef struct SDataRange { SVersionRange range; STimeWindow window; -} SHistDataRange; +} SDataRange; typedef struct SSTaskBasicInfo { int32_t nodeId; // vgroup id or snode id SEpSet epSet; + SEpSet mnodeEpset; // mnode epset for send heartbeat int32_t selfChildId; int32_t totalLevel; int8_t taskLevel; int8_t fillHistory; // is fill history task or not + int64_t triggerParam; // in msec } SSTaskBasicInfo; typedef struct SDispatchMsgInfo { @@ -300,12 +296,22 @@ typedef struct SDispatchMsgInfo { int64_t blockingTs; // output blocking timestamp } SDispatchMsgInfo; -typedef struct { +typedef struct STaskOutputInfo { int8_t type; int8_t status; SStreamQueue* queue; } STaskOutputInfo; +typedef struct STaskInputInfo { + int8_t status; + SStreamQueue* queue; +} STaskInputInfo; + +typedef struct STaskSchedInfo { + int8_t status; + void* pTimer; +} STaskSchedInfo; + typedef struct { int64_t init; int64_t step1Start; @@ -314,20 +320,23 @@ typedef struct { struct SStreamTask { int64_t ver; - SStreamId id; + SStreamTaskId id; SSTaskBasicInfo info; STaskOutputInfo outputInfo; + STaskInputInfo inputInfo; + STaskSchedInfo schedInfo; SDispatchMsgInfo msgInfo; SStreamStatus status; SCheckpointInfo chkInfo; STaskExec exec; - SHistDataRange dataRange; - SStreamId historyTaskId; - SStreamId streamTaskId; - SArray* pUpstreamEpInfoList; // SArray, // children info - int32_t nextCheckId; - SArray* 
checkpointInfo; // SArray + SDataRange dataRange; + SStreamTaskId historyTaskId; + SStreamTaskId streamTaskId; STaskTimestamp tsInfo; + SArray* pReadyMsgList; // SArray + TdThreadMutex lock; // secure the operation of set task status and puting data into inputQ + SArray* pUpstreamInfoList; + // output union { STaskDispatcherFixedEp fixedEpDispatcher; @@ -337,18 +346,10 @@ struct SStreamTask { STaskSinkFetch fetchSink; }; - int8_t inputStatus; - SStreamQueue* inputQueue; - - // trigger - int8_t triggerStatus; - int64_t triggerParam; - void* schedTimer; void* launchTaskTimer; SMsgCb* pMsgCb; // msg handle SStreamState* pState; // state backend SArray* pRspMsgList; - TdThreadMutex lock; // the followings attributes don't be serialized int32_t notReadyTasks; @@ -358,11 +359,19 @@ struct SStreamTask { int32_t refCnt; int64_t checkpointingId; int32_t checkpointAlignCnt; + int32_t checkpointNotReadyTasks; int32_t transferStateAlignCnt; struct SStreamMeta* pMeta; SSHashObj* pNameMap; + char reserve[256]; }; +typedef struct SMetaHbInfo { + tmr_h hbTmr; + int32_t stopFlag; + int32_t tickCounter; +} SMetaHbInfo; + // meta typedef struct SStreamMeta { char* path; @@ -375,12 +384,25 @@ typedef struct SStreamMeta { TXN* txn; FTaskExpand* expandFunc; int32_t vgId; + int64_t stage; SRWLatch lock; int32_t walScanCounter; void* streamBackend; int64_t streamBackendRid; SHashObj* pTaskBackendUnique; TdThreadMutex backendMutex; + SMetaHbInfo hbInfo; + int32_t closedTask; + int32_t totalTasks; // this value should be increased when a new task is added into the meta + int32_t chkptNotReadyTasks; + int64_t rid; + + int64_t chkpId; + SArray* chkpSaved; + SArray* chkpInUse; + int32_t chkpCap; + SRWLatch chkpDirLock; + int32_t pauseTaskNum; } SStreamMeta; int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); @@ -391,8 +413,14 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHisto int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask); int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask); void tFreeStreamTask(SStreamTask* pTask); -int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem); -bool tInputQueueIsFull(const SStreamTask* pTask); +int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, int64_t ver); + +int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo); +int32_t tDecodeStreamTaskId(SDecoder* pDecoder, SStreamTaskId* pTaskId); + +int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem); +int32_t streamTaskPutTranstateIntoInputQ(SStreamTask* pTask); +bool streamQueueIsFull(const STaosQueue* pQueue); typedef struct { SMsgHead head; @@ -401,8 +429,9 @@ typedef struct { } SStreamTaskRunReq; typedef struct { - int64_t streamId; int32_t type; + int64_t stage; // nodeId from upstream task + int64_t streamId; int32_t taskId; int32_t srcVgId; int32_t upstreamTaskId; @@ -443,6 +472,7 @@ typedef struct { typedef struct { int64_t reqId; + int64_t stage; int64_t streamId; int32_t upstreamNodeId; int32_t upstreamTaskId; @@ -459,6 +489,7 @@ typedef struct { int32_t downstreamNodeId; int32_t downstreamTaskId; int32_t childId; + int32_t oldStage; int8_t status; } SStreamTaskCheckRsp; @@ -485,6 +516,8 @@ typedef struct { int64_t checkpointId; int32_t taskId; int32_t nodeId; + SEpSet mgmtEps; + int32_t mnodeId; int64_t expireTime; } SStreamCheckpointSourceReq; @@ -493,14 +526,16 @@ typedef struct { int64_t checkpointId; int32_t taskId; int32_t 
nodeId; + int32_t mnodeId; int64_t expireTime; + int8_t success; } SStreamCheckpointSourceRsp; -int32_t tEncodeSStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq); -int32_t tDecodeSStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq); +int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq); +int32_t tDecodeStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq); -int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp); -int32_t tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp); +int32_t tEncodeStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp); +int32_t tDecodeStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp); typedef struct { SMsgHead msgHead; @@ -511,28 +546,25 @@ typedef struct { int32_t upstreamTaskId; int32_t upstreamNodeId; int32_t childId; - int64_t expireTime; - int8_t taskLevel; -} SStreamCheckpointReq; +} SStreamCheckpointReadyMsg; -typedef struct { - SMsgHead msgHead; - int64_t streamId; - int64_t checkpointId; - int32_t downstreamTaskId; - int32_t downstreamNodeId; - int32_t upstreamTaskId; - int32_t upstreamNodeId; - int32_t childId; - int64_t expireTime; - int8_t taskLevel; -} SStreamCheckpointRsp; +int32_t tEncodeStreamCheckpointReadyMsg(SEncoder* pEncoder, const SStreamCheckpointReadyMsg* pRsp); +int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointReadyMsg* pRsp); -int32_t tEncodeSStreamCheckpointReq(SEncoder* pEncoder, const SStreamCheckpointReq* pReq); -int32_t tDecodeSStreamCheckpointReq(SDecoder* pDecoder, SStreamCheckpointReq* pReq); +typedef struct STaskStatusEntry { + int64_t streamId; + int32_t taskId; + int32_t status; +} STaskStatusEntry; -int32_t tEncodeSStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRsp* pRsp); -int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRsp); +typedef struct SStreamHbMsg { + int32_t vgId; + int32_t numOfTasks; + SArray* pTaskStatus; // SArray +} SStreamHbMsg; + +int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pRsp); +int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pRsp); typedef struct { int64_t streamId; @@ -545,6 +577,21 @@ typedef struct { int32_t tEncodeCompleteHistoryDataMsg(SEncoder* pEncoder, const SStreamCompleteHistoryMsg* pReq); int32_t tDecodeCompleteHistoryDataMsg(SDecoder* pDecoder, SStreamCompleteHistoryMsg* pReq); +typedef struct SNodeUpdateInfo { + int32_t nodeId; + SEpSet prevEp; + SEpSet newEp; +} SNodeUpdateInfo; + +typedef struct SStreamTaskNodeUpdateMsg { + int64_t streamId; + int32_t taskId; + SArray* pNodeList; // SArray +} SStreamTaskNodeUpdateMsg; + +int32_t tEncodeStreamTaskUpdateMsg(SEncoder* pEncoder, const SStreamTaskNodeUpdateMsg* pMsg); +int32_t tDecodeStreamTaskUpdateMsg(SDecoder* pDecoder, SStreamTaskNodeUpdateMsg* pMsg); + typedef struct { int64_t streamId; int32_t downstreamTaskId; @@ -564,16 +611,11 @@ int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq) int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp); int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp); -int32_t tEncodeSStreamTaskScanHistoryReq(SEncoder* pEncoder, const SStreamRecoverDownstreamReq* pReq); -int32_t tDecodeSStreamTaskScanHistoryReq(SDecoder* pDecoder, 
SStreamRecoverDownstreamReq* pReq); - -int32_t tEncodeSStreamTaskRecoverRsp(SEncoder* pEncoder, const SStreamRecoverDownstreamRsp* pRsp); -int32_t tDecodeSStreamTaskRecoverRsp(SDecoder* pDecoder, SStreamRecoverDownstreamRsp* pRsp); - +int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq); int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq); + int32_t tDecodeStreamRetrieveReq(SDecoder* pDecoder, SStreamRetrieveReq* pReq); void tDeleteStreamRetrieveReq(SStreamRetrieveReq* pReq); - void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq); int32_t streamSetupScheduleTrigger(SStreamTask* pTask); @@ -581,10 +623,9 @@ int32_t streamSetupScheduleTrigger(SStreamTask* pTask); int32_t streamProcessRunReq(SStreamTask* pTask); int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pMsg, bool exec); int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code); -void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId); -void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); -int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pMsg); +int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pMsg); +SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId); void streamTaskInputFail(SStreamTask* pTask); int32_t streamTryExec(SStreamTask* pTask); @@ -594,16 +635,19 @@ bool streamTaskShouldStop(const SStreamStatus* pStatus); bool streamTaskShouldPause(const SStreamStatus* pStatus); bool streamTaskIsIdle(const SStreamTask* pTask); -SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId); -int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize); +int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize); +void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen); char* createStreamTaskIdStr(int64_t streamId, int32_t taskId); // recover and fill history -void streamTaskCheckDownstreamTasks(SStreamTask* pTask); -int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask); +void streamTaskCheckDownstream(SStreamTask* pTask); int32_t streamTaskLaunchScanHistory(SStreamTask* pTask); -int32_t streamTaskCheckStatus(SStreamTask* pTask); +int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage); +int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList); +void streamTaskResetUpstreamStageInfo(SStreamTask* pTask); + +int32_t streamTaskStop(SStreamTask* pTask); int32_t streamSendCheckRsp(const SStreamMeta* pMeta, const SStreamTaskCheckReq* pReq, SStreamTaskCheckRsp* pRsp, SRpcHandleInfo* pRpcInfo, int32_t taskId); int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp); @@ -611,17 +655,26 @@ int32_t streamLaunchFillHistoryTask(SStreamTask* pTask); int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask); int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated); bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer); +int32_t streamTaskGetInputQItems(const SStreamTask* pTask); // common int32_t streamRestoreParam(SStreamTask* pTask); int32_t streamSetStatusNormal(SStreamTask* pTask); const char* streamGetTaskStatusStr(int32_t status); -void streamTaskPause(SStreamTask* pTask); -void streamTaskResume(SStreamTask* pTask); +void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta); +void 
streamTaskResume(SStreamTask* pTask, SStreamMeta* pMeta); void streamTaskHalt(SStreamTask* pTask); void streamTaskResumeFromHalt(SStreamTask* pTask); void streamTaskDisablePause(SStreamTask* pTask); void streamTaskEnablePause(SStreamTask* pTask); +int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask); +void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet); +void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet); +void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDownstreamTask); +int32_t streamTaskReleaseState(SStreamTask* pTask); +int32_t streamTaskReloadState(SStreamTask* pTask); +void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId); +void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); // source level int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); @@ -629,8 +682,6 @@ int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange* p int32_t streamSourceScanHistoryData(SStreamTask* pTask); int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask); -int32_t appendTranstateIntoInputQ(SStreamTask* pTask); - // agg level int32_t streamTaskScanHistoryPrepare(SStreamTask* pTask); int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, @@ -640,31 +691,32 @@ int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask); // stream task meta void streamMetaInit(); void streamMetaCleanup(); -SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId); +SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage); void streamMetaClose(SStreamMeta* streamMeta); - -// save to b-tree meta store -int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); -int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); +int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); // save to stream meta store +int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int64_t* pKey); int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask, bool* pAdded); int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); -int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta); // todo remove it +int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta); +int32_t streamMetaGetNumOfStreamTasks(SStreamMeta* pMeta); SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); - -int32_t streamMetaBegin(SStreamMeta* pMeta); -int32_t streamMetaCommit(SStreamMeta* pMeta); -int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver); +int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId); +int32_t streamMetaCommit(SStreamMeta* pMeta); +int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta); +void streamMetaNotifyClose(SStreamMeta* pMeta); // checkpoint -int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); -int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq); -int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp); +int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); +int32_t 
streamProcessCheckpointReadyMsg(SStreamTask* pTask); -int32_t streamTaskReleaseState(SStreamTask* pTask); -int32_t streamTaskReloadState(SStreamTask* pTask); int32_t streamAlignTransferState(SStreamTask* pTask); +int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SStreamTask* pTask, + int8_t isSucceed); +int32_t buildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SRpcMsg* pMsg, + int8_t isSucceed); + #ifdef __cplusplus } #endif diff --git a/include/libs/stream/tstreamFileState.h b/include/libs/stream/tstreamFileState.h index b2255013ca..052231fe39 100644 --- a/include/libs/stream/tstreamFileState.h +++ b/include/libs/stream/tstreamFileState.h @@ -31,7 +31,8 @@ typedef struct SStreamFileState SStreamFileState; typedef SList SStreamSnapshot; SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, - GetTsFun fp, void* pFile, TSKEY delMark, const char* id); + GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId, + int64_t checkpointId); void streamFileStateDestroy(SStreamFileState* pFileState); void streamFileStateClear(SStreamFileState* pFileState); bool needClearDiskBuff(SStreamFileState* pFileState); @@ -44,7 +45,7 @@ bool hasRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen); SStreamSnapshot* getSnapshot(SStreamFileState* pFileState); int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState); -int32_t recoverSnapshot(SStreamFileState* pFileState); +int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId); int32_t getSnapshotIdList(SStreamFileState* pFileState, SArray* list); int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark); diff --git a/include/libs/stream/tstreamUpdate.h b/include/libs/stream/tstreamUpdate.h index 7bb1d027c9..41ada56904 100644 --- a/include/libs/stream/tstreamUpdate.h +++ b/include/libs/stream/tstreamUpdate.h @@ -43,8 +43,8 @@ typedef struct SUpdateKey { // uint64_t maxDataVersion; //} SUpdateInfo; -SUpdateInfo *updateInfoInitP(SInterval *pInterval, int64_t watermark); -SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t watermark); +SUpdateInfo *updateInfoInitP(SInterval *pInterval, int64_t watermark, bool igUp); +SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t watermark, bool igUp); TSKEY updateInfoFillBlockData(SUpdateInfo *pInfo, SSDataBlock *pBlock, int32_t primaryTsCol); bool updateInfoIsUpdated(SUpdateInfo *pInfo, uint64_t tableId, TSKEY ts); bool updateInfoIsTableInserted(SUpdateInfo *pInfo, int64_t tbUid); diff --git a/include/util/tarray.h b/include/util/tarray.h index f56c9e3a17..4d9c930521 100644 --- a/include/util/tarray.h +++ b/include/util/tarray.h @@ -200,8 +200,11 @@ void taosArrayClear(SArray* pArray); * @param pArray * @param fp */ + void taosArrayClearEx(SArray* pArray, void (*fp)(void*)); +void taosArrayClearP(SArray* pArray, void (*fp)(void*)); + void* taosArrayDestroy(SArray* pArray); void taosArrayDestroyP(SArray* pArray, FDelete fp); diff --git a/include/util/tencode.h b/include/util/tencode.h index ff97a20507..d05d4914e3 100644 --- a/include/util/tencode.h +++ b/include/util/tencode.h @@ -89,7 +89,7 @@ typedef struct { RET = -1; \ } \ tEncoderClear(&coder); \ - } while (0) + } while (0); static void* tEncoderMalloc(SEncoder* pCoder, int32_t size); static void* tDecoderMalloc(SDecoder* pCoder, int32_t size); diff --git a/include/util/types.h b/include/util/types.h 
index b49670220b..0aa01a66f5 100644 --- a/include/util/types.h +++ b/include/util/types.h @@ -85,8 +85,6 @@ typedef uint16_t VarDataLenT; // maxVarDataLen: 65535 #define varDataVal(v) ((char *)(v) + VARSTR_HEADER_SIZE) #define varDataTLen(v) (sizeof(VarDataLenT) + varDataLen(v)) -#define NCHAR_WIDTH_TO_BYTES(n) ((n)*TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE) - typedef int32_t VarDataOffsetT; typedef struct tstr { diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index 6f978b0143..a1c8690dfc 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -826,6 +826,25 @@ TEST(clientCase, projection_query_tables) { } taos_free_result(pRes); + int64_t start = 1685959190000; + + int32_t code = -1; + for(int32_t i = 0; i < 1000000; ++i) { + char t[512] = {0}; + + sprintf(t, "insert into t1 values(%ld, %ld)", start + i, i); + while(1) { + void* p = taos_query(pConn, t); + code = taos_errno(p); + taos_free_result(p); + if (code != 0) { + printf("insert data error, retry\n"); + } else { + break; + } + } + } + for (int32_t i = 0; i < 1; ++i) { printf("create table :%d\n", i); createNewTable(pConn, i); @@ -901,13 +920,40 @@ TEST(clientCase, agg_query_tables) { } taos_free_result(pRes); - pRes = taos_query(pConn, "show table distributed tup"); - if (taos_errno(pRes) != 0) { - printf("failed to select from table, reason:%s\n", taos_errstr(pRes)); - taos_free_result(pRes); - ASSERT_TRUE(false); + int64_t st = 1685959293000; + for (int32_t i = 0; i < 10000000; ++i) { + char s[256] = {0}; + + while (1) { + sprintf(s, "insert into t1 values(%ld, %d)", st + i, i); + pRes = taos_query(pConn, s); + + int32_t ret = taos_errno(pRes); + taos_free_result(pRes); + if (ret == 0) { + break; + } + } + + while (1) { + sprintf(s, "insert into t2 values(%ld, %d)", st + i, i); + pRes = taos_query(pConn, s); + int32_t ret = taos_errno(pRes); + + taos_free_result(pRes); + if (ret == 0) { + break; + } + } } +// pRes = taos_query(pConn, "show table distributed tup"); +// if (taos_errno(pRes) != 0) { +// printf("failed to select from table, reason:%s\n", taos_errstr(pRes)); +// taos_free_result(pRes); +// ASSERT_TRUE(false); +// } + printResult(pRes); taos_free_result(pRes); taos_close(pConn); diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 8a500b0178..f7bb6f85e2 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -2359,6 +2359,23 @@ void trimDataBlock(SSDataBlock* pBlock, int32_t totalRows, const bool* pBoolList int32_t maxRows = 0; size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i); + // it is a reserved column for scalar function, and no data in this column yet. + if (pDst->pData == NULL) { + continue; + } + + int32_t numOfRows = 0; + if (IS_VAR_DATA_TYPE(pDst->info.type)) { + pDst->varmeta.length = 0; + } + } + + if (NULL == pBoolList) { + return; + } + for (int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i); // it is a reserved column for scalar function, and no data in this column yet. 
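The new loops added to clientTests.cpp above keep re-issuing an INSERT statement until it succeeds. As a condensed illustration only (the helper name `insertWithRetry` is not part of the patch, and an already opened `TAOS *pConn` connection is assumed), the pattern boils down to:

```c
#include <stdio.h>
#include "taos.h"  // TDengine client API: taos_query, taos_errno, taos_free_result

// Re-issue the statement until taos_errno() reports success, mirroring the test loops.
static void insertWithRetry(TAOS *pConn, const char *sql) {
  while (1) {
    TAOS_RES *pRes = taos_query(pConn, sql);
    int       code = taos_errno(pRes);
    taos_free_result(pRes);
    if (code == 0) {
      break;  // insert succeeded, move on to the next row
    }
    printf("insert failed, code:%d, retrying\n", code);
  }
}
```

The tests build each statement with sprintf for one timestamp/value pair and apply this retry loop inline for tables t1 and t2.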
diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 9c242d7c1e..ff9e922ee1 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -128,7 +128,7 @@ int32_t tsQueryPolicy = 1; int32_t tsQueryRspPolicy = 0; int64_t tsQueryMaxConcurrentTables = 200; // unit is TSDB_TABLE_NUM_UNIT bool tsEnableQueryHb = true; -bool tsEnableScience = false; // on taos-cli show float and doulbe with scientific notation if true +bool tsEnableScience = false; // on taos-cli show float and doulbe with scientific notation if true int32_t tsQuerySmaOptimize = 0; int32_t tsQueryRsmaTolerance = 1000; // the tolerance time (ms) to judge from which level to query rsma data. bool tsQueryPlannerTrace = false; @@ -240,17 +240,17 @@ int32_t tsTtlBatchDropNum = 10000; // number of tables dropped per batch // internal int32_t tsTransPullupInterval = 2; int32_t tsMqRebalanceInterval = 2; -int32_t tsStreamCheckpointTickInterval = 1; +int32_t tsStreamCheckpointTickInterval = 600; +int32_t tsStreamNodeCheckInterval = 10; int32_t tsTtlUnit = 86400; int32_t tsTtlPushIntervalSec = 10; -int32_t tsTrimVDbIntervalSec = 60 * 60; // interval of trimming db in all vgroups +int32_t tsTrimVDbIntervalSec = 60 * 60; // interval of trimming db in all vgroups int32_t tsGrantHBInterval = 60; int32_t tsUptimeInterval = 300; // seconds char tsUdfdResFuncs[512] = ""; // udfd resident funcs that teardown when udfd exits char tsUdfdLdLibPath[512] = ""; bool tsDisableStream = false; int64_t tsStreamBufferSize = 128 * 1024 * 1024; -int64_t tsCheckpointInterval = 3 * 60 * 60 * 1000; bool tsFilterScalarMode = false; int32_t tsKeepTimeOffset = 0; // latency of data migration int tsResolveFQDNRetryTime = 100; // seconds @@ -263,6 +263,8 @@ char tsS3BucketName[TSDB_FQDN_LEN] = ""; char tsS3AppId[TSDB_FQDN_LEN] = ""; int8_t tsS3Enabled = false; +int32_t tsCheckpointInterval = 20; + #ifndef _STORAGE int32_t taosSetTfsCfg(SConfig *pCfg) { SConfigItem *pItem = cfgGetItem(pCfg, "dataDir"); @@ -1057,7 +1059,6 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsDisableStream = cfgGetItem(pCfg, "disableStream")->bval; tsStreamBufferSize = cfgGetItem(pCfg, "streamBufferSize")->i64; - tsCheckpointInterval = cfgGetItem(pCfg, "checkpointInterval")->i64; tsFilterScalarMode = cfgGetItem(pCfg, "filterScalarMode")->bval; tsKeepTimeOffset = cfgGetItem(pCfg, "keepTimeOffset")->i32; diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index eaa80ba775..4c43326959 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -33,9 +33,11 @@ int32_t mmProcessCreateReq(const SMgmtInputOpt *pInput, SRpcMsg *pMsg) { return -1; } - SMnodeOpt option = {.deploy = true, .numOfReplicas = createReq.replica, + SMnodeOpt option = {.deploy = true, + .numOfReplicas = createReq.replica, .numOfTotalReplicas = createReq.replica + createReq.learnerReplica, - .selfIndex = -1, .lastIndex = createReq.lastIndex}; + .selfIndex = -1, + .lastIndex = createReq.lastIndex}; memcpy(option.replicas, createReq.replicas, sizeof(createReq.replicas)); for (int32_t i = 0; i < createReq.replica; ++i) { @@ -204,6 +206,10 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if 
(dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT, mmPutMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index 8206b4e425..13b81231d4 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -76,10 +76,12 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; code = 0; _OVER: diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index f43e1f5537..0251b9b636 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -783,20 +783,24 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_CREATE_INDEX, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_INDEX, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if 
(dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; +// if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 6dd7a13c66..1bf13c8fb5 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -148,39 +148,39 @@ typedef enum { } ECsmUpdateType; typedef struct { - int32_t id; - ETrnStage stage; - ETrnPolicy policy; - ETrnConflct conflict; - ETrnExec exec; - EOperType oper; - int32_t code; - int32_t failedTimes; - void* rpcRsp; - int32_t rpcRspLen; - int32_t redoActionPos; - SArray* prepareActions; - SArray* redoActions; - SArray* undoActions; - SArray* commitActions; - int64_t createdTime; - int64_t lastExecTime; - int32_t lastAction; - int32_t lastErrorNo; - SEpSet lastEpset; - tmsg_t lastMsgType; - tmsg_t originRpcType; - char dbname[TSDB_TABLE_FNAME_LEN]; - char stbname[TSDB_TABLE_FNAME_LEN]; - int32_t startFunc; - int32_t stopFunc; - int32_t paramLen; - void* param; - char opername[TSDB_TRANS_OPER_LEN]; - SArray* pRpcArray; - SRWLatch lockRpcArray; - int64_t mTraceId; - TdThreadMutex mutex; + int32_t id; + ETrnStage stage; + ETrnPolicy policy; + ETrnConflct conflict; + ETrnExec exec; + EOperType oper; + int32_t code; + int32_t failedTimes; + void* rpcRsp; + int32_t rpcRspLen; + int32_t redoActionPos; + SArray* prepareActions; + SArray* redoActions; + SArray* undoActions; + SArray* commitActions; + int64_t createdTime; + int64_t lastExecTime; + int32_t lastAction; + int32_t lastErrorNo; + SEpSet lastEpset; + tmsg_t lastMsgType; + tmsg_t originRpcType; + char dbname[TSDB_TABLE_FNAME_LEN]; + char 
stbname[TSDB_TABLE_FNAME_LEN]; + int32_t startFunc; + int32_t stopFunc; + int32_t paramLen; + void* param; + char opername[TSDB_TRANS_OPER_LEN]; + SArray* pRpcArray; + SRWLatch lockRpcArray; + int64_t mTraceId; + TdThreadMutex mutex; } STrans; typedef struct { @@ -453,20 +453,20 @@ typedef struct { } SStbObj; typedef struct { - char name[TSDB_FUNC_NAME_LEN]; - int64_t createdTime; - int8_t funcType; - int8_t scriptType; - int8_t align; - int8_t outputType; - int32_t outputLen; - int32_t bufSize; - int64_t signature; - int32_t commentSize; - int32_t codeSize; - char* pComment; - char* pCode; - int32_t funcVersion; + char name[TSDB_FUNC_NAME_LEN]; + int64_t createdTime; + int8_t funcType; + int8_t scriptType; + int8_t align; + int8_t outputType; + int32_t outputLen; + int32_t bufSize; + int64_t signature; + int32_t commentSize; + int32_t codeSize; + char* pComment; + char* pCode; + int32_t funcVersion; SRWLatch lock; } SFuncObj; @@ -561,10 +561,10 @@ typedef struct { int64_t subscribeTime; int64_t rebalanceTime; - int8_t withTbName; - int8_t autoCommit; - int32_t autoCommitInterval; - int32_t resetOffsetCfg; + int8_t withTbName; + int8_t autoCommit; + int32_t autoCommitInterval; + int32_t resetOffsetCfg; } SMqConsumerObj; SMqConsumerObj* tNewSMqConsumerObj(int64_t consumerId, char cgroup[TSDB_CGROUP_LEN]); @@ -574,8 +574,8 @@ void* tDecodeSMqConsumerObj(const void* buf, SMqConsumerObj* pConsumer typedef struct { int32_t vgId; -// char* qmsg; // SubPlanToString - SEpSet epSet; + // char* qmsg; // SubPlanToString + SEpSet epSet; } SMqVgEp; SMqVgEp* tCloneSMqVgEp(const SMqVgEp* pVgEp); @@ -589,10 +589,10 @@ typedef struct { SArray* offsetRows; // SArray } SMqConsumerEp; -//SMqConsumerEp* tCloneSMqConsumerEp(const SMqConsumerEp* pEp); -//void tDeleteSMqConsumerEp(void* pEp); -int32_t tEncodeSMqConsumerEp(void** buf, const SMqConsumerEp* pEp); -void* tDecodeSMqConsumerEp(const void* buf, SMqConsumerEp* pEp, int8_t sver); +// SMqConsumerEp* tCloneSMqConsumerEp(const SMqConsumerEp* pEp); +// void tDeleteSMqConsumerEp(void* pEp); +int32_t tEncodeSMqConsumerEp(void** buf, const SMqConsumerEp* pEp); +void* tDecodeSMqConsumerEp(const void* buf, SMqConsumerEp* pEp, int8_t sver); typedef struct { char key[TSDB_SUBSCRIBE_KEY_LEN]; @@ -606,7 +606,7 @@ typedef struct { SArray* unassignedVgs; // SArray SArray* offsetRows; char dbName[TSDB_DB_FNAME_LEN]; - char* qmsg; // SubPlanToString + char* qmsg; // SubPlanToString } SMqSubscribeObj; SMqSubscribeObj* tNewSubscribeObj(const char key[TSDB_SUBSCRIBE_KEY_LEN]); @@ -615,25 +615,25 @@ void tDeleteSubscribeObj(SMqSubscribeObj* pSub); int32_t tEncodeSubscribeObj(void** buf, const SMqSubscribeObj* pSub); void* tDecodeSubscribeObj(const void* buf, SMqSubscribeObj* pSub, int8_t sver); -//typedef struct { -// int32_t epoch; -// SArray* consumers; // SArray -//} SMqSubActionLogEntry; +// typedef struct { +// int32_t epoch; +// SArray* consumers; // SArray +// } SMqSubActionLogEntry; -//SMqSubActionLogEntry* tCloneSMqSubActionLogEntry(SMqSubActionLogEntry* pEntry); -//void tDeleteSMqSubActionLogEntry(SMqSubActionLogEntry* pEntry); -//int32_t tEncodeSMqSubActionLogEntry(void** buf, const SMqSubActionLogEntry* pEntry); -//void* tDecodeSMqSubActionLogEntry(const void* buf, SMqSubActionLogEntry* pEntry); +// SMqSubActionLogEntry* tCloneSMqSubActionLogEntry(SMqSubActionLogEntry* pEntry); +// void tDeleteSMqSubActionLogEntry(SMqSubActionLogEntry* pEntry); +// int32_t tEncodeSMqSubActionLogEntry(void** buf, const SMqSubActionLogEntry* pEntry); +// void* 
tDecodeSMqSubActionLogEntry(const void* buf, SMqSubActionLogEntry* pEntry); // -//typedef struct { -// char key[TSDB_SUBSCRIBE_KEY_LEN]; -// SArray* logs; // SArray -//} SMqSubActionLogObj; +// typedef struct { +// char key[TSDB_SUBSCRIBE_KEY_LEN]; +// SArray* logs; // SArray +// } SMqSubActionLogObj; // -//SMqSubActionLogObj* tCloneSMqSubActionLogObj(SMqSubActionLogObj* pLog); -//void tDeleteSMqSubActionLogObj(SMqSubActionLogObj* pLog); -//int32_t tEncodeSMqSubActionLogObj(void** buf, const SMqSubActionLogObj* pLog); -//void* tDecodeSMqSubActionLogObj(const void* buf, SMqSubActionLogObj* pLog); +// SMqSubActionLogObj* tCloneSMqSubActionLogObj(SMqSubActionLogObj* pLog); +// void tDeleteSMqSubActionLogObj(SMqSubActionLogObj* pLog); +// int32_t tEncodeSMqSubActionLogObj(void** buf, const SMqSubActionLogObj* pLog); +// void* tDecodeSMqSubActionLogObj(const void* buf, SMqSubActionLogObj* pLog); typedef struct { int32_t oldConsumerNum; @@ -647,12 +647,12 @@ typedef struct { } SMqRebOutputVg; typedef struct { - SArray* rebVgs; // SArray - SArray* newConsumers; // SArray - SArray* removedConsumers; // SArray - SArray* modifyConsumers; // SArray - SMqSubscribeObj* pSub; -// SMqSubActionLogEntry* pLogEntry; + SArray* rebVgs; // SArray + SArray* newConsumers; // SArray + SArray* removedConsumers; // SArray + SArray* modifyConsumers; // SArray + SMqSubscribeObj* pSub; + // SMqSubActionLogEntry* pLogEntry; } SMqRebOutputObj; typedef struct SStreamConf { @@ -674,8 +674,8 @@ typedef struct { int32_t totalLevel; int64_t smaId; // 0 for unused // info - int64_t uid; - int8_t status; + int64_t uid; + int8_t status; SStreamConf conf; // source and target int64_t sourceDbUid; @@ -690,13 +690,13 @@ typedef struct { int32_t fixedSinkVgId; // 0 for shuffle // transformation - char* sql; - char* ast; - char* physicalPlan; - SArray* tasks; // SArray> + char* sql; + char* ast; + char* physicalPlan; + SArray* tasks; // SArray> - SArray* pHTasksList; // generate the results for already stored ts data - int64_t hTaskUid; // stream task for history ts data + SArray* pHTasksList; // generate the results for already stored ts data + int64_t hTaskUid; // stream task for history ts data SSchemaWrapper outputSchema; SSchemaWrapper tagSchema; @@ -706,18 +706,23 @@ typedef struct { int64_t currentTick; // do not serialize int64_t deleteMark; int8_t igCheckUpdate; + + // 3.0.5. 
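+  // new in stream-object encoding version 3: checkpointId records the id of the most recent
+  // checkpoint issued for this stream (decoded only when sver >= 3 in tDecodeSStreamObj);
+  // reserve appears to be padding kept for future on-disk additions.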
+ int64_t checkpointId; + char reserve[256]; + } SStreamObj; int32_t tEncodeSStreamObj(SEncoder* pEncoder, const SStreamObj* pObj); int32_t tDecodeSStreamObj(SDecoder* pDecoder, SStreamObj* pObj, int32_t sver); void tFreeStreamObj(SStreamObj* pObj); -//typedef struct { -// char streamName[TSDB_STREAM_FNAME_LEN]; -// int64_t uid; -// int64_t streamUid; -// SArray* childInfo; // SArray -//} SStreamCheckpointObj; +// typedef struct { +// char streamName[TSDB_STREAM_FNAME_LEN]; +// int64_t uid; +// int64_t streamUid; +// SArray* childInfo; // SArray +// } SStreamCheckpointObj; #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index 05adc17d64..19fd2a3fd4 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -33,6 +33,7 @@ SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw); int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); + // for sma // TODO refactor int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index 3dab144eef..d01daee5a7 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -77,10 +77,15 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) { if (tEncodeSSchemaWrapper(pEncoder, &pObj->outputSchema) < 0) return -1; - // 3.0.20 + // 3.0.20 ver =2 if (tEncodeI64(pEncoder, pObj->checkpointFreq) < 0) return -1; if (tEncodeI8(pEncoder, pObj->igCheckUpdate) < 0) return -1; + // 3.0.50 ver = 3 + if (tEncodeI64(pEncoder, pObj->checkpointId) < 0) return -1; + + if (tEncodeCStrWithLen(pEncoder, pObj->reserve, sizeof(pObj->reserve) - 1) < 0) return -1; + tEndEncode(pEncoder); return pEncoder->pos; } @@ -151,6 +156,11 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) { if (tDecodeI8(pDecoder, &pObj->igCheckUpdate) < 0) return -1; } } + if (sver >= 3) { + if (tDecodeI64(pDecoder, &pObj->checkpointId) < 0) return -1; + } + if (tDecodeCStrTo(pDecoder, pObj->reserve) < 0) return -1; + tEndDecode(pDecoder); return 0; } diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 949d41ef07..115c33cff1 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -1062,16 +1062,16 @@ static int32_t mndProcessDropDnodeReq(SRpcMsg *pReq) { code = mndDropDnode(pMnode, pReq, pDnode, pMObj, pQObj, pSObj, numOfVnodes, force, dropReq.unsafe); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; - char obj1[150] = {0}; - sprintf(obj1, "%s:%d", dropReq.fqdn, dropReq.port); + char obj1[30] = {0}; + sprintf(obj1, "%d", dropReq.dnodeId); - char obj2[30] = {0}; - sprintf(obj2, "%d", dropReq.dnodeId); + //char obj2[150] = {0}; + //sprintf(obj2, "%s:%d", dropReq.fqdn, dropReq.port); char detail[100] = {0}; sprintf(detail, "force:%d, unsafe:%d", dropReq.force, dropReq.unsafe); - auditRecord(pReq, pMnode->clusterId, "dropDnode", obj1, obj2, detail); + auditRecord(pReq, pMnode->clusterId, "dropDnode", obj1, "", detail); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 12e28969c9..1c87cde78a 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -128,29 +128,31 @@ static void 
mndPullupTrimDb(SMnode *pMnode) { } static void mndCalMqRebalance(SMnode *pMnode) { - mTrace("calc mq rebalance"); int32_t contLen = 0; void *pReq = mndBuildTimerMsg(&contLen); if (pReq != NULL) { - SRpcMsg rpcMsg = { .msgType = TDMT_MND_TMQ_TIMER, .pCont = pReq, .contLen = contLen }; + SRpcMsg rpcMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pReq, .contLen = contLen}; tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); } } -#if 0 static void mndStreamCheckpointTick(SMnode *pMnode, int64_t sec) { int32_t contLen = 0; void *pReq = mndBuildCheckpointTickMsg(&contLen, sec); if (pReq != NULL) { - SRpcMsg rpcMsg = { - .msgType = TDMT_MND_STREAM_CHECKPOINT_TIMER, - .pCont = pReq, - .contLen = contLen, - }; + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CHECKPOINT_TIMER, .pCont = pReq, .contLen = contLen}; + tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); + } +} + +static void mndStreamCheckNode(SMnode* pMnode) { + int32_t contLen = 0; + void *pReq = mndBuildTimerMsg(&contLen); + if (pReq != NULL) { + SRpcMsg rpcMsg = {.msgType = TDMT_MND_NODECHECK_TIMER, .pCont = pReq, .contLen = contLen}; tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); } } -#endif static void mndPullupTelem(SMnode *pMnode) { mTrace("pullup telem msg"); @@ -279,11 +281,13 @@ static void *mndThreadFp(void *param) { mndCalMqRebalance(pMnode); } -#if 0 if (sec % tsStreamCheckpointTickInterval == 0) { mndStreamCheckpointTick(pMnode, sec); } -#endif + + if (sec % tsStreamNodeCheckInterval == 0) { + mndStreamCheckNode(pMnode); + } if (sec % tsTelemInterval == (TMIN(60, (tsTelemInterval - 1)))) { mndPullupTelem(pMnode); @@ -599,7 +603,7 @@ int32_t mndIsCatchUp(SMnode *pMnode) { return syncIsCatchUp(rid); } -ESyncRole mndGetRole(SMnode *pMnode){ +ESyncRole mndGetRole(SMnode *pMnode) { int64_t rid = pMnode->syncMgmt.sync; return syncGetRole(rid); } diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 36771147a9..1d7d391acf 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -14,6 +14,8 @@ */ #include "mndScheduler.h" +#include "tmisce.h" +#include "mndMnode.h" #include "mndDb.h" #include "mndSnode.h" #include "mndVgroup.h" @@ -25,10 +27,8 @@ #define SINK_NODE_LEVEL (0) extern bool tsDeployOnSnode; -static int32_t setTaskUpstreamEpInfo(const SStreamTask* pTask, SStreamTask* pDownstream); static int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, - SVgObj* pVgroup, int32_t fillHistory); -static void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask); + SVgObj* pVgroup, SEpSet* pEpset, int32_t fillHistory); int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType, int64_t watermark, int64_t deleteMark) { @@ -141,7 +141,7 @@ int32_t mndAddDispatcherForInternalTask(SMnode* pMnode, SStreamObj* pStream, SAr } } else { SStreamTask* pOneSinkTask = taosArrayGetP(pSinkNodeList, 0); - setFixedDownstreamEpInfo(pTask, pOneSinkTask); + streamTaskSetFixedDownstreamInfo(pTask, pOneSinkTask); } return 0; @@ -207,7 +207,8 @@ SVgObj* mndSchedFetchOneVg(SMnode* pMnode, int64_t dbUid) { } // create sink node for each vgroup. 
-int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStreamObj* pStream, int32_t fillHistory) { +int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStreamObj* pStream, SEpSet* pEpset, + int32_t fillHistory) { SSdb* pSdb = pMnode->pSdb; void* pIter = NULL; @@ -223,7 +224,7 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStrea continue; } - mndAddSinkTaskToStream(pStream, pTaskList, pMnode, pVgroup->vgId, pVgroup, fillHistory); + mndAddSinkTaskToStream(pStream, pTaskList, pMnode, pVgroup->vgId, pVgroup, pEpset, fillHistory); sdbRelease(pSdb, pVgroup); } @@ -231,7 +232,7 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStrea } int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup, - int32_t fillHistory) { + SEpSet* pEpset, int32_t fillHistory) { int64_t uid = (fillHistory == 0)? pStream->uid:pStream->hTaskUid; SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SINK, fillHistory, 0, pTaskList); if (pTask == NULL) { @@ -239,6 +240,8 @@ int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* p return -1; } + epsetAssign(&(pTask)->info.mnodeEpset, pEpset); + pTask->info.nodeId = vgId; pTask->info.epSet = mndGetVgroupEpset(pMnode, pVgroup); mndSetSinkTaskInfo(pStream, pTask); @@ -246,13 +249,15 @@ int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* p } static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList, SArray* pSinkTaskList, - SStreamObj* pStream, SSubplan* plan, uint64_t uid, int8_t fillHistory, - bool hasExtraSink, int64_t firstWindowSkey) { + SStreamObj* pStream, SSubplan* plan, uint64_t uid, SEpSet* pEpset, + int8_t fillHistory, bool hasExtraSink, int64_t firstWindowSkey) { SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, pStream->conf.triggerParam, pTaskList); if (pTask == NULL) { return terrno; } + epsetAssign(&pTask->info.mnodeEpset, pEpset); + // todo set the correct ts, which should be last key of queried table. 
STimeWindow* pWindow = &pTask->dataRange.window; @@ -273,51 +278,12 @@ static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTas for(int32_t i = 0; i < taosArrayGetSize(pSinkTaskList); ++i) { SStreamTask* pSinkTask = taosArrayGetP(pSinkTaskList, i); - setTaskUpstreamEpInfo(pTask, pSinkTask); + streamTaskSetUpstreamInfo(pSinkTask, pTask); } return TSDB_CODE_SUCCESS; } -static SStreamChildEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) { - SStreamChildEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamChildEpInfo)); - if (pEpInfo == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - - pEpInfo->childId = pTask->info.selfChildId; - pEpInfo->epSet = pTask->info.epSet; - pEpInfo->nodeId = pTask->info.nodeId; - pEpInfo->taskId = pTask->id.taskId; - - return pEpInfo; -} - -void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask) { - STaskDispatcherFixedEp* pDispatcher = &pDstTask->fixedEpDispatcher; - pDispatcher->taskId = pTask->id.taskId; - pDispatcher->nodeId = pTask->info.nodeId; - pDispatcher->epSet = pTask->info.epSet; - - pDstTask->outputInfo.type = TASK_OUTPUT__FIXED_DISPATCH; - pDstTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH; -} - -int32_t setTaskUpstreamEpInfo(const SStreamTask* pTask, SStreamTask* pDownstream) { - SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pTask); - if (pEpInfo == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - if (pDownstream->pUpstreamEpInfoList == NULL) { - pDownstream->pUpstreamEpInfoList = taosArrayInit(4, POINTER_BYTES); - } - - taosArrayPush(pDownstream->pUpstreamEpInfoList, &pEpInfo); - return TSDB_CODE_SUCCESS; -} - static SArray* addNewTaskList(SArray* pTasksList) { SArray* pTaskList = taosArrayInit(0, POINTER_BYTES); taosArrayPush(pTasksList, &pTaskList); @@ -342,7 +308,7 @@ static void setHTasksId(SArray* pTaskList, const SArray* pHTaskList) { } static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* pPlan, SStreamObj* pStream, - bool hasExtraSink, int64_t nextWindowSkey) { + SEpSet* pEpset, bool hasExtraSink, int64_t nextWindowSkey) { // create exec stream task, since only one level, the exec task is also the source task SArray* pTaskList = addNewTaskList(pStream->tasks); SSdb* pSdb = pMnode->pSdb; @@ -379,8 +345,8 @@ static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* // new stream task SArray** pSinkTaskList = taosArrayGet(pStream->tasks, SINK_NODE_LEVEL); - int32_t code = addSourceStreamTask(pMnode, pVgroup, pTaskList, *pSinkTaskList, pStream, plan, pStream->uid, 0, - hasExtraSink, nextWindowSkey); + int32_t code = addSourceStreamTask(pMnode, pVgroup, pTaskList, *pSinkTaskList, pStream, plan, pStream->uid, pEpset, + 0, hasExtraSink, nextWindowSkey); if (code != TSDB_CODE_SUCCESS) { sdbRelease(pSdb, pVgroup); return -1; @@ -389,7 +355,7 @@ static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* if (pStream->conf.fillHistory) { SArray** pHSinkTaskList = taosArrayGet(pStream->pHTasksList, SINK_NODE_LEVEL); code = addSourceStreamTask(pMnode, pVgroup, pHTaskList, *pHSinkTaskList, pStream, plan, pStream->hTaskUid, - 1, hasExtraSink, nextWindowSkey); + pEpset, 1, hasExtraSink, nextWindowSkey); } sdbRelease(pSdb, pVgroup); @@ -406,13 +372,16 @@ static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* } static int32_t doAddSourceTask(SArray* pTaskList, int8_t fillHistory, int64_t uid, SStreamTask* pDownstreamTask, - SMnode* pMnode, SSubplan* pPlan, SVgObj* pVgroup, int64_t 
nextWindowSkey) { + SMnode* pMnode, SSubplan* pPlan, SVgObj* pVgroup, SEpSet* pEpset, + int64_t nextWindowSkey) { SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, 0, pTaskList); if (pTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } + epsetAssign(&(pTask)->info.mnodeEpset, pEpset); + // todo set the correct ts, which should be last key of queried table. STimeWindow* pWindow = &pTask->dataRange.window; pWindow->skey = INT64_MIN; @@ -422,22 +391,24 @@ static int32_t doAddSourceTask(SArray* pTaskList, int8_t fillHistory, int64_t ui pWindow->skey, pWindow->ekey); // all the source tasks dispatch result to a single agg node. - setFixedDownstreamEpInfo(pTask, pDownstreamTask); + streamTaskSetFixedDownstreamInfo(pTask, pDownstreamTask); if (mndAssignStreamTaskToVgroup(pMnode, pTask, pPlan, pVgroup) < 0) { return -1; } - return setTaskUpstreamEpInfo(pTask, pDownstreamTask); + return streamTaskSetUpstreamInfo(pDownstreamTask, pTask); } static int32_t doAddAggTask(uint64_t uid, SArray* pTaskList, SArray* pSinkNodeList, SMnode* pMnode, SStreamObj* pStream, - int32_t fillHistory, SStreamTask** pAggTask) { + SEpSet* pEpset, int32_t fillHistory, SStreamTask** pAggTask) { *pAggTask = tNewStreamTask(uid, TASK_LEVEL__AGG, fillHistory, pStream->conf.triggerParam, pTaskList); if (*pAggTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } + epsetAssign(&(*pAggTask)->info.mnodeEpset, pEpset); + // dispatch if (mndAddDispatcherForInternalTask(pMnode, pStream, pSinkNodeList, *pAggTask) < 0) { return -1; @@ -446,8 +417,8 @@ static int32_t doAddAggTask(uint64_t uid, SArray* pTaskList, SArray* pSinkNodeLi return 0; } -static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, SStreamTask** pAggTask, - SStreamTask** pHAggTask) { +static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, SEpSet* pEpset, + SStreamTask** pAggTask, SStreamTask** pHAggTask) { SArray* pAggTaskList = addNewTaskList(pStream->tasks); SSdb* pSdb = pMnode->pSdb; @@ -461,7 +432,7 @@ static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan *pAggTask = NULL; SArray* pSinkNodeList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL); - int32_t code = doAddAggTask(pStream->uid, pAggTaskList, pSinkNodeList, pMnode, pStream, 0, pAggTask); + int32_t code = doAddAggTask(pStream->uid, pAggTaskList, pSinkNodeList, pMnode, pStream, pEpset, 0, pAggTask); if (code != TSDB_CODE_SUCCESS) { return -1; } @@ -489,7 +460,7 @@ static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan SArray* pHSinkNodeList = taosArrayGetP(pStream->pHTasksList, SINK_NODE_LEVEL); *pHAggTask = NULL; - code = doAddAggTask(pStream->hTaskUid, pHAggTaskList, pHSinkNodeList, pMnode, pStream, pStream->conf.fillHistory, + code = doAddAggTask(pStream->hTaskUid, pHAggTaskList, pHSinkNodeList, pMnode, pStream, pEpset, pStream->conf.fillHistory, pHAggTask); if (code != TSDB_CODE_SUCCESS) { if (pSnode != NULL) { @@ -519,7 +490,8 @@ static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan } static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPlan, SStreamObj* pStream, - SStreamTask* pDownstreamTask, SStreamTask* pHDownstreamTask, int64_t nextWindowSkey) { + SStreamTask* pDownstreamTask, SStreamTask* pHDownstreamTask, + SEpSet* pEpset, int64_t nextWindowSkey) { SArray* pSourceTaskList = addNewTaskList(pStream->tasks); SArray* pHSourceTaskList = NULL; @@ -549,7 +521,7 @@ static int32_t 
addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPl } int32_t code = - doAddSourceTask(pSourceTaskList, 0, pStream->uid, pDownstreamTask, pMnode, plan, pVgroup, nextWindowSkey); + doAddSourceTask(pSourceTaskList, 0, pStream->uid, pDownstreamTask, pMnode, plan, pVgroup, pEpset, nextWindowSkey); if (code != TSDB_CODE_SUCCESS) { sdbRelease(pSdb, pVgroup); terrno = code; @@ -558,7 +530,7 @@ static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPl if (pStream->conf.fillHistory) { code = doAddSourceTask(pHSourceTaskList, 1, pStream->hTaskUid, pHDownstreamTask, pMnode, plan, pVgroup, - nextWindowSkey); + pEpset, nextWindowSkey); if (code != TSDB_CODE_SUCCESS) { sdbRelease(pSdb, pVgroup); return code; @@ -576,16 +548,16 @@ static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPl } static int32_t addSinkTasks(SArray* pTasksList, SMnode* pMnode, SStreamObj* pStream, SArray** pCreatedTaskList, - int32_t fillHistory) { + SEpSet* pEpset, int32_t fillHistory) { SArray* pSinkTaskList = addNewTaskList(pTasksList); if (pStream->fixedSinkVgId == 0) { - if (mndAddShuffleSinkTasksToStream(pMnode, pSinkTaskList, pStream, fillHistory) < 0) { + if (mndAddShuffleSinkTasksToStream(pMnode, pSinkTaskList, pStream, pEpset, fillHistory) < 0) { // TODO free return -1; } } else { if (mndAddSinkTaskToStream(pStream, pSinkTaskList, pMnode, pStream->fixedSinkVgId, &pStream->fixedSinkVg, - fillHistory) < 0) { + pEpset, fillHistory) < 0) { // TODO free return -1; } @@ -599,11 +571,11 @@ static void setSinkTaskUpstreamInfo(SArray* pTasksList, const SStreamTask* pUpst SArray* pSinkTaskList = taosArrayGetP(pTasksList, SINK_NODE_LEVEL); for(int32_t i = 0; i < taosArrayGetSize(pSinkTaskList); ++i) { SStreamTask* pSinkTask = taosArrayGetP(pSinkTaskList, i); - setTaskUpstreamEpInfo(pUpstreamTask, pSinkTask); + streamTaskSetUpstreamInfo(pSinkTask, pUpstreamTask); } } -static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, int64_t nextWindowSkey) { +static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, int64_t nextWindowSkey, SEpSet* pEpset) { SSdb* pSdb = pMnode->pSdb; int32_t numOfPlanLevel = LIST_LENGTH(pPlan->pSubplans); @@ -626,7 +598,7 @@ static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* hasExtraSink = true; SArray* pSinkTaskList = NULL; - int32_t code = addSinkTasks(pStream->tasks, pMnode, pStream, &pSinkTaskList, 0); + int32_t code = addSinkTasks(pStream->tasks, pMnode, pStream, &pSinkTaskList, pEpset, 0); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -634,7 +606,7 @@ static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* // check for fill history if (pStream->conf.fillHistory) { SArray* pHSinkTaskList = NULL; - code = addSinkTasks(pStream->pHTasksList, pMnode, pStream, &pHSinkTaskList, 1); + code = addSinkTasks(pStream->pHTasksList, pMnode, pStream, &pHSinkTaskList, pEpset, 1); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -649,7 +621,7 @@ static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* SStreamTask* pAggTask = NULL; SStreamTask* pHAggTask = NULL; - int32_t code = addAggTask(pStream, pMnode, pPlan, &pAggTask, &pHAggTask); + int32_t code = addAggTask(pStream, pMnode, pPlan, pEpset, &pAggTask, &pHAggTask); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -658,9 +630,9 @@ static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* setSinkTaskUpstreamInfo(pStream->pHTasksList, 
pHAggTask); // source level - return addSourceTasksForMultiLevelStream(pMnode, pPlan, pStream, pAggTask, pHAggTask, nextWindowSkey); + return addSourceTasksForMultiLevelStream(pMnode, pPlan, pStream, pAggTask, pHAggTask, pEpset, nextWindowSkey); } else if (numOfPlanLevel == 1) { - return addSourceTasksForOneLevelStream(pMnode, pPlan, pStream, hasExtraSink, nextWindowSkey); + return addSourceTasksForOneLevelStream(pMnode, pPlan, pStream, pEpset, hasExtraSink, nextWindowSkey); } return 0; @@ -673,7 +645,10 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream, int64_t nextWindo return -1; } - int32_t code = doScheduleStream(pStream, pMnode, pPlan, nextWindowSkey); + SEpSet mnodeEpset = {0}; + mndGetMnodeEpSet(pMnode, &mnodeEpset); + + int32_t code = doScheduleStream(pStream, pMnode, pPlan, nextWindowSkey, &mnodeEpset); qDestroyQueryPlan(pPlan); return code; diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 9a5429ba87..8484148642 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -1066,6 +1066,83 @@ static int32_t mndBuildStbFromAlter(SStbObj *pStb, SStbObj *pDst, SMCreateStbReq return TSDB_CODE_SUCCESS; } +static char* mndAuditFieldTypeStr(int32_t type){ + switch (type) + { + case TSDB_DATA_TYPE_NULL: + return "null"; + case TSDB_DATA_TYPE_BOOL: + return "bool"; + case TSDB_DATA_TYPE_TINYINT: + return "tinyint"; + case TSDB_DATA_TYPE_SMALLINT: + return "smallint"; + case TSDB_DATA_TYPE_INT: + return "int"; + case TSDB_DATA_TYPE_BIGINT: + return "bigint"; + case TSDB_DATA_TYPE_FLOAT: + return "float"; + case TSDB_DATA_TYPE_DOUBLE: + return "double"; + case TSDB_DATA_TYPE_VARCHAR: + return "varchar"; + case TSDB_DATA_TYPE_TIMESTAMP: + return "timestamp"; + case TSDB_DATA_TYPE_NCHAR: + return "nchar"; + case TSDB_DATA_TYPE_UTINYINT: + return "utinyint"; + case TSDB_DATA_TYPE_USMALLINT: + return "usmallint"; + case TSDB_DATA_TYPE_UINT: + return "uint"; + case TSDB_DATA_TYPE_UBIGINT: + return "ubigint"; + case TSDB_DATA_TYPE_JSON: + return "json"; + case TSDB_DATA_TYPE_VARBINARY: + return "varbinary"; + case TSDB_DATA_TYPE_DECIMAL: + return "decimal"; + case TSDB_DATA_TYPE_BLOB: + return "blob"; + case TSDB_DATA_TYPE_MEDIUMBLOB: + return "mediumblob"; + case TSDB_DATA_TYPE_GEOMETRY: + return "geometry"; + + default: + return "error"; + } +} + +static void mndAuditFieldStr(char* detail, SArray *arr, int32_t len, int32_t max){ + int32_t detialLen = strlen(detail); + int32_t fieldLen = 0; + for (int32_t i = 0; i < len; ++i) { + SField *pField = taosArrayGet(arr, i); + char field[TSDB_COL_NAME_LEN + 20] = {0}; + fieldLen = strlen(", "); + if(detialLen > 0 && detialLen < max-fieldLen-1) { + strcat(detail, ", "); + detialLen += fieldLen; + } + else{ + break; + } + sprintf(field, "%s:%s", pField->name, mndAuditFieldTypeStr(pField->type)); + fieldLen = strlen(field); + if(detialLen < max-fieldLen-1) { + strcat(detail, field); + detialLen += fieldLen; + } + else{ + break; + } + } +} + static int32_t mndProcessCreateStbReq(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; int32_t code = -1; @@ -1174,7 +1251,7 @@ static int32_t mndProcessCreateStbReq(SRpcMsg *pReq) { } if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; - char detail[2000] = {0}; + char detail[AUDIT_DETAIL_MAX] = {0}; sprintf(detail, "colVer:%d, delay1:%" PRId64 ", delay2:%" PRId64 ", deleteMark1:%" PRId64 ", " "deleteMark2:%" PRId64 ", igExists:%d, numOfColumns:%d, numOfFuncs:%d, numOfTags:%d, " "source:%d, suid:%" PRId64 ", tagVer:%d, 
ttl:%d, " @@ -1183,11 +1260,14 @@ static int32_t mndProcessCreateStbReq(SRpcMsg *pReq) { createReq.deleteMark2, createReq.igExists, createReq.numOfColumns, createReq.numOfFuncs, createReq.numOfTags, createReq.source, createReq.suid, createReq.tagVer, createReq.ttl, createReq.watermark1, createReq.watermark2); + + mndAuditFieldStr(detail, createReq.pColumns, createReq.numOfColumns, AUDIT_DETAIL_MAX); + mndAuditFieldStr(detail, createReq.pTags, createReq.numOfTags, AUDIT_DETAIL_MAX); SName name = {0}; - tNameFromString(&name, pDb->name, T_NAME_ACCT | T_NAME_DB); + tNameFromString(&name, createReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - auditRecord(pReq, pMnode->clusterId, "createStb", name.dbname, createReq.name, detail); + auditRecord(pReq, pMnode->clusterId, "createStb", name.dbname, name.tname, detail); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -2532,9 +2612,9 @@ static int32_t mndProcessDropStbReq(SRpcMsg *pReq) { dropReq.igNotExists, dropReq.source); SName name = {0}; - tNameFromString(&name, pDb->name, T_NAME_ACCT | T_NAME_DB); + tNameFromString(&name, dropReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - auditRecord(pReq, pMnode->clusterId, "dropStb", name.dbname, dropReq.name, detail); + auditRecord(pReq, pMnode->clusterId, "dropStb", name.dbname, name.tname, detail); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 03bb84b04d..9455aae8e3 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -14,6 +14,7 @@ */ #include "mndStream.h" +#include "audit.h" #include "mndDb.h" #include "mndDnode.h" #include "mndMnode.h" @@ -26,13 +27,35 @@ #include "mndUser.h" #include "mndVgroup.h" #include "parser.h" +#include "tmisce.h" #include "tname.h" -#include "audit.h" -#define MND_STREAM_VER_NUMBER 3 -#define MND_STREAM_RESERVE_SIZE 64 +#define MND_STREAM_VER_NUMBER 4 +#define MND_STREAM_RESERVE_SIZE 64 +#define MND_STREAM_MAX_NUM 60 +#define MND_STREAM_CHECKPOINT_NAME "stream-checkpoint" -#define MND_STREAM_MAX_NUM 60 +typedef struct SNodeEntry { + int32_t nodeId; + SEpSet epset; // compare the epset to identify the vgroup tranferring between different dnodes. 
+ int64_t hbTimestamp; // second +} SNodeEntry; + +typedef struct SStreamVnodeRevertIndex { + SArray *pNodeEntryList; + int64_t ts; // snapshot ts + SHashObj *pTaskMap; + SArray *pTaskList; + TdThreadMutex lock; +} SStreamVnodeRevertIndex; + +typedef struct SVgroupChangeInfo { + SHashObj *pDBMap; + SArray *pUpdateNodeList; // SArray +} SVgroupChangeInfo; + +static int32_t mndNodeCheckSentinel = 0; +static SStreamVnodeRevertIndex execNodeList; static int32_t mndStreamActionInsert(SSdb *pSdb, SStreamObj *pStream); static int32_t mndStreamActionDelete(SSdb *pSdb, SStreamObj *pStream); @@ -41,6 +64,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq); static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq); static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); +static int32_t mndProcessStreamHb(SRpcMsg *pReq); static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq); static int32_t mndProcessStreamMetaReq(SRpcMsg *pReq); static int32_t mndGetStreamMeta(SRpcMsg *pReq, SShowObj *pShow, STableMetaRsp *pMeta); @@ -50,6 +74,17 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock static void mndCancelGetNextStreamTask(SMnode *pMnode, void *pIter); static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq); static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq); +static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, int32_t nodeId, int64_t checkpointId, + int64_t streamId, int32_t taskId); +static int32_t mndProcessNodeCheck(SRpcMsg *pReq); +static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg); +static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamVnodeRevertIndex *pExecNode); + +static SArray *doExtractNodeListFromStream(SMnode *pMnode); +static SArray *mndTakeVgroupSnapshot(SMnode *pMnode); +static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList); +static int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans); +static void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_t msgType, const SEpSet *pEpset); int32_t mndInitStream(SMnode *pMnode) { SSdbTable table = { @@ -64,16 +99,21 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_MND_CREATE_STREAM, mndProcessCreateStreamReq); mndSetMsgHandle(pMnode, TDMT_MND_DROP_STREAM, mndProcessDropStreamReq); - /*mndSetMsgHandle(pMnode, TDMT_MND_RECOVER_STREAM, mndProcessRecoverStreamReq);*/ + mndSetMsgHandle(pMnode, TDMT_MND_NODECHECK_TIMER, mndProcessNodeCheck); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_DEPLOY_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_DROP_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_PAUSE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_RESUME_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_STOP_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_VND_STREAM_TASK_UPDATE_RSP, mndTransProcessRsp); - // mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr); - // mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint); + mndSetMsgHandle(pMnode, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_HEARTBEAT, 
mndProcessStreamHb); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_REPORT_CHECKPOINT, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_NODECHANGE_CHECK, mndProcessNodeCheckReq); mndSetMsgHandle(pMnode, TDMT_MND_PAUSE_STREAM, mndProcessPauseStreamReq); mndSetMsgHandle(pMnode, TDMT_MND_RESUME_STREAM, mndProcessResumeStreamReq); @@ -83,10 +123,19 @@ int32_t mndInitStream(SMnode *pMnode) { mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_STREAM_TASKS, mndRetrieveStreamTask); mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_STREAM_TASKS, mndCancelGetNextStreamTask); + taosThreadMutexInit(&execNodeList.lock, NULL); + execNodeList.pTaskMap = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), true, HASH_NO_LOCK); + execNodeList.pTaskList = taosArrayInit(4, sizeof(STaskStatusEntry)); + return sdbSetTable(pMnode->pSdb, table); } -void mndCleanupStream(SMnode *pMnode) {} +void mndCleanupStream(SMnode *pMnode) { + taosArrayDestroy(execNodeList.pTaskList); + taosHashCleanup(execNodeList.pTaskMap); + taosThreadMutexDestroy(&execNodeList.lock); + mDebug("mnd stream cleanup"); +} SSdbRaw *mndStreamActionEncode(SStreamObj *pStream) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -147,6 +196,7 @@ SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw) { if (sver != MND_STREAM_VER_NUMBER) { terrno = 0; + mError("stream read invalid ver, data ver: %d, curr ver: %d", sver, MND_STREAM_VER_NUMBER); goto STREAM_DECODE_OVER; } @@ -460,11 +510,7 @@ int32_t mndPersistTaskDeployReq(STrans *pTrans, SStreamTask *pTask) { STransAction action = {0}; action.mTraceId = pTrans->mTraceId; - memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet)); - action.pCont = buf; - action.contLen = tlen; - action.msgType = TDMT_STREAM_TASK_DEPLOY; - + initTransAction(&action, buf, tlen, TDMT_STREAM_TASK_DEPLOY, &pTask->info.epSet); if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(buf); return -1; @@ -640,8 +686,6 @@ _OVER: } static int32_t mndPersistTaskDropReq(STrans *pTrans, SStreamTask *pTask) { - // vnode - /*if (pTask->info.nodeId > 0) {*/ SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); if (pReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -653,15 +697,11 @@ static int32_t mndPersistTaskDropReq(STrans *pTrans, SStreamTask *pTask) { pReq->streamId = pTask->id.streamId; STransAction action = {0}; - memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet)); - action.pCont = pReq; - action.contLen = sizeof(SVDropStreamTaskReq); - action.msgType = TDMT_STREAM_TASK_DROP; + initTransAction(&action, pReq, sizeof(SVDropStreamTaskReq), TDMT_STREAM_TASK_DROP, &pTask->info.epSet); if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(pReq); return -1; } - /*}*/ return 0; } @@ -762,16 +802,16 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { } } - pDb = mndAcquireDb(pMnode, streamObj.sourceDb); - if (pDb->cfg.replications != 1) { - mError("stream source db must have only 1 replica, but %s has %d", pDb->name, pDb->cfg.replications); - terrno = TSDB_CODE_MND_MULTI_REPLICA_SOURCE_DB; - mndReleaseDb(pMnode, pDb); - pDb = NULL; - goto _OVER; - } + // pDb = mndAcquireDb(pMnode, streamObj.sourceDb); + // if (pDb->cfg.replications != 1) { + // mError("stream source db must have only 1 replica, but %s has %d", pDb->name, pDb->cfg.replications); + // terrno = TSDB_CODE_MND_MULTI_REPLICA_SOURCE_DB; + // mndReleaseDb(pMnode, pDb); + // pDb = NULL; + // goto _OVER; + // } - mndReleaseDb(pMnode, pDb); + // mndReleaseDb(pMnode, pDb); STrans *pTrans = 
mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pReq, "create-stream"); if (pTrans == NULL) { @@ -827,21 +867,32 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { mndTransDrop(pTrans); + taosThreadMutexLock(&execNodeList.lock); + keepStreamTasksInBuf(&streamObj, &execNodeList); + taosThreadMutexUnlock(&execNodeList.lock); + code = TSDB_CODE_ACTION_IN_PROGRESS; char detail[2000] = {0}; - sprintf(detail, "checkpointFreq:%" PRId64 ", createStb:%d, deleteMark:%" PRId64 ", " + sprintf(detail, + "checkpointFreq:%" PRId64 ", createStb:%d, deleteMark:%" PRId64 + ", " "fillHistory:%d, igExists:%d, " - "igExpired:%d, igUpdate:%d, lastTs:%" PRId64 ", " - "maxDelay:%" PRId64 ", numOfTags:%d, sourceDB:%s, " + "igExpired:%d, igUpdate:%d, lastTs:%" PRId64 + ", " + "maxDelay:%" PRId64 + ", numOfTags:%d, sourceDB:%s, " "targetStbFullName:%s, triggerType:%d, watermark:%" PRId64, createStreamReq.checkpointFreq, createStreamReq.createStb, createStreamReq.deleteMark, - createStreamReq.fillHistory, createStreamReq.igExists, - createStreamReq.igExpired, createStreamReq.igUpdate, createStreamReq.lastTs, - createStreamReq.maxDelay, createStreamReq.numOfTags, createStreamReq.sourceDB, + createStreamReq.fillHistory, createStreamReq.igExists, createStreamReq.igExpired, createStreamReq.igUpdate, + createStreamReq.lastTs, createStreamReq.maxDelay, createStreamReq.numOfTags, createStreamReq.sourceDB, createStreamReq.targetStbFullName, createStreamReq.triggerType, createStreamReq.watermark); - auditRecord(pReq, pMnode->clusterId, "createStream", createStreamReq.name, "", detail); + SName name = {0}; + tNameFromString(&name, createStreamReq.name, T_NAME_ACCT | T_NAME_DB); + //reuse this function for stream + + auditRecord(pReq, pMnode->clusterId, "createStream", name.dbname, "", detail); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -855,55 +906,36 @@ _OVER: return code; } -#if 0 - static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - SSdb *pSdb = pMnode->pSdb; - void *pIter = NULL; - SStreamObj *pStream = NULL; - - // iterate all stream obj - while (1) { - pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); - if (pIter == NULL) break; - // incr tick - int64_t currentTick = atomic_add_fetch_64(&pStream->currentTick, 1); - // if >= checkpointFreq, build msg TDMT_MND_STREAM_BEGIN_CHECKPOINT, put into write q - if (currentTick >= pStream->checkpointFreq) { - atomic_store_64(&pStream->currentTick, 0); - SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); - - pMsg->streamId = pStream->uid; - pMsg->checkpointId = tGenIdPI64(); - memcpy(pMsg->streamName, pStream->name, TSDB_STREAM_FNAME_LEN); - - SRpcMsg rpcMsg = { - .msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, - .pCont = pMsg, - .contLen = sizeof(SMStreamDoCheckpointMsg), - }; - - tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); - } + SMnode *pMnode = pReq->info.node; + SSdb *pSdb = pMnode->pSdb; + if (sdbGetSize(pSdb, SDB_STREAM) <= 0) { + return 0; } + int64_t checkpointId = taosGetTimestampMs(); + SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); + pMsg->checkpointId = checkpointId; + + int32_t size = sizeof(SMStreamDoCheckpointMsg); + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pMsg, .contLen = size}; + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); return 0; } -static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, const SStreamTask *pTask, - 
SMStreamDoCheckpointMsg *pMsg) { +static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, int32_t nodeId, int64_t checkpointId, + int64_t streamId, int32_t taskId) { SStreamCheckpointSourceReq req = {0}; - req.checkpointId = pMsg->checkpointId; - req.nodeId = pTask->info.nodeId; + req.checkpointId = checkpointId; + req.nodeId = nodeId; req.expireTime = -1; - req.streamId = pTask->streamId; - req.taskId = pTask->taskId; + req.streamId = streamId; // pTask->id.streamId; + req.taskId = taskId; // pTask->id.taskId; int32_t code; int32_t blen; - tEncodeSize(tEncodeSStreamCheckpointSourceReq, &req, blen, code); + tEncodeSize(tEncodeStreamCheckpointSourceReq, &req, blen, code); if (code < 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; @@ -920,11 +952,11 @@ static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, con void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); SEncoder encoder; tEncoderInit(&encoder, abuf, tlen); - tEncodeSStreamCheckpointSourceReq(&encoder, &req); + tEncodeStreamCheckpointSourceReq(&encoder, &req); SMsgHead *pMsgHead = (SMsgHead *)buf; pMsgHead->contLen = htonl(tlen); - pMsgHead->vgId = htonl(pTask->info.nodeId); + pMsgHead->vgId = htonl(nodeId); tEncoderClear(&encoder); @@ -933,95 +965,296 @@ static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, con return 0; } +// static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStream, int64_t checkpointId) { +// int64_t timestampMs = taosGetTimestampMs(); +// if (timestampMs - pStream->checkpointFreq < tsStreamCheckpointTickInterval * 1000) { +// return -1; +// } -static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - SSdb *pSdb = pMnode->pSdb; +// STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, "stream-checkpoint"); +// if (pTrans == NULL) return -1; +// mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); +// if (mndTrancCheckConflict(pMnode, pTrans) != 0) { +// mError("failed to checkpoint of stream name%s, checkpointId: %" PRId64 ", reason:%s", pStream->name, +// checkpointId, +// tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); +// mndTransDrop(pTrans); +// return -1; +// } +// mDebug("start to trigger checkpoint for stream:%s, checkpoint: %" PRId64 "", pStream->name, checkpointId); +// atomic_store_64(&pStream->currentTick, 1); +// taosWLockLatch(&pStream->lock); +// // 1. 
redo action: broadcast checkpoint source msg for all source vg +// int32_t totLevel = taosArrayGetSize(pStream->tasks); +// for (int32_t i = 0; i < totLevel; i++) { +// SArray *pLevel = taosArrayGetP(pStream->tasks, i); +// SStreamTask *pTask = taosArrayGetP(pLevel, 0); +// if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { +// int32_t sz = taosArrayGetSize(pLevel); +// for (int32_t j = 0; j < sz; j++) { +// SStreamTask *pTask = taosArrayGetP(pLevel, j); +// /*A(pTask->info.nodeId > 0);*/ +// SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); +// if (pVgObj == NULL) { +// taosWUnLockLatch(&pStream->lock); +// mndTransDrop(pTrans); +// return -1; +// } - SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont; +// void *buf; +// int32_t tlen; +// if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, checkpointId, pTask->id.streamId, +// pTask->id.taskId) < 0) { +// mndReleaseVgroup(pMnode, pVgObj); +// taosWUnLockLatch(&pStream->lock); +// mndTransDrop(pTrans); +// return -1; +// } - SStreamObj *pStream = mndAcquireStream(pMnode, pMsg->streamName); +// STransAction action = {0}; +// action.epSet = mndGetVgroupEpset(pMnode, pVgObj); +// action.pCont = buf; +// action.contLen = tlen; +// action.msgType = TDMT_VND_STREAM_CHECK_POINT_SOURCE; - if (pStream == NULL || pStream->uid != pMsg->streamId) { - mError("start checkpointing failed since stream %s not found", pMsg->streamName); - return -1; - } +// mndReleaseVgroup(pMnode, pVgObj); - // build new transaction: - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "stream-checkpoint"); - if (pTrans == NULL) return -1; - mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); - if (mndTransCheckConflict(pMnode, pTrans) != 0) { - mndReleaseStream(pMnode, pStream); - mndTransDrop(pTrans); - return -1; - } +// if (mndTransAppendRedoAction(pTrans, &action) != 0) { +// taosMemoryFree(buf); +// taosWUnLockLatch(&pStream->lock); +// mndReleaseStream(pMnode, pStream); +// mndTransDrop(pTrans); +// return -1; +// } +// } +// } +// } +// // 2. reset tick +// pStream->checkpointFreq = checkpointId; +// pStream->checkpointId = checkpointId; +// pStream->checkpointFreq = taosGetTimestampMs(); +// atomic_store_64(&pStream->currentTick, 0); +// // 3. commit log: stream checkpoint info +// pStream->version = pStream->version + 1; +// taosWUnLockLatch(&pStream->lock); + +// // // code condtion + +// SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); +// if (pCommitRaw == NULL) { +// mError("failed to prepare trans rebalance since %s", terrstr()); +// goto _ERR; +// } +// if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { +// sdbFreeRaw(pCommitRaw); +// mError("failed to prepare trans rebalance since %s", terrstr()); +// goto _ERR; +// } +// if (sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY) != 0) { +// sdbFreeRaw(pCommitRaw); +// mError("failed to prepare trans rebalance since %s", terrstr()); +// goto _ERR; +// } + +// if (mndTransPrepare(pMnode, pTrans) != 0) { +// mError("failed to prepare trans rebalance since %s", terrstr()); +// goto _ERR; +// } +// mndTransDrop(pTrans); +// return 0; +// _ERR: +// mndTransDrop(pTrans); +// return -1; +// } + +static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream, SMnode *pMnode, + int64_t checkpointId) { + taosWLockLatch(&pStream->lock); - taosRLockLatch(&pStream->lock); - // 1. 
redo action: broadcast checkpoint source msg for all source vg int32_t totLevel = taosArrayGetSize(pStream->tasks); for (int32_t i = 0; i < totLevel; i++) { SArray *pLevel = taosArrayGetP(pStream->tasks, i); SStreamTask *pTask = taosArrayGetP(pLevel, 0); + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { int32_t sz = taosArrayGetSize(pLevel); for (int32_t j = 0; j < sz; j++) { - SStreamTask *pTask = taosArrayGetP(pLevel, j); + pTask = taosArrayGetP(pLevel, j); + if (pTask->info.fillHistory == 1) { + continue; + } /*A(pTask->info.nodeId > 0);*/ SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); if (pVgObj == NULL) { - taosRUnLockLatch(&pStream->lock); - mndReleaseStream(pMnode, pStream); - mndTransDrop(pTrans); + taosWUnLockLatch(&pStream->lock); return -1; } void *buf; int32_t tlen; - if (mndBuildStreamCheckpointSourceReq(&buf, &tlen, pTask, pMsg) < 0) { - taosRUnLockLatch(&pStream->lock); - mndReleaseStream(pMnode, pStream); - mndTransDrop(pTrans); + if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, checkpointId, pTask->id.streamId, + pTask->id.taskId) < 0) { + mndReleaseVgroup(pMnode, pVgObj); + taosWUnLockLatch(&pStream->lock); return -1; } STransAction action = {0}; - action.epSet = mndGetVgroupEpset(pMnode, pVgObj); - action.pCont = buf; - action.contLen = tlen; - action.msgType = TDMT_VND_STREAM_CHECK_POINT_SOURCE; - + SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); + initTransAction(&action, buf, tlen, TDMT_VND_STREAM_CHECK_POINT_SOURCE, &epset); mndReleaseVgroup(pMnode, pVgObj); if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(buf); - taosRUnLockLatch(&pStream->lock); - mndReleaseStream(pMnode, pStream); - mndTransDrop(pTrans); + taosWUnLockLatch(&pStream->lock); return -1; } } } } - // 2. reset tick + + pStream->checkpointId = checkpointId; + pStream->checkpointFreq = taosGetTimestampMs(); atomic_store_64(&pStream->currentTick, 0); // 3. 
commit log: stream checkpoint info - taosRUnLockLatch(&pStream->lock); + pStream->version = pStream->version + 1; - if (mndTransPrepare(pMnode, pTrans) != 0) { + taosWUnLockLatch(&pStream->lock); + + SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); + if (pCommitRaw == NULL) { + mError("failed to prepare trans rebalance since %s", terrstr()); + return -1; + } + if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + sdbFreeRaw(pCommitRaw); + mError("failed to prepare trans rebalance since %s", terrstr()); + return -1; + } + if (sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY) != 0) { + sdbFreeRaw(pCommitRaw); mError("failed to prepare trans rebalance since %s", terrstr()); - mndTransDrop(pTrans); - mndReleaseStream(pMnode, pStream); return -1; } - - mndReleaseStream(pMnode, pStream); - mndTransDrop(pTrans); - return 0; } -#endif +static const char *mndGetStreamDB(SMnode *pMnode) { + SSdb *pSdb = pMnode->pSdb; + SStreamObj *pStream = NULL; + void *pIter = NULL; + + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) { + return NULL; + } + + const char *p = taosStrdup(pStream->sourceDb); + mndReleaseStream(pMnode, pStream); + sdbCancelFetch(pSdb, pIter); + return p; +} + +static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + SSdb *pSdb = pMnode->pSdb; + void *pIter = NULL; + SStreamObj *pStream = NULL; + int32_t code = 0; + + { // check if the node update happens or not + int64_t ts = taosGetTimestampSec(); + + if (execNodeList.pNodeEntryList == NULL || (taosArrayGetSize(execNodeList.pNodeEntryList) == 0)) { + if (execNodeList.pNodeEntryList != NULL) { + execNodeList.pNodeEntryList = taosArrayDestroy(execNodeList.pNodeEntryList); + } + + execNodeList.pNodeEntryList = doExtractNodeListFromStream(pMnode); + } + + if (taosArrayGetSize(execNodeList.pNodeEntryList) == 0) { + mDebug("end to do stream task node change checking, no vgroup exists, do nothing"); + execNodeList.ts = ts; + atomic_store_32(&mndNodeCheckSentinel, 0); + return 0; + } + + SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode); + + SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execNodeList.pNodeEntryList, pNodeSnapshot); + bool nodeUpdated = (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0); + taosArrayDestroy(changeInfo.pUpdateNodeList); + taosHashCleanup(changeInfo.pDBMap); + taosArrayDestroy(pNodeSnapshot); + + if (nodeUpdated) { + mDebug("stream task not ready due to node update, not generate checkpoint"); + return 0; + } + } + + { // check if all tasks are in TASK_STATUS__NORMAL status + bool ready = true; + + taosThreadMutexLock(&execNodeList.lock); + for (int32_t i = 0; i < taosArrayGetSize(execNodeList.pTaskList); ++i) { + STaskStatusEntry *p = taosArrayGet(execNodeList.pTaskList, i); + if (p->status != TASK_STATUS__NORMAL) { + mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, create checkpoint msg not issued", + p->streamId, p->taskId, 0, streamGetTaskStatusStr(p->status)); + ready = false; + break; + } + } + taosThreadMutexUnlock(&execNodeList.lock); + + if (!ready) { + return 0; + } + } + + SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont; + int64_t checkpointId = pMsg->checkpointId; + + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB, NULL, MND_STREAM_CHECKPOINT_NAME); + if (pTrans == NULL) { + mError("failed to trigger checkpoint, reason: %s", tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + return -1; + } + mDebug("start to trigger checkpoint, checkpointId: %" 
PRId64 "", checkpointId); + + const char *pDb = mndGetStreamDB(pMnode); + mndTransSetDbName(pTrans, pDb, "checkpoint"); + taosMemoryFree((void *)pDb); + + if (mndTransCheckConflict(pMnode, pTrans) != 0) { + mError("failed to trigger checkpoint, checkpointId: %" PRId64 ", reason:%s", checkpointId, + tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); + mndTransDrop(pTrans); + return -1; + } + + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) break; + + code = mndAddStreamCheckpointToTrans(pTrans, pStream, pMnode, checkpointId); + sdbRelease(pSdb, pStream); + if (code == -1) { + break; + } + } + + if (code == 0) { + if (mndTransPrepare(pMnode, pTrans) != 0) { + mError("failed to prepre trans rebalance since %s", terrstr()); + } + } + + mndTransDrop(pTrans); + return code; +} static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; @@ -1057,6 +1290,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { sdbRelease(pMnode->pSdb, pStream); return -1; } + mInfo("trans:%d, used to drop stream:%s", pTrans->id, dropReq.name); mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); @@ -1065,6 +1299,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { mndTransDrop(pTrans); return -1; } + // mndTransSetSerial(pTrans); // drop all tasks if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { @@ -1091,7 +1326,11 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { char detail[100] = {0}; sprintf(detail, "igNotExists:%d", dropReq.igNotExists); - auditRecord(pReq, pMnode->clusterId, "dropStream", dropReq.name, "", detail); + SName name = {0}; + tNameFromString(&name, dropReq.name, T_NAME_ACCT | T_NAME_DB); + //reuse this function for stream + + auditRecord(pReq, pMnode->clusterId, "dropStream", name.dbname, "", detail); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); @@ -1381,18 +1620,18 @@ static void mndCancelGetNextStreamTask(SMnode *pMnode, void *pIter) { static int32_t mndPauseStreamTask(STrans *pTrans, SStreamTask *pTask) { SVPauseStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVPauseStreamTaskReq)); if (pReq == NULL) { + mError("failed to malloc in pause stream, size:%" PRIzu ", code:%s", sizeof(SVPauseStreamTaskReq), + tstrerror(TSDB_CODE_OUT_OF_MEMORY)); terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } + pReq->head.vgId = htonl(pTask->info.nodeId); pReq->taskId = pTask->id.taskId; pReq->streamId = pTask->id.streamId; STransAction action = {0}; - memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet)); - action.pCont = pReq; - action.contLen = sizeof(SVPauseStreamTaskReq); - action.msgType = TDMT_STREAM_TASK_PAUSE; + initTransAction(&action, pReq, sizeof(SVPauseStreamTaskReq), TDMT_STREAM_TASK_PAUSE, &pTask->info.epSet); if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(pReq); return -1; @@ -1407,7 +1646,7 @@ int32_t mndPauseAllStreamTaskImpl(STrans *pTrans, SArray *tasks) { int32_t sz = taosArrayGetSize(pTasks); for (int32_t j = 0; j < sz; j++) { SStreamTask *pTask = taosArrayGetP(pTasks, j); - if (pTask->info.taskLevel != TASK_LEVEL__SINK && mndPauseStreamTask(pTrans, pTask) < 0) { + if (mndPauseStreamTask(pTrans, pTask) < 0) { return -1; } @@ -1532,10 +1771,7 @@ static int32_t mndResumeStreamTask(STrans *pTrans, SStreamTask *pTask, int8_t ig pReq->igUntreated = igUntreated; STransAction action = {0}; - memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet)); - action.pCont = pReq; - action.contLen = sizeof(SVResumeStreamTaskReq); - action.msgType = 
TDMT_STREAM_TASK_RESUME; + initTransAction(&action, pReq, sizeof(SVResumeStreamTaskReq), TDMT_STREAM_TASK_RESUME, &pTask->info.epSet); if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(pReq); return -1; @@ -1550,7 +1786,7 @@ int32_t mndResumeAllStreamTasks(STrans *pTrans, SStreamObj *pStream, int8_t igUn int32_t sz = taosArrayGetSize(pTasks); for (int32_t j = 0; j < sz; j++) { SStreamTask *pTask = taosArrayGetP(pTasks, j); - if (pTask->info.taskLevel != TASK_LEVEL__SINK && mndResumeStreamTask(pTrans, pTask, igUntreated) < 0) { + if (mndResumeStreamTask(pTrans, pTask, igUntreated) < 0) { return -1; } @@ -1638,3 +1874,517 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { return TSDB_CODE_ACTION_IN_PROGRESS; } + +static void initNodeUpdateMsg(SStreamTaskNodeUpdateMsg *pMsg, const SVgroupChangeInfo *pInfo, int64_t streamId, + int32_t taskId) { + pMsg->streamId = streamId; + pMsg->taskId = taskId; + pMsg->pNodeList = taosArrayInit(taosArrayGetSize(pInfo->pUpdateNodeList), sizeof(SNodeUpdateInfo)); + taosArrayAddAll(pMsg->pNodeList, pInfo->pUpdateNodeList); +} + +static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupChangeInfo *pInfo, int32_t nodeId, + int64_t streamId, int32_t taskId) { + SStreamTaskNodeUpdateMsg req = {0}; + initNodeUpdateMsg(&req, pInfo, streamId, taskId); + + int32_t code = 0; + int32_t blen; + + tEncodeSize(tEncodeStreamTaskUpdateMsg, &req, blen, code); + if (code < 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + int32_t tlen = sizeof(SMsgHead) + blen; + + void *buf = taosMemoryMalloc(tlen); + if (buf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + SEncoder encoder; + tEncoderInit(&encoder, abuf, tlen); + tEncodeStreamTaskUpdateMsg(&encoder, &req); + + SMsgHead *pMsgHead = (SMsgHead *)buf; + pMsgHead->contLen = htonl(tlen); + pMsgHead->vgId = htonl(nodeId); + + tEncoderClear(&encoder); + + *pBuf = buf; + *pLen = tlen; + + return TSDB_CODE_SUCCESS; +} + +int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans) { + SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); + if (pCommitRaw == NULL) { + mError("failed to encode stream since %s", terrstr()); + return -1; + } + + if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + mError("stream trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); + sdbFreeRaw(pCommitRaw); + mndTransDrop(pTrans); + return -1; + } + + if (sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY) != 0) { + mError("stream trans:%d failed to set raw status since %s", pTrans->id, terrstr()); + sdbFreeRaw(pCommitRaw); + mndTransDrop(pTrans); + return -1; + } + + return 0; +} + +void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_t msgType, const SEpSet *pEpset) { + pAction->epSet = *pEpset; + pAction->contLen = contLen; + pAction->pCont = pCont; + pAction->msgType = msgType; +} + +// todo extract method: traverse stream tasks +// build trans to update the epset +static int32_t createStreamUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SVgroupChangeInfo *pInfo) { + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB, NULL, "stream-task-update"); + if (pTrans == NULL) { + mError("failed to build stream task DAG update, reason: %s", tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + return -1; + } + + mDebug("start to build stream:0x%" PRIx64 " task DAG update", pStream->uid); + + mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); + if 
(mndTransCheckConflict(pMnode, pTrans) != 0) { + mError("failed to build stream:0x%" PRIx64 " task DAG update, code:%s", pStream->uid, + tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); + mndTransDrop(pTrans); + return -1; + } + + taosWLockLatch(&pStream->lock); + int32_t numOfLevels = taosArrayGetSize(pStream->tasks); + + for (int32_t j = 0; j < numOfLevels; ++j) { + SArray *pLevel = taosArrayGetP(pStream->tasks, j); + + int32_t numOfTasks = taosArrayGetSize(pLevel); + for (int32_t k = 0; k < numOfTasks; ++k) { + SStreamTask *pTask = taosArrayGetP(pLevel, k); + + void *pBuf = NULL; + int32_t len = 0; + streamTaskUpdateEpsetInfo(pTask, pInfo->pUpdateNodeList); + doBuildStreamTaskUpdateMsg(&pBuf, &len, pInfo, pTask->info.nodeId, pTask->id.streamId, pTask->id.taskId); + + STransAction action = {0}; + initTransAction(&action, pBuf, len, TDMT_VND_STREAM_TASK_UPDATE, &pTask->info.epSet); + if (mndTransAppendRedoAction(pTrans, &action) != 0) { + taosMemoryFree(pBuf); + taosWUnLockLatch(&pStream->lock); + return -1; + } + } + } + + taosWUnLockLatch(&pStream->lock); + + int32_t code = mndPersistTransLog(pStream, pTrans); + if (code != TSDB_CODE_SUCCESS) { + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + return -1; + } + + if (mndTransPrepare(pMnode, pTrans) != 0) { + mError("trans:%d, failed to prepare update stream trans since %s", pTrans->id, terrstr()); + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + return -1; + } + + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + + return TSDB_CODE_ACTION_IN_PROGRESS; +} + +static bool isNodeEpsetChanged(const SEpSet *pPrevEpset, const SEpSet *pCurrent) { + const SEp *pEp = GET_ACTIVE_EP(pPrevEpset); + + for (int32_t i = 0; i < pCurrent->numOfEps; ++i) { + const SEp *p = &(pCurrent->eps[i]); + if (pEp->port == p->port && strncmp(pEp->fqdn, p->fqdn, TSDB_FQDN_LEN) == 0) { + return false; + } + } + + return true; +} + +// 1. increase the replica does not affect the stream process. +// 2. decreasing the replica may affect the stream task execution in the way that there is one or more running stream +// tasks on the will be removed replica. +// 3. vgroup redistribution is an combination operation of first increase replica and then decrease replica. So we will +// handle it as mentioned in 1 & 2 items. 
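Hedged illustration (not part of this patch): the reasoning above hinges on isNodeEpsetChanged() treating a node as unchanged whenever its previously active endpoint still appears anywhere in the new epset. The hypothetical helper countChangedNodes() below sketches how that predicate would be consumed when comparing two node snapshots; the real walk is mndFindChangedNodeInfo() immediately after it, which additionally records the affected database names in pDBMap so the epset-update trans can be scoped per database.

static int32_t countChangedNodes(const SArray *pPrevNodeList, const SArray *pNodeList) {
  int32_t numOfChanged = 0;

  int32_t numOfPrev = taosArrayGetSize(pPrevNodeList);
  for (int32_t i = 0; i < numOfPrev; ++i) {
    SNodeEntry *pPrevEntry = taosArrayGet(pPrevNodeList, i);

    int32_t num = taosArrayGetSize(pNodeList);
    for (int32_t j = 0; j < num; ++j) {
      SNodeEntry *pCurrent = taosArrayGet(pNodeList, j);
      if (pCurrent->nodeId != pPrevEntry->nodeId) {
        continue;
      }

      // "changed" means the endpoint that was active before no longer appears in the new epset,
      // so every stream task bound to this vgroup needs an epset-update trans.
      if (isNodeEpsetChanged(&pPrevEntry->epset, &pCurrent->epset)) {
        numOfChanged += 1;
      }
      break;
    }
  }

  return numOfChanged;
}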
+static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList) { + SVgroupChangeInfo info = { + .pUpdateNodeList = taosArrayInit(4, sizeof(SNodeUpdateInfo)), + .pDBMap = taosHashInit(32, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), true, HASH_NO_LOCK), + }; + + int32_t numOfNodes = taosArrayGetSize(pPrevNodeList); + for (int32_t i = 0; i < numOfNodes; ++i) { + SNodeEntry *pPrevEntry = taosArrayGet(pPrevNodeList, i); + + int32_t num = taosArrayGetSize(pNodeList); + for (int32_t j = 0; j < num; ++j) { + SNodeEntry *pCurrent = taosArrayGet(pNodeList, j); + + if (pCurrent->nodeId == pPrevEntry->nodeId) { + if (isNodeEpsetChanged(&pPrevEntry->epset, &pCurrent->epset)) { + const SEp *pPrevEp = GET_ACTIVE_EP(&pPrevEntry->epset); + + char buf[256] = {0}; + EPSET_TO_STR(&pCurrent->epset, buf); + mDebug("nodeId:%d epset changed detected, old:%s:%d -> new:%s", pCurrent->nodeId, pPrevEp->fqdn, + pPrevEp->port, buf); + + SNodeUpdateInfo updateInfo = {.nodeId = pPrevEntry->nodeId}; + epsetAssign(&updateInfo.prevEp, &pPrevEntry->epset); + epsetAssign(&updateInfo.newEp, &pCurrent->epset); + taosArrayPush(info.pUpdateNodeList, &updateInfo); + + SVgObj *pVgroup = mndAcquireVgroup(pMnode, pCurrent->nodeId); + taosHashPut(info.pDBMap, pVgroup->dbName, strlen(pVgroup->dbName), NULL, 0); + mndReleaseVgroup(pMnode, pVgroup); + } + + break; + } + } + } + + return info; +} + +static SArray *mndTakeVgroupSnapshot(SMnode *pMnode) { + SSdb *pSdb = pMnode->pSdb; + void *pIter = NULL; + SVgObj *pVgroup = NULL; + + SArray *pVgroupListSnapshot = taosArrayInit(4, sizeof(SNodeEntry)); + + while (1) { + pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup); + if (pIter == NULL) { + break; + } + + SNodeEntry entry = {0}; + entry.epset = mndGetVgroupEpset(pMnode, pVgroup); + entry.nodeId = pVgroup->vgId; + entry.hbTimestamp = -1; + + taosArrayPush(pVgroupListSnapshot, &entry); + sdbRelease(pSdb, pVgroup); + } + + return pVgroupListSnapshot; +} + +static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChangeInfo) { + SSdb *pSdb = pMnode->pSdb; + + // check all streams that involved this vnode should update the epset info + SStreamObj *pStream = NULL; + void *pIter = NULL; + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) { + break; + } + + void *p = taosHashGet(pChangeInfo->pDBMap, pStream->targetDb, strlen(pStream->targetDb)); + void *p1 = taosHashGet(pChangeInfo->pDBMap, pStream->sourceDb, strlen(pStream->sourceDb)); + if (p == NULL && p1 == NULL) { + mndReleaseStream(pMnode, pStream); + continue; + } + + mDebug("stream:0x%" PRIx64 " involved node changed, create update trans", pStream->uid); + int32_t code = createStreamUpdateTrans(pMnode, pStream, pChangeInfo); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + return 0; +} + +static SArray *doExtractNodeListFromStream(SMnode *pMnode) { + SSdb *pSdb = pMnode->pSdb; + SStreamObj *pStream = NULL; + void *pIter = NULL; + + SHashObj *pHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK); + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) { + break; + } + + taosWLockLatch(&pStream->lock); + int32_t numOfLevels = taosArrayGetSize(pStream->tasks); + + for (int32_t j = 0; j < numOfLevels; ++j) { + SArray *pLevel = taosArrayGetP(pStream->tasks, j); + + int32_t numOfTasks = taosArrayGetSize(pLevel); + for (int32_t k = 0; k < numOfTasks; ++k) { + 
SStreamTask *pTask = taosArrayGetP(pLevel, k); + SNodeEntry entry = {0}; + epsetAssign(&entry.epset, &pTask->info.epSet); + entry.nodeId = pTask->info.nodeId; + entry.hbTimestamp = -1; + + taosHashPut(pHash, &entry.nodeId, sizeof(entry.nodeId), &entry, sizeof(entry)); + } + } + + taosWUnLockLatch(&pStream->lock); + sdbRelease(pSdb, pStream); + } + + SArray *plist = taosArrayInit(taosHashGetSize(pHash), sizeof(SNodeEntry)); + + // convert to list + pIter = NULL; + while ((pIter = taosHashIterate(pHash, pIter)) != NULL) { + SNodeEntry *pEntry = (SNodeEntry *)pIter; + taosArrayPush(plist, pEntry); + } + taosHashCleanup(pHash); + + return plist; +} + +static void doExtractTasksFromStream(SMnode *pMnode) { + SSdb *pSdb = pMnode->pSdb; + SStreamObj *pStream = NULL; + void *pIter = NULL; + + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) { + break; + } + + keepStreamTasksInBuf(pStream, &execNodeList); + sdbRelease(pSdb, pStream); + } +} + +// this function runs by only one thread, so it is not multi-thread safe +static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { + int32_t code = 0; + int32_t old = atomic_val_compare_exchange_32(&mndNodeCheckSentinel, 0, 1); + if (old != 0) { + mDebug("still in checking node change"); + return 0; + } + + mDebug("start to do node change checking"); + int64_t ts = taosGetTimestampSec(); + + SMnode *pMnode = pMsg->info.node; + if (execNodeList.pNodeEntryList == NULL || (taosArrayGetSize(execNodeList.pNodeEntryList) == 0)) { + if (execNodeList.pNodeEntryList != NULL) { + execNodeList.pNodeEntryList = taosArrayDestroy(execNodeList.pNodeEntryList); + } + + execNodeList.pNodeEntryList = doExtractNodeListFromStream(pMnode); + } + + if (taosArrayGetSize(execNodeList.pNodeEntryList) == 0) { + mDebug("end to do stream task node change checking, no vgroup exists, do nothing"); + execNodeList.ts = ts; + atomic_store_32(&mndNodeCheckSentinel, 0); + return 0; + } + + SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode); + + SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execNodeList.pNodeEntryList, pNodeSnapshot); + if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) { + code = mndProcessVgroupChange(pMnode, &changeInfo); + } + + taosArrayDestroy(changeInfo.pUpdateNodeList); + taosHashCleanup(changeInfo.pDBMap); + + // keep the new vnode snapshot + if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) { + taosArrayDestroy(execNodeList.pNodeEntryList); + execNodeList.pNodeEntryList = pNodeSnapshot; + execNodeList.ts = ts; + } + + mDebug("end to do stream task node change checking"); + atomic_store_32(&mndNodeCheckSentinel, 0); + return 0; +} + +typedef struct SMStreamNodeCheckMsg { + int8_t holder; // // to fix windows compile error, define place holder +} SMStreamNodeCheckMsg; + +static int32_t mndProcessNodeCheck(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + SSdb *pSdb = pMnode->pSdb; + if (sdbGetSize(pSdb, SDB_STREAM) <= 0) { + return 0; + } + + SMStreamNodeCheckMsg *pMsg = rpcMallocCont(sizeof(SMStreamNodeCheckMsg)); + SRpcMsg rpcMsg = { + .msgType = TDMT_MND_STREAM_NODECHANGE_CHECK, .pCont = pMsg, .contLen = sizeof(SMStreamNodeCheckMsg)}; + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); + return 0; +} + +static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamVnodeRevertIndex *pExecNode) { + int32_t level = taosArrayGetSize(pStream->tasks); + for (int32_t i = 0; i < level; i++) { + SArray *pLevel = taosArrayGetP(pStream->tasks, i); + + int32_t numOfTasks = 
taosArrayGetSize(pLevel); + for (int32_t j = 0; j < numOfTasks; j++) { + SStreamTask *pTask = taosArrayGetP(pLevel, j); + int64_t keys[2] = {pTask->id.streamId, pTask->id.taskId}; + + void *p = taosHashGet(pExecNode->pTaskMap, keys, sizeof(keys)); + if (p == NULL) { + STaskStatusEntry entry = { + .streamId = pTask->id.streamId, .taskId = pTask->id.taskId, .status = TASK_STATUS__STOP}; + taosArrayPush(pExecNode->pTaskList, &entry); + + int32_t ordinal = taosArrayGetSize(pExecNode->pTaskList) - 1; + taosHashPut(pExecNode->pTaskMap, keys, sizeof(keys), &ordinal, sizeof(ordinal)); + } + } + } +} + +// todo: this process should be executed by the write queue worker of the mnode +int32_t mndProcessStreamHb(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + + SStreamHbMsg req = {0}; + int32_t code = TSDB_CODE_SUCCESS; + + SDecoder decoder = {0}; + tDecoderInit(&decoder, pReq->pCont, pReq->contLen); + + if (tDecodeStreamHbMsg(&decoder, &req) < 0) { + tDecoderClear(&decoder); + terrno = TSDB_CODE_INVALID_MSG; + return -1; + } + tDecoderClear(&decoder); + + // int64_t now = taosGetTimestampSec(); + mTrace("receive stream-meta hb from vgId:%d, active numOfTasks:%d", req.vgId, req.numOfTasks); + + taosThreadMutexLock(&execNodeList.lock); + int32_t numOfExisted = taosHashGetSize(execNodeList.pTaskMap); + if (numOfExisted == 0) { + doExtractTasksFromStream(pMnode); + } + + for (int32_t i = 0; i < req.numOfTasks; ++i) { + STaskStatusEntry *p = taosArrayGet(req.pTaskStatus, i); + int64_t k[2] = {p->streamId, p->taskId}; + int32_t index = *(int32_t *)taosHashGet(execNodeList.pTaskMap, &k, sizeof(k)); + + STaskStatusEntry *pStatusEntry = taosArrayGet(execNodeList.pTaskList, index); + pStatusEntry->status = p->status; + if (p->status != TASK_STATUS__NORMAL) { + mDebug("received s-task:0x%x not in ready status:%s", p->taskId, streamGetTaskStatusStr(p->status)); + } + } + taosThreadMutexUnlock(&execNodeList.lock); + + taosArrayDestroy(req.pTaskStatus); + + // bool nodeChanged = false; + // SArray* pList = taosArrayInit(4, sizeof(int32_t)); + /* + // record the timeout node + for(int32_t i = 0; i < taosArrayGetSize(execNodeList.pNodeEntryList); ++i) { + SNodeEntry* pEntry = taosArrayGet(execNodeList.pNodeEntryList, i); + int64_t duration = now - pEntry->hbTimestamp; + if (duration > MND_STREAM_HB_INTERVAL) { // execNode timeout, try next + taosArrayPush(pList, &pEntry); + mWarn("nodeId:%d stream node timeout, since last hb:%"PRId64"s", pEntry->nodeId, duration); + continue; + } + + if (pEntry->nodeId != req.vgId) { + continue; + } + + pEntry->hbTimestamp = now; + + // check epset to identify whether the node has been transferred to other dnodes. + // node the epset is changed, which means the node transfer has occurred for this node. + // if (!isEpsetEqual(&pEntry->epset, &req.epset)) { + // nodeChanged = true; + // break; + // } + } + + // todo handle the node timeout case. Once the vnode is off-line, we should check the dnode status from mnode, + // to identify whether the dnode is truely offline or not. 
+ + // handle the node changed case + if (!nodeChanged) { + return TSDB_CODE_SUCCESS; + } + + int32_t nodeId = req.vgId; + + {// check all streams that involved this vnode should update the epset info + SStreamObj *pStream = NULL; + void *pIter = NULL; + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) { + break; + } + + // update the related upstream and downstream tasks, todo remove this, no need this function + taosWLockLatch(&pStream->lock); + // streamTaskUpdateEpInfo(pStream->tasks, req.vgId, &req.epset); + // streamTaskUpdateEpInfo(pStream->pHTasksList, req.vgId, &req.epset); + taosWUnLockLatch(&pStream->lock); + + // code = createStreamUpdateTrans(pMnode, pStream, nodeId, ); + // if (code != TSDB_CODE_SUCCESS) { + // todo + //// } + // } + } + */ + return TSDB_CODE_SUCCESS; +} diff --git a/source/dnode/mnode/impl/src/mndTopic.c b/source/dnode/mnode/impl/src/mndTopic.c index 1e3a8bddb7..0b243e0a9c 100644 --- a/source/dnode/mnode/impl/src/mndTopic.c +++ b/source/dnode/mnode/impl/src/mndTopic.c @@ -635,14 +635,24 @@ static int32_t mndProcessCreateTopicReq(SRpcMsg *pReq) { code = TSDB_CODE_ACTION_IN_PROGRESS; } - char detail[1000] = {0}; - sprintf(detail, "igExists:%d, subStbName:%s, subType:%d, withMeta:%d", - createTopicReq.igExists, createTopicReq.subStbName, createTopicReq.subType, createTopicReq.withMeta); + char detail[4000] = {0}; + char sql[3000] = {0}; + strncpy(sql, createTopicReq.sql, 2999); - SName name = {0}; - tNameFromString(&name, createTopicReq.subDbName, T_NAME_ACCT | T_NAME_DB); + SName tableName = {0}; + tNameFromString(&tableName, createTopicReq.subStbName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - auditRecord(pReq, pMnode->clusterId, "crateTopic", createTopicReq.name, name.dbname, detail); + sprintf(detail, "igExists:%d, subStbName:%s, subType:%d, withMeta:%d, sql:%s", + createTopicReq.igExists, tableName.tname, createTopicReq.subType, createTopicReq.withMeta, sql); + + SName dbname = {0}; + tNameFromString(&dbname, createTopicReq.subDbName, T_NAME_ACCT | T_NAME_DB); + + SName topicName = {0}; + tNameFromString(&topicName, createTopicReq.name, T_NAME_ACCT | T_NAME_DB); + //reuse this function for topic + + auditRecord(pReq, pMnode->clusterId, "createTopic", topicName.dbname, dbname.dbname, detail); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -843,7 +853,11 @@ end: char detail[100] = {0}; sprintf(detail, "igNotExists:%d", dropReq.igNotExists); - auditRecord(pReq, pMnode->clusterId, "dropTopic", dropReq.name, "", detail); + SName name = {0}; + tNameFromString(&name, dropReq.name, T_NAME_ACCT | T_NAME_DB); + //reuse this function for topic + + auditRecord(pReq, pMnode->clusterId, "dropTopic", name.dbname, "", detail); return TSDB_CODE_ACTION_IN_PROGRESS; } diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 93a152f0cc..5d150b731c 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -28,9 +28,9 @@ #define TRANS_ARRAY_SIZE 8 #define TRANS_RESERVE_SIZE 48 -static int32_t mndTransActionInsert(SSdb *pSdb, STrans *pTrans); -static int32_t mndTransActionUpdate(SSdb *pSdb, STrans *OldTrans, STrans *pOld); -static int32_t mndTransDelete(SSdb *pSdb, STrans *pTrans, bool callFunc); +static int32_t mndTransActionInsert(SSdb *pSdb, STrans *pTrans); +static int32_t mndTransActionUpdate(SSdb *pSdb, STrans *OldTrans, STrans *pOld); +static int32_t mndTransDelete(SSdb *pSdb, STrans *pTrans, bool callFunc); static int32_t 
mndTransAppendLog(SArray *pArray, SSdbRaw *pRaw); static int32_t mndTransAppendAction(SArray *pArray, STransAction *pAction); @@ -100,10 +100,9 @@ static int32_t mndTransGetActionsSize(SArray *pArray) { return rawDataLen; } - static int32_t mndTransEncodeAction(SSdbRaw *pRaw, int32_t *offset, SArray *pActions, int32_t actionsNum) { int32_t dataPos = *offset; - int8_t unused = 0; + int8_t unused = 0; int32_t ret = -1; for (int32_t i = 0; i < actionsNum; ++i) { @@ -266,16 +265,16 @@ _OVER: SSdbRow *mndTransDecode(SSdbRaw *pRaw) { terrno = TSDB_CODE_INVALID_MSG; - SSdbRow *pRow = NULL; - STrans *pTrans = NULL; - char *pData = NULL; - int32_t dataLen = 0; - int8_t sver = 0; - int32_t prepareActionNum = 0; - int32_t redoActionNum = 0; - int32_t undoActionNum = 0; - int32_t commitActionNum = 0; - int32_t dataPos = 0; + SSdbRow *pRow = NULL; + STrans *pTrans = NULL; + char *pData = NULL; + int32_t dataLen = 0; + int8_t sver = 0; + int32_t prepareActionNum = 0; + int32_t redoActionNum = 0; + int32_t undoActionNum = 0; + int32_t commitActionNum = 0; + int32_t dataPos = 0; if (sdbGetRawSoftVer(pRaw, &sver) != 0) goto _OVER; @@ -577,7 +576,7 @@ STrans *mndTransCreate(SMnode *pMnode, ETrnPolicy policy, ETrnConflct conflict, pTrans->undoActions = taosArrayInit(TRANS_ARRAY_SIZE, sizeof(STransAction)); pTrans->commitActions = taosArrayInit(TRANS_ARRAY_SIZE, sizeof(STransAction)); pTrans->pRpcArray = taosArrayInit(1, sizeof(SRpcHandleInfo)); - pTrans->mTraceId = pReq ? TRACE_GET_ROOTID(&pReq->info.traceId) : 0; + pTrans->mTraceId = pReq ? TRACE_GET_ROOTID(&pReq->info.traceId) : tGenIdPI64(); taosInitRWLatch(&pTrans->lockRpcArray); taosThreadMutexInit(&pTrans->mutex, NULL); @@ -1342,7 +1341,7 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) } bool mndTransPerformPrepareStage(SMnode *pMnode, STrans *pTrans) { - bool continueExec = true; + bool continueExec = true; int32_t code = 0; int32_t numOfActions = taosArrayGetSize(pTrans->prepareActions); diff --git a/source/dnode/mnode/impl/src/mndUser.c b/source/dnode/mnode/impl/src/mndUser.c index 135ec2d3b5..e718a12c5d 100644 --- a/source/dnode/mnode/impl/src/mndUser.c +++ b/source/dnode/mnode/impl/src/mndUser.c @@ -791,6 +791,67 @@ static int32_t mndRemoveTablePriviledge(SMnode *pMnode, SHashObj *hash, SHashObj return 0; } +static char* mndUserAuditTypeStr(int32_t type){ + if(type == TSDB_ALTER_USER_PASSWD){ + return "changePassword"; + } + if(type == TSDB_ALTER_USER_SUPERUSER){ + return "changeSuperUser"; + } + if(type == TSDB_ALTER_USER_ADD_READ_DB){ + return "addReadToDB"; + } + if(type == TSDB_ALTER_USER_ADD_READ_DB){ + return "addReadToDB"; + } + if(type == TSDB_ALTER_USER_REMOVE_READ_DB){ + return "removeReadFromDB"; + } + if(type == TSDB_ALTER_USER_ADD_WRITE_DB){ + return "addWriteToDB"; + } + if(type == TSDB_ALTER_USER_REMOVE_WRITE_DB){ + return "removeWriteFromDB"; + } + if(type == TSDB_ALTER_USER_ADD_ALL_DB){ + return "addToAllDB"; + } + if(type == TSDB_ALTER_USER_REMOVE_ALL_DB){ + return "removeFromAllDB"; + } + if(type == TSDB_ALTER_USER_ENABLE){ + return "enableUser"; + } + if(type == TSDB_ALTER_USER_SYSINFO){ + return "userSysInfo"; + } + if(type == TSDB_ALTER_USER_ADD_SUBSCRIBE_TOPIC){ + return "addSubscribeTopic"; + } + if(type == TSDB_ALTER_USER_REMOVE_SUBSCRIBE_TOPIC){ + return "removeSubscribeTopic"; + } + if(type == TSDB_ALTER_USER_ADD_READ_TABLE){ + return "addReadToTable"; + } + if(type == TSDB_ALTER_USER_REMOVE_READ_TABLE){ + return "removeReadFromTable"; + } + if(type == TSDB_ALTER_USER_ADD_WRITE_TABLE){ 
+ return "addWriteToTable"; + } + if(type == TSDB_ALTER_USER_REMOVE_WRITE_TABLE){ + return "removeWriteFromTable"; + } + if(type == TSDB_ALTER_USER_ADD_ALL_TABLE){ + return "addToAllTable"; + } + if(type == TSDB_ALTER_USER_REMOVE_ALL_TABLE){ + return "removeFromAllTable"; + } + return "error"; +} + static int32_t mndProcessAlterUserReq(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; SSdb *pSdb = pMnode->pSdb; @@ -978,11 +1039,14 @@ static int32_t mndProcessAlterUserReq(SRpcMsg *pReq) { if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; char detail[1000] = {0}; - sprintf(detail, "alterType:%d, enable:%d, superUser:%d, sysInfo:%d, tabName:%s", - alterReq.alterType, alterReq.enable, alterReq.superUser, alterReq.sysInfo, alterReq.tabName); + sprintf(detail, "alterType:%s, enable:%d, superUser:%d, sysInfo:%d, tabName:%s, password:", + mndUserAuditTypeStr(alterReq.alterType), alterReq.enable, alterReq.superUser, alterReq.sysInfo, alterReq.tabName); if(alterReq.alterType == TSDB_ALTER_USER_PASSWD){ - auditRecord(pReq, pMnode->clusterId, "changePassword", alterReq.user, "", detail); + sprintf(detail, "alterType:%s, enable:%d, superUser:%d, sysInfo:%d, tabName:%s, password:xxx", + mndUserAuditTypeStr(alterReq.alterType), alterReq.enable, alterReq.superUser, alterReq.sysInfo, + alterReq.tabName); + auditRecord(pReq, pMnode->clusterId, "alterUser", alterReq.user, "", detail); } else if(alterReq.alterType == TSDB_ALTER_USER_SUPERUSER || alterReq.alterType == TSDB_ALTER_USER_ENABLE || diff --git a/source/dnode/mnode/sdb/src/sdbRaw.c b/source/dnode/mnode/sdb/src/sdbRaw.c index 3a16ee3f13..244e50b52e 100644 --- a/source/dnode/mnode/sdb/src/sdbRaw.c +++ b/source/dnode/mnode/sdb/src/sdbRaw.c @@ -46,7 +46,7 @@ SSdbRaw *sdbAllocRaw(ESdbType type, int8_t sver, int32_t dataLen) { void sdbFreeRaw(SSdbRaw *pRaw) { if (pRaw != NULL) { #if 1 - mTrace("raw:%p, is freed", pRaw); + mTrace("raw:%p, is freed, len:%d, table:%s", pRaw, pRaw->dataLen, sdbTableName(pRaw->type)); #endif taosMemoryFree(pRaw); } diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 635fdcf459..c1a59416f6 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -57,45 +57,49 @@ FAIL: } int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { - ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->pUpstreamEpInfoList) != 0); - - pTask->refCnt = 1; - pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId); - - pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; - pTask->inputQueue = streamQueueOpen(512 << 10); - pTask->outputInfo.queue = streamQueueOpen(512 << 10); - - if (pTask->inputQueue == NULL || pTask->outputInfo.queue == NULL) { - return -1; + ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->pUpstreamInfoList) != 0); + int32_t code = streamTaskInit(pTask, pSnode->pMeta, &pSnode->msgCb, ver); + if (code != TSDB_CODE_SUCCESS) { + return code; } - pTask->tsInfo.init = taosGetTimestampMs(); - pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; - pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL; - pTask->pMsgCb = &pSnode->msgCb; - pTask->chkInfo.version = ver; - pTask->pMeta = pSnode->pMeta; - streamTaskOpenAllUpstreamInput(pTask); pTask->pState = streamStateOpen(pSnode->path, pTask, false, -1, -1); if (pTask->pState == NULL) { + qError("s-task:%s failed to open state for task", pTask->id.idStr); return -1; + } else { + qDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); } - int32_t 
numOfChildEp = taosArrayGetSize(pTask->pUpstreamEpInfoList); + int32_t numOfChildEp = taosArrayGetSize(pTask->pUpstreamInfoList); SReadHandle handle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState, .fillHistory = pTask->info.fillHistory }; initStreamStateAPI(&handle.api); pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, 0, pTask->id.taskId); ASSERT(pTask->exec.pExecutor); - taosThreadMutexInit(&pTask->lock, NULL); + streamTaskResetUpstreamStageInfo(pTask); streamSetupScheduleTrigger(pTask); - qDebug("snode:%d expand stream task on snode, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", SNODE_HANDLE, - pTask->id.idStr, pTask->chkInfo.version, pTask->info.selfChildId, pTask->info.taskLevel); + SCheckpointInfo* pChkInfo = &pTask->chkInfo; + // checkpoint ver is the kept version, handled data should be the next version. + if (pTask->chkInfo.checkpointId != 0) { + pTask->chkInfo.currentVer = pTask->chkInfo.checkpointVer + 1; + qInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " currentVer:%" PRId64, pTask->id.idStr, + pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer); + } else { + if (pTask->chkInfo.currentVer == -1) { + pTask->chkInfo.currentVer = 0; + } + } + + qInfo("snode:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " currentVer:%" PRId64 + " child id:%d, level:%d, status:%s fill-history:%d, trigger:%" PRId64 " ms", + SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer, + pTask->info.selfChildId, pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), + pTask->info.fillHistory, pTask->info.triggerParam); return 0; } @@ -113,12 +117,16 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) { } pSnode->msgCb = pOption->msgCb; - pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE); + pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, -1); if (pSnode->pMeta == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto FAIL; } + // todo fix it: send msg to mnode to rollback to an existed checkpoint, and broadcast the rollback msg to all other + // computing nodes. 
+ pSnode->pMeta->stage = 0; + return pSnode; FAIL: @@ -128,6 +136,7 @@ FAIL: } void sndClose(SSnode *pSnode) { + streamMetaNotifyClose(pSnode->pMeta); streamMetaCommit(pSnode->pMeta); streamMetaClose(pSnode->pMeta); taosMemoryFree(pSnode->path); @@ -173,7 +182,7 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { qDebug("snode:%d s-task:%s is deployed on snode and add into meta, status:%s, numOfTasks:%d", SNODE_HANDLE, pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), numOfTasks); - streamTaskCheckDownstreamTasks(pTask); + streamTaskCheckDownstream(pTask); return 0; } @@ -216,7 +225,7 @@ int32_t sndProcessTaskDispatchReq(SSnode *pSnode, SRpcMsg *pMsg, bool exec) { SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.taskId); if (pTask) { - SRpcMsg rsp = { .info = pMsg->info, .code = 0 }; + SRpcMsg rsp = {.info = pMsg->info, .code = 0}; streamProcessDispatchMsg(pTask, &req, &rsp, exec); streamMetaReleaseTask(pSnode->pMeta, pTask); return 0; @@ -237,7 +246,7 @@ int32_t sndProcessTaskRetrieveReq(SSnode *pSnode, SRpcMsg *pMsg) { SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.dstTaskId); if (pTask) { - SRpcMsg rsp = { .info = pMsg->info, .code = 0}; + SRpcMsg rsp = {.info = pMsg->info, .code = 0}; streamProcessRetrieveReq(pTask, &req, &rsp); streamMetaReleaseTask(pSnode->pMeta, pTask); tDeleteStreamRetrieveReq(&req); @@ -343,7 +352,7 @@ int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) { SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, taskId); if (pTask != NULL) { - rsp.status = streamTaskCheckStatus(pTask); + rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage); streamMetaReleaseTask(pSnode->pMeta, pTask); const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); @@ -351,9 +360,8 @@ int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) { pTask->id.idStr, pStatus, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } else { rsp.status = 0; - qDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 - ") from task:0x%x (vgId:%d), rsp status %d", - taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + qDebug("recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from task:0x%x (vgId:%d), rsp status %d", + taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } SEncoder encoder; @@ -424,13 +432,13 @@ int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { return sndProcessTaskRetrieveReq(pSnode, pMsg); case TDMT_STREAM_RETRIEVE_RSP: return sndProcessTaskRetrieveRsp(pSnode, pMsg); - case TDMT_STREAM_SCAN_HISTORY_FINISH: + case TDMT_VND_STREAM_SCAN_HISTORY_FINISH: return sndProcessStreamTaskScanHistoryFinishReq(pSnode, pMsg); - case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP: + case TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP: return sndProcessTaskRecoverFinishRsp(pSnode, pMsg); - case TDMT_STREAM_TASK_CHECK: + case TDMT_VND_STREAM_TASK_CHECK: return sndProcessStreamTaskCheckReq(pSnode, pMsg); - case TDMT_STREAM_TASK_CHECK_RSP: + case TDMT_VND_STREAM_TASK_CHECK_RSP: return sndProcessStreamTaskCheckRsp(pSnode, pMsg); default: ASSERT(0); diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt index c2b41392e8..b66d811284 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -64,9 +64,12 @@ set( "src/tq/tqPush.c" "src/tq/tqSink.c" "src/tq/tqCommit.c" - 
"src/tq/tqRestore.c" + "src/tq/tqStreamTask.c" "src/tq/tqSnapshot.c" "src/tq/tqOffsetSnapshot.c" + "src/tq/tqStreamStateSnap.c" + "src/tq/tqStreamTaskSnap.c" + ) aux_source_directory("src/tsdb/" TSDB_SOURCE_FILES) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index f08c308185..1146cfdc46 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -45,8 +45,8 @@ extern "C" { typedef struct STqOffsetStore STqOffsetStore; // tqPush -#define EXTRACT_DATA_FROM_WAL_ID (-1) -#define STREAM_TASK_STATUS_CHECK_ID (-2) +#define STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID (-1) +#define STREAM_EXEC_TASK_STATUS_CHECK_ID (-2) // tqExec typedef struct { @@ -55,7 +55,7 @@ typedef struct { typedef struct { int64_t suid; - char* qmsg; // SubPlanToString + char* qmsg; // SubPlanToString SNode* node; } STqExecTb; @@ -81,18 +81,21 @@ typedef enum tq_handle_status { } tq_handle_status; typedef struct { - char subKey[TSDB_SUBSCRIBE_KEY_LEN]; - int64_t consumerId; - int32_t epoch; - int8_t fetchMeta; - int64_t snapshotVer; - SWalReader* pWalReader; - SWalRef* pRef; -// STqPushHandle pushHandle; // push - STqExecHandle execHandle; // exec - SRpcMsg* msg; - tq_handle_status status; + char subKey[TSDB_SUBSCRIBE_KEY_LEN]; + int64_t consumerId; + int32_t epoch; + int8_t fetchMeta; + int64_t snapshotVer; + SWalReader* pWalReader; + SWalRef* pRef; + // STqPushHandle pushHandle; // push + STqExecHandle execHandle; // exec + SRpcMsg* msg; + tq_handle_status status; } STqHandle; +typedef struct { + int64_t snapshotVer; +} SStreamHandle; struct STQ { SVnode* pVnode; @@ -109,17 +112,10 @@ struct STQ { SStreamMeta* pStreamMeta; }; -typedef struct { - int8_t inited; - tmr_h timer; -} STqMgmt; - typedef struct { int32_t size; } STqOffsetHead; -static STqMgmt tqMgmt = {0}; - int32_t tEncodeSTqHandle(SEncoder* pEncoder, const STqHandle* pHandle); int32_t tDecodeSTqHandle(SDecoder* pDecoder, STqHandle* pHandle); void tqDestroyTqHandle(void* data); @@ -159,7 +155,7 @@ int32_t tqOffsetCommitFile(STqOffsetStore* pStore); // tqSink int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, const char* pIdStr); -void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* data); +void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, void* data); // tqOffset char* tqOffsetBuildFName(const char* path, int32_t fVer); @@ -167,8 +163,9 @@ int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname); // tqStream int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver); -int32_t tqStreamTasksScanWal(STQ* pTq); -int32_t tqStreamTasksStatusCheck(STQ* pTq); +int32_t tqScanWal(STQ* pTq); +int32_t tqCheckAndRunStreamTask(STQ* pTq); +int32_t tqStopStreamTasks(STQ* pTq); // tq util int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock); @@ -176,6 +173,8 @@ int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, int32_t type, int64_t sver, int64_t ever); int32_t tqInitDataRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset); +void tqUpdateNodeStage(STQ* pTq); + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index be663c2be9..3355e771e2 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -89,10 +89,11 @@ 
typedef struct SQueryNode SQueryNode; #define VNODE_RSMA0_DIR "tsdb" #define VNODE_RSMA1_DIR "rsma1" #define VNODE_RSMA2_DIR "rsma2" +#define VNODE_TQ_STREAM "stream" #define VNODE_BUFPOOL_SEGMENTS 3 -#define VND_INFO_FNAME "vnode.json" +#define VND_INFO_FNAME "vnode.json" #define VND_INFO_FNAME_TMP "vnode_tmp.json" // vnd.h @@ -214,16 +215,19 @@ int32_t tsdbDeleteTableData(STsdb* pTsdb, int64_t version, tb_uid_t suid, tb_uid int32_t tsdbSetKeepCfg(STsdb* pTsdb, STsdbCfg* pCfg); // tq -int tqInit(); -void tqCleanUp(); -STQ* tqOpen(const char* path, SVnode* pVnode); -void tqNotifyClose(STQ*); -void tqClose(STQ*); -int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver); -int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg); -int tqUnregisterPushHandle(STQ* pTq, void* pHandle); -int tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed. -int tqCheckStreamStatus(STQ* pTq); +int tqInit(); +void tqCleanUp(); +STQ* tqOpen(const char* path, SVnode* pVnode); +void tqNotifyClose(STQ*); +void tqClose(STQ*); +int tqPushMsg(STQ*, tmsg_t msgType); +int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg); +int tqUnregisterPushHandle(STQ* pTq, void* pHandle); +int tqScanWalAsync(STQ* pTq, bool ckPause); +int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessStreamTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqCheckAndRunStreamTaskAsync(STQ* pTq); int tqCommit(STQ*); int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd); @@ -313,6 +317,26 @@ int32_t tqOffsetWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqOffsetWriter int32_t tqOffsetWriterClose(STqOffsetWriter** ppWriter, int8_t rollback); int32_t tqOffsetSnapWrite(STqOffsetWriter* pWriter, uint8_t* pData, uint32_t nData); // SStreamTaskWriter ====================================== + +int32_t streamTaskSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTaskReader** ppReader); +int32_t streamTaskSnapReaderClose(SStreamTaskReader* pReader); +int32_t streamTaskSnapRead(SStreamTaskReader* pReader, uint8_t** ppData); + +int32_t streamTaskSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTaskWriter** ppWriter); +int32_t streamTaskSnapWriterClose(SStreamTaskWriter* ppWriter, int8_t rollback); +int32_t streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t nData); + +int32_t streamStateSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamStateReader** ppReader); +int32_t streamStateSnapReaderClose(SStreamStateReader* pReader); +int32_t streamStateSnapRead(SStreamStateReader* pReader, uint8_t** ppData); + +int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamStateWriter** ppWriter); +int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback); +int32_t streamStateSnapWrite(SStreamStateWriter* pWriter, uint8_t* pData, uint32_t nData); +int32_t streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId); + +int32_t streamStateLoadTasks(SStreamStateWriter* pWriter); + // SStreamTaskReader ====================================== // SStreamStateWriter ===================================== // SStreamStateReader ===================================== @@ -476,7 +500,9 @@ enum { SNAP_DATA_TQ_HANDLE = 7, SNAP_DATA_TQ_OFFSET = 8, SNAP_DATA_STREAM_TASK = 9, - SNAP_DATA_STREAM_STATE = 10, + SNAP_DATA_STREAM_TASK_CHECKPOINT = 10, + SNAP_DATA_STREAM_STATE = 11, + 
SNAP_DATA_STREAM_STATE_BACKEND = 12, }; struct SSnapDataHdr { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index a502e3e314..5b848b51bd 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -14,6 +14,13 @@ */ #include "tq.h" +#include "vnd.h" + +typedef struct { + int8_t inited; +} STqMgmt; + +static STqMgmt tqMgmt = {0}; // 0: not init // 1: already inited @@ -32,11 +39,6 @@ int32_t tqInit() { } if (old == 0) { - tqMgmt.timer = taosTmrInit(10000, 100, 10000, "TQ"); - if (tqMgmt.timer == NULL) { - atomic_store_8(&tqMgmt.inited, 0); - return -1; - } if (streamInit() < 0) { return -1; } @@ -54,7 +56,6 @@ void tqCleanUp() { } if (old == 1) { - taosTmrCleanUp(tqMgmt.timer); streamCleanUp(); atomic_store_8(&tqMgmt.inited, 0); } @@ -127,14 +128,12 @@ int32_t tqInitialize(STQ* pTq) { return -1; } - pTq->pStreamMeta = streamMetaOpen(pTq->path, pTq, (FTaskExpand*)tqExpandTask, pTq->pVnode->config.vgId); + pTq->pStreamMeta = streamMetaOpen(pTq->path, pTq, (FTaskExpand*)tqExpandTask, pTq->pVnode->config.vgId, -1); if (pTq->pStreamMeta == NULL) { return -1; } - // the version is kept in task's meta data - // todo check if this version is required or not - if (streamLoadTasks(pTq->pStreamMeta, walGetCommittedVer(pTq->pVnode->pWal)) < 0) { + if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) { return -1; } @@ -142,6 +141,7 @@ int32_t tqInitialize(STQ* pTq) { } void tqClose(STQ* pTq) { + qDebug("start to close tq"); if (pTq == NULL) { return; } @@ -151,7 +151,7 @@ void tqClose(STQ* pTq) { STqHandle* pHandle = *(STqHandle**)pIter; int32_t vgId = TD_VID(pTq->pVnode); - if(pHandle->msg != NULL) { + if (pHandle->msg != NULL) { tqPushEmptyDataRsp(pHandle, vgId); rpcFreeCont(pHandle->msg->pCont); taosMemoryFree(pHandle->msg); @@ -167,119 +167,17 @@ void tqClose(STQ* pTq) { taosMemoryFree(pTq->path); tqMetaClose(pTq); streamMetaClose(pTq->pStreamMeta); + qDebug("end to close tq"); taosMemoryFree(pTq); } -static bool hasStreamTaskInTimer(SStreamMeta* pMeta) { - bool inTimer = false; - - taosWLockLatch(&pMeta->lock); - - void* pIter = NULL; - while(1) { - pIter = taosHashIterate(pMeta->pTasks, pIter); - if (pIter == NULL) { - break; - } - - SStreamTask* pTask = *(SStreamTask**)pIter; - if (pTask->status.timerActive >= 1) { - inTimer = true; - } - } - - taosWUnLockLatch(&pMeta->lock); - - return inTimer; -} - void tqNotifyClose(STQ* pTq) { - if (pTq != NULL) { - taosWLockLatch(&pTq->pStreamMeta->lock); - - void* pIter = NULL; - while (1) { - pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); - if (pIter == NULL) { - break; - } - - SStreamTask* pTask = *(SStreamTask**)pIter; - tqDebug("vgId:%d s-task:%s set closing flag", pTq->pStreamMeta->vgId, pTask->id.idStr); - pTask->status.taskStatus = TASK_STATUS__STOP; - - int64_t st = taosGetTimestampMs(); - qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS); - - int64_t el = taosGetTimestampMs() - st; - tqDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", pTq->pStreamMeta->vgId, pTask->id.idStr, el); - } - - taosWUnLockLatch(&pTq->pStreamMeta->lock); - - tqDebug("vgId:%d start to check all tasks", pTq->pStreamMeta->vgId); - - int64_t st = taosGetTimestampMs(); - - while(hasStreamTaskInTimer(pTq->pStreamMeta)) { - tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pTq->pStreamMeta->vgId); - taosMsleep(100); - } - - int64_t el = taosGetTimestampMs() - st; - tqDebug("vgId:%d all stream tasks are not in timer, continue close, elapsed time:%"PRId64" ms", pTq->pStreamMeta->vgId, el); + if 
(pTq == NULL) { + return; } + streamMetaNotifyClose(pTq->pStreamMeta); } -//static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, -// int64_t consumerId, int32_t type) { -// int32_t len = 0; -// int32_t code = 0; -// -// if (type == TMQ_MSG_TYPE__POLL_DATA_RSP) { -// tEncodeSize(tEncodeMqDataRsp, pRsp, len, code); -// } else if (type == TMQ_MSG_TYPE__POLL_DATA_META_RSP) { -// tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code); -// } -// -// if (code < 0) { -// return -1; -// } -// -// int32_t tlen = sizeof(SMqRspHead) + len; -// void* buf = rpcMallocCont(tlen); -// if (buf == NULL) { -// return -1; -// } -// -// ((SMqRspHead*)buf)->mqMsgType = type; -// ((SMqRspHead*)buf)->epoch = epoch; -// ((SMqRspHead*)buf)->consumerId = consumerId; -// -// void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); -// -// SEncoder encoder = {0}; -// tEncoderInit(&encoder, abuf, len); -// -// if (type == TMQ_MSG_TYPE__POLL_DATA_RSP) { -// tEncodeMqDataRsp(&encoder, pRsp); -// } else if (type == TMQ_MSG_TYPE__POLL_DATA_META_RSP) { -// tEncodeSTaosxRsp(&encoder, (STaosxRsp*)pRsp); -// } -// -// tEncoderClear(&encoder); -// -// SRpcMsg rsp = { -// .info = *pRpcHandleInfo, -// .pCont = buf, -// .contLen = tlen, -// .code = 0, -// }; -// -// tmsgSendRsp(&rsp); -// return 0; -//} - int32_t tqPushEmptyDataRsp(STqHandle* pHandle, int32_t vgId) { SMqPollReq req = {0}; if (tDeserializeSMqPollReq(pHandle->msg->pCont, pHandle->msg->contLen, &req) < 0) { @@ -293,33 +191,14 @@ int32_t tqPushEmptyDataRsp(STqHandle* pHandle, int32_t vgId) { dataRsp.blockNum = 0; char buf[TSDB_OFFSET_LEN] = {0}; tFormatOffset(buf, TSDB_OFFSET_LEN, &dataRsp.reqOffset); - tqInfo("tqPushEmptyDataRsp to consumer:0x%"PRIx64 " vgId:%d, offset:%s, reqId:0x%" PRIx64, req.consumerId, vgId, buf, req.reqId); + tqInfo("tqPushEmptyDataRsp to consumer:0x%" PRIx64 " vgId:%d, offset:%s, reqId:0x%" PRIx64, req.consumerId, vgId, buf, + req.reqId); tqSendDataRsp(pHandle, pHandle->msg, &req, &dataRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId); tDeleteMqDataRsp(&dataRsp); return 0; } -//int32_t tqPushDataRsp(STqHandle* pHandle, int32_t vgId) { -// SMqDataRsp dataRsp = {0}; -// dataRsp.head.consumerId = pHandle->consumerId; -// dataRsp.head.epoch = pHandle->epoch; -// dataRsp.head.mqMsgType = TMQ_MSG_TYPE__POLL_RSP; -// -// int64_t sver = 0, ever = 0; -// walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever); -// tqDoSendDataRsp(&pHandle->msg->info, &dataRsp, pHandle->epoch, pHandle->consumerId, TMQ_MSG_TYPE__POLL_RSP, sver, -// ever); -// -// char buf1[TSDB_OFFSET_LEN] = {0}; -// char buf2[TSDB_OFFSET_LEN] = {0}; -// tFormatOffset(buf1, tListLen(buf1), &dataRsp.reqOffset); -// tFormatOffset(buf2, tListLen(buf2), &dataRsp.rspOffset); -// tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) push rsp, block num: %d, req:%s, rsp:%s", vgId, -// dataRsp.head.consumerId, dataRsp.head.epoch, dataRsp.blockNum, buf1, buf2); -// return 0; -//} - int32_t tqSendDataRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp, int32_t type, int32_t vgId) { int64_t sver = 0, ever = 0; @@ -332,8 +211,8 @@ int32_t tqSendDataRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* tFormatOffset(buf1, TSDB_OFFSET_LEN, &pRsp->reqOffset); tFormatOffset(buf2, TSDB_OFFSET_LEN, &pRsp->rspOffset); - tqDebug("tmq poll vgId:%d consumer:0x%" PRIx64 " (epoch %d) send rsp, block num:%d, req:%s, rsp:%s, reqId:0x%" PRIx64, vgId, - pReq->consumerId, pReq->epoch, 
pRsp->blockNum, buf1, buf2, pReq->reqId); + tqDebug("tmq poll vgId:%d consumer:0x%" PRIx64 " (epoch %d) send rsp, block num:%d, req:%s, rsp:%s, reqId:0x%" PRIx64, + vgId, pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2, pReq->reqId); return 0; } @@ -366,7 +245,7 @@ int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey); if (pSavedOffset != NULL && tqOffsetEqual(pOffset, pSavedOffset)) { tqInfo("not update the offset, vgId:%d sub:%s since committed:%" PRId64 " less than/equal to existed:%" PRId64, - vgId, pOffset->subKey, pOffset->val.version, pSavedOffset->val.version); + vgId, pOffset->subKey, pOffset->val.version, pSavedOffset->val.version); return 0; // no need to update the offset value } @@ -379,10 +258,10 @@ int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t } int32_t tqProcessSeekReq(STQ* pTq, SRpcMsg* pMsg) { - SMqSeekReq req = {0}; - int32_t vgId = TD_VID(pTq->pVnode); - SRpcMsg rsp = {.info = pMsg->info}; - int code = 0; + SMqSeekReq req = {0}; + int32_t vgId = TD_VID(pTq->pVnode); + SRpcMsg rsp = {.info = pMsg->info}; + int code = 0; if (tDeserializeSMqSeekReq(pMsg->pCont, pMsg->contLen, &req) < 0) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -407,8 +286,8 @@ int32_t tqProcessSeekReq(STQ* pTq, SRpcMsg* pMsg) { goto end; } - //if consumer register to push manager, push empty to consumer to change vg status from TMQ_VG_STATUS__WAIT to TMQ_VG_STATUS__IDLE, - //otherwise poll data failed after seek. + // if consumer register to push manager, push empty to consumer to change vg status from TMQ_VG_STATUS__WAIT to + // TMQ_VG_STATUS__IDLE, otherwise poll data failed after seek. tqUnregisterPushHandle(pTq, pHandle); taosRUnLockLatch(&pTq->lock); @@ -417,85 +296,85 @@ end: tmsgSendRsp(&rsp); return 0; -// SMqVgOffset vgOffset = {0}; -// int32_t vgId = TD_VID(pTq->pVnode); -// -// SDecoder decoder; -// tDecoderInit(&decoder, (uint8_t*)msg, msgLen); -// if (tDecodeMqVgOffset(&decoder, &vgOffset) < 0) { -// tqError("vgId:%d failed to decode seek msg", vgId); -// return -1; -// } -// -// tDecoderClear(&decoder); -// -// tqDebug("topic:%s, vgId:%d process offset seek by consumer:0x%" PRIx64 ", req offset:%" PRId64, -// vgOffset.offset.subKey, vgId, vgOffset.consumerId, vgOffset.offset.val.version); -// -// STqOffset* pOffset = &vgOffset.offset; -// if (pOffset->val.type != TMQ_OFFSET__LOG) { -// tqError("vgId:%d, subKey:%s invalid seek offset type:%d", vgId, pOffset->subKey, pOffset->val.type); -// return -1; -// } -// -// STqHandle* pHandle = taosHashGet(pTq->pHandle, pOffset->subKey, strlen(pOffset->subKey)); -// if (pHandle == NULL) { -// tqError("tmq seek: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", vgOffset.consumerId, vgId, pOffset->subKey); -// terrno = TSDB_CODE_INVALID_MSG; -// return -1; -// } -// -// // 2. check consumer-vg assignment status -// taosRLockLatch(&pTq->lock); -// if (pHandle->consumerId != vgOffset.consumerId) { -// tqDebug("ERROR tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, -// vgOffset.consumerId, vgId, pOffset->subKey, pHandle->consumerId); -// terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; -// taosRUnLockLatch(&pTq->lock); -// return -1; -// } -// taosRUnLockLatch(&pTq->lock); -// -// // 3. 
check the offset info -// STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey); -// if (pSavedOffset != NULL) { -// if (pSavedOffset->val.type != TMQ_OFFSET__LOG) { -// tqError("invalid saved offset type, vgId:%d sub:%s", vgId, pOffset->subKey); -// return 0; // no need to update the offset value -// } -// -// if (pSavedOffset->val.version == pOffset->val.version) { -// tqDebug("vgId:%d subKey:%s no need to seek to %" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, -// pOffset->val.version, pSavedOffset->val.version); -// return 0; -// } -// } -// -// int64_t sver = 0, ever = 0; -// walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever); -// if (pOffset->val.version < sver) { -// pOffset->val.version = sver; -// } else if (pOffset->val.version > ever) { -// pOffset->val.version = ever; -// } -// -// // save the new offset value -// if (pSavedOffset != NULL) { -// tqDebug("vgId:%d sub:%s seek to:%" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, pOffset->val.version, -// pSavedOffset->val.version); -// } else { -// tqDebug("vgId:%d sub:%s seek to:%" PRId64 " not saved yet", vgId, pOffset->subKey, pOffset->val.version); -// } -// -// if (tqOffsetWrite(pTq->pOffsetStore, pOffset) < 0) { -// tqError("failed to save offset, vgId:%d sub:%s seek to %" PRId64, vgId, pOffset->subKey, pOffset->val.version); -// return -1; -// } -// -// tqDebug("topic:%s, vgId:%d consumer:0x%" PRIx64 " offset is update to:%" PRId64, vgOffset.offset.subKey, vgId, -// vgOffset.consumerId, vgOffset.offset.val.version); -// -// return 0; + // SMqVgOffset vgOffset = {0}; + // int32_t vgId = TD_VID(pTq->pVnode); + // + // SDecoder decoder; + // tDecoderInit(&decoder, (uint8_t*)msg, msgLen); + // if (tDecodeMqVgOffset(&decoder, &vgOffset) < 0) { + // tqError("vgId:%d failed to decode seek msg", vgId); + // return -1; + // } + // + // tDecoderClear(&decoder); + // + // tqDebug("topic:%s, vgId:%d process offset seek by consumer:0x%" PRIx64 ", req offset:%" PRId64, + // vgOffset.offset.subKey, vgId, vgOffset.consumerId, vgOffset.offset.val.version); + // + // STqOffset* pOffset = &vgOffset.offset; + // if (pOffset->val.type != TMQ_OFFSET__LOG) { + // tqError("vgId:%d, subKey:%s invalid seek offset type:%d", vgId, pOffset->subKey, pOffset->val.type); + // return -1; + // } + // + // STqHandle* pHandle = taosHashGet(pTq->pHandle, pOffset->subKey, strlen(pOffset->subKey)); + // if (pHandle == NULL) { + // tqError("tmq seek: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", vgOffset.consumerId, vgId, + // pOffset->subKey); terrno = TSDB_CODE_INVALID_MSG; return -1; + // } + // + // // 2. check consumer-vg assignment status + // taosRLockLatch(&pTq->lock); + // if (pHandle->consumerId != vgOffset.consumerId) { + // tqDebug("ERROR tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" + // PRIx64, + // vgOffset.consumerId, vgId, pOffset->subKey, pHandle->consumerId); + // terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; + // taosRUnLockLatch(&pTq->lock); + // return -1; + // } + // taosRUnLockLatch(&pTq->lock); + // + // // 3. 
check the offset info + // STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey); + // if (pSavedOffset != NULL) { + // if (pSavedOffset->val.type != TMQ_OFFSET__LOG) { + // tqError("invalid saved offset type, vgId:%d sub:%s", vgId, pOffset->subKey); + // return 0; // no need to update the offset value + // } + // + // if (pSavedOffset->val.version == pOffset->val.version) { + // tqDebug("vgId:%d subKey:%s no need to seek to %" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, + // pOffset->val.version, pSavedOffset->val.version); + // return 0; + // } + // } + // + // int64_t sver = 0, ever = 0; + // walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever); + // if (pOffset->val.version < sver) { + // pOffset->val.version = sver; + // } else if (pOffset->val.version > ever) { + // pOffset->val.version = ever; + // } + // + // // save the new offset value + // if (pSavedOffset != NULL) { + // tqDebug("vgId:%d sub:%s seek to:%" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, pOffset->val.version, + // pSavedOffset->val.version); + // } else { + // tqDebug("vgId:%d sub:%s seek to:%" PRId64 " not saved yet", vgId, pOffset->subKey, pOffset->val.version); + // } + // + // if (tqOffsetWrite(pTq->pOffsetStore, pOffset) < 0) { + // tqError("failed to save offset, vgId:%d sub:%s seek to %" PRId64, vgId, pOffset->subKey, pOffset->val.version); + // return -1; + // } + // + // tqDebug("topic:%s, vgId:%d consumer:0x%" PRIx64 " offset is update to:%" PRId64, vgOffset.offset.subKey, vgId, + // vgOffset.consumerId, vgOffset.offset.val.version); + // + // return 0; } int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) { @@ -538,8 +417,11 @@ int32_t tqProcessPollPush(STQ* pTq, SRpcMsg* pMsg) { tqError("pHandle->msg should not be null"); taosHashCancelIterate(pTq->pPushMgr, pIter); break; - }else{ - SRpcMsg msg = {.msgType = TDMT_VND_TMQ_CONSUME, .pCont = pHandle->msg->pCont, .contLen = pHandle->msg->contLen, .info = pHandle->msg->info}; + } else { + SRpcMsg msg = {.msgType = TDMT_VND_TMQ_CONSUME, + .pCont = pHandle->msg->pCont, + .contLen = pHandle->msg->contLen, + .info = pHandle->msg->info}; tmsgPutToQueue(&pTq->pVnode->msgCb, QUERY_QUEUE, &msg); taosMemoryFree(pHandle->msg); pHandle->msg = NULL; @@ -574,10 +456,10 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { // 1. find handle pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); if (pHandle == NULL) { - do{ - if (tqMetaGetHandle(pTq, req.subKey) == 0){ + do { + if (tqMetaGetHandle(pTq, req.subKey) == 0) { pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); - if(pHandle != NULL){ + if (pHandle != NULL) { break; } } @@ -585,7 +467,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { terrno = TSDB_CODE_INVALID_MSG; taosWUnLockLatch(&pTq->lock); return -1; - }while(0); + } while (0); } // 2. 
check re-balance status @@ -636,7 +518,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessVgCommittedInfoReq(STQ* pTq, SRpcMsg* pMsg) { - void* data = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + void* data = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); SMqVgOffset vgOffset = {0}; @@ -678,7 +560,6 @@ int32_t tqProcessVgCommittedInfoReq(STQ* pTq, SRpcMsg* pMsg) { SRpcMsg rsp = {.info = pMsg->info, .pCont = buf, .contLen = len, .code = 0}; tmsgSendRsp(&rsp); - return 0; } @@ -730,7 +611,7 @@ int32_t tqProcessVgWalInfoReq(STQ* pTq, SRpcMsg* pMsg) { if (reqOffset.type == TMQ_OFFSET__LOG) { dataRsp.rspOffset.version = reqOffset.version; - } else if(reqOffset.type < 0){ + } else if (reqOffset.type < 0) { STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, req.subKey); if (pOffset != NULL) { if (pOffset->val.type != TMQ_OFFSET__LOG) { @@ -741,14 +622,16 @@ int32_t tqProcessVgWalInfoReq(STQ* pTq, SRpcMsg* pMsg) { } dataRsp.rspOffset.version = pOffset->val.version; - tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from store:%"PRId64, consumerId, vgId, req.subKey, dataRsp.rspOffset.version); - }else{ + tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from store:%" PRId64, consumerId, vgId, + req.subKey, dataRsp.rspOffset.version); + } else { if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEST) { dataRsp.rspOffset.version = sver; // not consume yet, set the earliest position } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { dataRsp.rspOffset.version = ever; } - tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from init:%"PRId64, consumerId, vgId, req.subKey, dataRsp.rspOffset.version); + tqInfo("consumer:0x%" PRIx64 " vgId:%d subkey:%s get assignment from init:%" PRId64, consumerId, vgId, req.subKey, + dataRsp.rspOffset.version); } } else { tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s invalid offset type:%d", consumerId, vgId, req.subKey, @@ -841,7 +724,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg SMqRebVgReq req = {0}; SDecoder dc = {0}; - tDecoderInit(&dc, msg, msgLen); + tDecoderInit(&dc, (uint8_t*)msg, msgLen); // decode req if (tDecodeSMqRebVgReq(&dc, &req) < 0) { @@ -851,12 +734,12 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } tqInfo("vgId:%d, tq process sub req:%s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pTq->pVnode->config.vgId, req.subKey, - req.oldConsumerId, req.newConsumerId); + req.oldConsumerId, req.newConsumerId); STqHandle* pHandle = NULL; - while(1){ + while (1) { pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); - if (pHandle || tqMetaGetHandle(pTq, req.subKey) < 0){ + if (pHandle || tqMetaGetHandle(pTq, req.subKey) < 0) { break; } } @@ -872,7 +755,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } STqHandle handle = {0}; ret = tqCreateHandle(pTq, &req, &handle); - if(ret < 0){ + if (ret < 0) { tqDestroyTqHandle(&handle); goto end; } @@ -883,7 +766,8 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg if (pHandle->consumerId == req.newConsumerId) { // do nothing tqInfo("vgId:%d no switch consumer:0x%" PRIx64 " remains, because redo wal log", req.vgId, req.newConsumerId); } else { - tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); + tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, 
pHandle->consumerId, + req.newConsumerId); atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_store_32(&pHandle->epoch, 0); tqUnregisterPushHandle(pTq, pHandle); @@ -901,52 +785,41 @@ void freePtr(void* ptr) { taosMemoryFree(*(void**)ptr); } int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { int32_t vgId = TD_VID(pTq->pVnode); + tqDebug("s-task:0x%x start to expand task", pTask->id.taskId); - pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId); - pTask->refCnt = 1; - pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; - pTask->inputQueue = streamQueueOpen(512 << 10); - pTask->outputInfo.queue = streamQueueOpen(512 << 10); - - if (pTask->inputQueue == NULL || pTask->outputInfo.queue == NULL) { - tqError("s-task:%s failed to prepare the input/output queue, initialize task failed", pTask->id.idStr); - return -1; + int32_t code = streamTaskInit(pTask, pTq->pStreamMeta, &pTq->pVnode->msgCb, ver); + if (code != TSDB_CODE_SUCCESS) { + return code; } - pTask->tsInfo.init = taosGetTimestampMs(); - pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; - pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL; - pTask->pMsgCb = &pTq->pVnode->msgCb; - pTask->pMeta = pTq->pStreamMeta; - streamTaskOpenAllUpstreamInput(pTask); - // backup the initial status, and set it to be TASK_STATUS__INIT - pTask->chkInfo.version = ver; - pTask->chkInfo.currentVer = ver; - - pTask->dataRange.range.maxVer = ver; - pTask->dataRange.range.minVer = ver; - if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - SStreamTask* pSateTask = pTask; - SStreamTask task = {0}; + SStreamTask* pStateTask = pTask; + SStreamTask task = {0}; if (pTask->info.fillHistory) { task.id = pTask->streamTaskId; task.pMeta = pTask->pMeta; - pSateTask = &task; + pStateTask = &task; } - pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pSateTask, false, -1, -1); + pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pStateTask, false, -1, -1); if (pTask->pState == NULL) { + tqError("s-task:%s (vgId:%d) failed to open state for task", pTask->id.idStr, vgId); return -1; + } else { + tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); } - SReadHandle handle = {.vnode = pTq->pVnode, - .initTqReader = 1, - .pStateBackend = pTask->pState, - .fillHistory = pTask->info.fillHistory, - .winRange = pTask->dataRange.window}; + SReadHandle handle = { + .checkpointId = pTask->chkInfo.checkpointId, + .vnode = pTq->pVnode, + .initTqReader = 1, + .pStateBackend = pTask->pState, + .fillHistory = pTask->info.fillHistory, + .winRange = pTask->dataRange.window, + }; + initStorageAPI(&handle.api); pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId, pTask->id.taskId); @@ -957,23 +830,31 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId); } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { SStreamTask* pSateTask = pTask; - SStreamTask task = {0}; + SStreamTask task = {0}; if (pTask->info.fillHistory) { task.id = pTask->streamTaskId; task.pMeta = pTask->pMeta; pSateTask = &task; } + pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pSateTask, false, -1, -1); if (pTask->pState == NULL) { + tqError("s-task:%s (vgId:%d) failed to open state for task", pTask->id.idStr, vgId); return -1; + } else { + tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); } - int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->pUpstreamEpInfoList); - SReadHandle handle = {.vnode = NULL, 
- .numOfVgroups = numOfVgroups, - .pStateBackend = pTask->pState, - .fillHistory = pTask->info.fillHistory, - .winRange = pTask->dataRange.window}; + int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->pUpstreamInfoList); + SReadHandle handle = { + .checkpointId = pTask->chkInfo.checkpointId, + .vnode = NULL, + .numOfVgroups = numOfVgroups, + .pStateBackend = pTask->pState, + .fillHistory = pTask->info.fillHistory, + .winRange = pTask->dataRange.window, + }; + initStorageAPI(&handle.api); pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId, pTask->id.taskId); @@ -993,7 +874,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { int32_t ver1 = 1; SMetaInfo info = {0}; - int32_t code = metaGetInfo(pTq->pVnode->pMeta, pTask->tbSink.stbUid, &info, NULL); + code = metaGetInfo(pTq->pVnode->pMeta, pTask->tbSink.stbUid, &info, NULL); if (code == TSDB_CODE_SUCCESS) { ver1 = info.skmVer; } @@ -1003,6 +884,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { if (pTask->tbSink.pTSchema == NULL) { return -1; } + pTask->tbSink.pTblInfo = tSimpleHashInit(10240, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); tSimpleHashSetFreeFp(pTask->tbSink.pTblInfo, freePtr); } @@ -1014,20 +896,27 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { // reset the task status from unfinished transaction if (pTask->status.taskStatus == TASK_STATUS__PAUSE) { - tqWarn("s-task:%s reset task status to be normal, kept in meta status: Paused", pTask->id.idStr); + tqWarn("s-task:%s reset task status to be normal, status kept in taskMeta: Paused", pTask->id.idStr); pTask->status.taskStatus = TASK_STATUS__NORMAL; } - taosThreadMutexInit(&pTask->lock, NULL); + streamTaskResetUpstreamStageInfo(pTask); streamSetupScheduleTrigger(pTask); + SCheckpointInfo* pChkInfo = &pTask->chkInfo; - tqInfo("vgId:%d expand stream task, s-task:%s, checkpoint ver:%" PRId64 - " child id:%d, level:%d, fill-history:%d, trigger:%" PRId64 " ms, disable pause", - vgId, pTask->id.idStr, pTask->chkInfo.version, pTask->info.selfChildId, pTask->info.taskLevel, - pTask->info.fillHistory, pTask->triggerParam); + // checkpoint ver is the kept version, handled data should be the next version. 
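+  // e.g. a task restored with checkpointVer = 100 resumes processing at currentVer = 101, so the data already
+  // covered by that checkpoint is not replayed from the WAL again.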
+ if (pTask->chkInfo.checkpointId != 0) { + pTask->chkInfo.currentVer = pTask->chkInfo.checkpointVer + 1; + tqInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " currentVer:%" PRId64, pTask->id.idStr, + pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer); + } + + tqInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " currentVer:%" PRId64 + " child id:%d, level:%d, status:%s fill-history:%d, trigger:%" PRId64 " ms", + vgId, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer, + pTask->info.selfChildId, pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), + pTask->info.fillHistory, pTask->info.triggerParam); - // next valid version will add one - pTask->chkInfo.version += 1; return 0; } @@ -1057,12 +946,12 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, taskId); if (pTask != NULL) { - rsp.status = streamTaskCheckStatus(pTask); + rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage); streamMetaReleaseTask(pTq->pStreamMeta, pTask); const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); - tqDebug("s-task:%s status:%s, recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), ready:%d", - pTask->id.idStr, pStatus, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + tqDebug("s-task:%s status:%s, stage:%d recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), ready:%d", + pTask->id.idStr, pStatus, rsp.oldStage, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } else { rsp.status = 0; tqDebug("tq recv task check(taskId:0x%" PRIx64 "-0x%x not built yet) req(reqId:0x%" PRIx64 @@ -1074,7 +963,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) { - char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); int32_t code; @@ -1083,7 +972,6 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) { SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)pReq, len); code = tDecodeStreamTaskCheckRsp(&decoder, &rsp); - if (code < 0) { tDecoderClear(&decoder); return -1; @@ -1095,8 +983,8 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) { SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, rsp.streamId, rsp.upstreamTaskId); if (pTask == NULL) { - tqError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId, - pTq->pStreamMeta->vgId); + tqError("tq failed to locate the stream task:0x%" PRIx64 "-0x%x (vgId:%d), it may have been destroyed", + rsp.streamId, rsp.upstreamTaskId, pTq->pStreamMeta->vgId); terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; return -1; } @@ -1111,9 +999,12 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms int32_t vgId = TD_VID(pTq->pVnode); if (tsDisableStream) { + tqInfo("vgId:%d stream disabled, not deploy stream tasks", vgId); return 0; } + tqDebug("vgId:%d receive new stream task deploy msg, start to build stream task", vgId); + // 1.deserialize msg and build task SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask)); if (pTask == NULL) { @@ -1126,18 +1017,18 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t 
sversion, char* msg, int32_t ms SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msg, msgLen); code = tDecodeStreamTask(&decoder, pTask); + tDecoderClear(&decoder); + if (code < 0) { - tDecoderClear(&decoder); taosMemoryFree(pTask); return -1; } - tDecoderClear(&decoder); - SStreamMeta* pStreamMeta = pTq->pStreamMeta; - // 2.save task, use the newest commit version as the initial start version of stream task. + // 2.save task, use the latest commit version as the initial start version of stream task. int32_t taskId = pTask->id.taskId; + int64_t streamId = pTask->id.streamId; bool added = false; taosWLockLatch(&pStreamMeta->lock); @@ -1146,21 +1037,34 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms taosWUnLockLatch(&pStreamMeta->lock); if (code < 0) { - tqError("vgId:%d failed to add s-task:0x%x, total:%d", vgId, pTask->id.taskId, numOfTasks); + tqError("vgId:%d failed to add s-task:0x%x, total:%d, code:%s", vgId, taskId, numOfTasks, tstrerror(code)); tFreeStreamTask(pTask); return -1; } - // not added into meta store + // added into meta store, pTask cannot be reference since it may have been destroyed by other threads already now if + // it is added into the meta store if (added) { - tqDebug("vgId:%d s-task:0x%x is deployed and add into meta, numOfTasks:%d", vgId, taskId, numOfTasks); - SStreamTask* p = streamMetaAcquireTask(pStreamMeta, pTask->id.streamId, taskId); - if (p != NULL) { // reset the downstreamReady flag. - streamTaskCheckDownstreamTasks(p); + // only handled in the leader node + if (vnodeIsRoleLeader(pTq->pVnode)) { + tqDebug("vgId:%d s-task:0x%x is deployed and add into meta, numOfTasks:%d", vgId, taskId, numOfTasks); + SStreamTask* p = streamMetaAcquireTask(pStreamMeta, streamId, taskId); + + bool restored = pTq->pVnode->restored; + if (p != NULL && restored) { + streamTaskCheckDownstream(p); + } else if (!restored) { + tqWarn("s-task:%s not launched since vnode(vgId:%d) not ready", p->id.idStr, vgId); + } + + if (p != NULL) { + streamMetaReleaseTask(pStreamMeta, p); + } + } else { + tqDebug("vgId:%d not leader, not launch stream task s-task:0x%x", vgId, taskId); } - streamMetaReleaseTask(pStreamMeta, p); } else { - tqWarn("vgId:%d failed to add s-task:0x%x, already exists in meta store", vgId, taskId); + tqWarn("vgId:%d failed to add s-task:0x%x, since already exists in meta store", vgId, taskId); tFreeStreamTask(pTask); } @@ -1191,7 +1095,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { streamTaskEnablePause(pTask); } } else { - tqDebug("s-task:%s resume from paused, start ts:%"PRId64, pTask->id.idStr, pTask->tsInfo.step1Start); + tqDebug("s-task:%s resume from paused, start ts:%" PRId64, pTask->id.idStr, pTask->tsInfo.step1Start); } // we have to continue retrying to successfully execute the scan history task. 
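The "continue retrying" note above can be pictured as a poll-until-resumed loop. The sketch below is illustrative only; `task_is_paused` and `launch_scan_history` are placeholder names, not the real TDengine symbols used by this handler.

```c
#include <stdbool.h>
#include <stdio.h>

/* Placeholder for the real pause check on the stream task. */
static bool task_is_paused(int attempt) { return attempt < 3; }

/* Placeholder for kicking off step 1 of the scan-history stage. */
static void launch_scan_history(void) { printf("scan-history step 1 launched\n"); }

int main(void) {
  int attempt = 0;
  /* Keep retrying while the task is paused, then launch the scan once it resumes. */
  while (task_is_paused(attempt)) {
    printf("task paused, retry attempt %d\n", ++attempt);
  }
  launch_scan_history();
  return 0;
}
```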
@@ -1268,7 +1172,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { if (done) { pTask->tsInfo.step2Start = taosGetTimestampMs(); qDebug("s-task:%s scan-history from WAL stage(step 2) ended, elapsed time:%.2fs", id, 0.0); - appendTranstateIntoInputQ(pTask); + streamTaskPutTranstateIntoInputQ(pTask); streamTryExec(pTask); // exec directly } else { STimeWindow* pWindow = &pTask->dataRange.window; @@ -1295,15 +1199,12 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { streamSetStatusNormal(pTask); } - tqStartStreamTasks(pTq); + tqScanWalAsync(pTq, false); } streamMetaReleaseTask(pMeta, pTask); streamMetaReleaseTask(pMeta, pStreamTask); } else { - // todo update the chkInfo version for current task. - // this task has an associated history stream task, so we need to scan wal from the end version of - // history scan. The current version of chkInfo.current is not updated during the history scan STimeWindow* pWindow = &pTask->dataRange.window; if (pTask->historyTaskId.taskId == 0) { @@ -1322,7 +1223,6 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { id, pTask->chkInfo.currentVer, pWindow->skey, pWindow->ekey); } - // notify the downstream agg tasks that upstream tasks are ready to processing the WAL data, update the code = streamTaskScanHistoryDataComplete(pTask); streamMetaReleaseTask(pMeta, pTask); @@ -1334,6 +1234,45 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { return 0; } +// notify the downstream tasks to transfer executor state after handle all history blocks. +int32_t tqProcessTaskTransferStateReq(STQ* pTq, SRpcMsg* pMsg) { + char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + + SStreamTransferReq req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)pReq, len); + int32_t code = tDecodeStreamScanHistoryFinishReq(&decoder, &req); + tDecoderClear(&decoder); + + tqDebug("vgId:%d start to process transfer state msg, from s-task:0x%x", pTq->pStreamMeta->vgId, + req.downstreamTaskId); + + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.downstreamTaskId); + if (pTask == NULL) { + tqError("failed to find task:0x%x, it may have been dropped already. 
process transfer state failed", + req.downstreamTaskId); + return -1; + } + + int32_t remain = streamAlignTransferState(pTask); + if (remain > 0) { + tqDebug("s-task:%s receive upstream transfer state msg, remain:%d", pTask->id.idStr, remain); + streamMetaReleaseTask(pTq->pStreamMeta, pTask); + return 0; + } + + // transfer the ownership of executor state + tqDebug("s-task:%s all upstream tasks send transfer msg, open transfer state flag", pTask->id.idStr); + ASSERT(pTask->streamTaskId.taskId != 0 && pTask->info.fillHistory == 1); + + streamSchedExec(pTask); + streamMetaReleaseTask(pTq->pStreamMeta, pTask); + return 0; +} + +// only the agg tasks and the sink tasks will receive this message from upstream tasks int32_t tqProcessTaskScanHistoryFinishReq(STQ* pTq, SRpcMsg* pMsg) { char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); @@ -1381,12 +1320,13 @@ int32_t tqProcessTaskScanHistoryFinishRsp(STQ* pTq, SRpcMsg* pMsg) { int32_t remain = atomic_sub_fetch_32(&pTask->notReadyTasks, 1); if (remain > 0) { - tqDebug("s-task:%s scan-history finish rsp received from downstream task:0x%x, remain:%d not send finish rsp", + tqDebug("s-task:%s scan-history finish rsp received from downstream task:0x%x, unfinished remain:%d", pTask->id.idStr, req.downstreamId, remain); } else { tqDebug( "s-task:%s scan-history finish rsp received from downstream task:0x%x, all downstream tasks rsp scan-history " - "completed msg", pTask->id.idStr, req.downstreamId); + "completed msg", + pTask->id.idStr, req.downstreamId); streamProcessScanHistoryFinishRsp(pTask); } @@ -1394,72 +1334,19 @@ int32_t tqProcessTaskScanHistoryFinishRsp(STQ* pTq, SRpcMsg* pMsg) { return 0; } -int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock) { - SDecoder* pCoder = &(SDecoder){0}; - SDeleteRes* pRes = &(SDeleteRes){0}; - - (*pRefBlock) = NULL; - - pRes->uidList = taosArrayInit(0, sizeof(tb_uid_t)); - if (pRes->uidList == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - tDecoderInit(pCoder, (uint8_t*)pData, len); - tDecodeDeleteRes(pCoder, pRes); - tDecoderClear(pCoder); - - int32_t numOfTables = taosArrayGetSize(pRes->uidList); - if (numOfTables == 0 || pRes->affectedRows == 0) { - taosArrayDestroy(pRes->uidList); - return TSDB_CODE_SUCCESS; - } - - SSDataBlock* pDelBlock = createSpecialDataBlock(STREAM_DELETE_DATA); - blockDataEnsureCapacity(pDelBlock, numOfTables); - pDelBlock->info.rows = numOfTables; - pDelBlock->info.version = ver; - - for (int32_t i = 0; i < numOfTables; i++) { - // start key column - SColumnInfoData* pStartCol = taosArrayGet(pDelBlock->pDataBlock, START_TS_COLUMN_INDEX); - colDataSetVal(pStartCol, i, (const char*)&pRes->skey, false); // end key column - SColumnInfoData* pEndCol = taosArrayGet(pDelBlock->pDataBlock, END_TS_COLUMN_INDEX); - colDataSetVal(pEndCol, i, (const char*)&pRes->ekey, false); - // uid column - SColumnInfoData* pUidCol = taosArrayGet(pDelBlock->pDataBlock, UID_COLUMN_INDEX); - int64_t* pUid = taosArrayGet(pRes->uidList, i); - colDataSetVal(pUidCol, i, (const char*)pUid, false); - - colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, GROUPID_COLUMN_INDEX), i); - colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX), i); - colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX), i); - } - - taosArrayDestroy(pRes->uidList); - *pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); - if ((*pRefBlock) == NULL) { - 
return TSDB_CODE_OUT_OF_MEMORY; - } - - (*pRefBlock)->type = STREAM_INPUT__REF_DATA_BLOCK; - (*pRefBlock)->pBlock = pDelBlock; - return TSDB_CODE_SUCCESS; -} - int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { SStreamTaskRunReq* pReq = pMsg->pCont; int32_t taskId = pReq->taskId; int32_t vgId = TD_VID(pTq->pVnode); - if (taskId == STREAM_TASK_STATUS_CHECK_ID) { - tqStreamTasksStatusCheck(pTq); + if (taskId == STREAM_EXEC_TASK_STATUS_CHECK_ID) { + tqCheckAndRunStreamTask(pTq); return 0; } - if (taskId == EXTRACT_DATA_FROM_WAL_ID) { // all tasks are extracted submit data from the wal - tqStreamTasksScanWal(pTq); + if (taskId == STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID) { // all tasks are extracted submit data from the wal + tqScanWal(pTq); return 0; } @@ -1467,20 +1354,20 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { if (pTask != NULL) { // even in halt status, the data in inputQ must be processed int8_t st = pTask->status.taskStatus; - if (st == TASK_STATUS__NORMAL || st == TASK_STATUS__SCAN_HISTORY) { + if (st == TASK_STATUS__NORMAL || st == TASK_STATUS__SCAN_HISTORY || st == TASK_STATUS__CK) { tqDebug("vgId:%d s-task:%s start to process block from inputQ, last chk point:%" PRId64, vgId, pTask->id.idStr, - pTask->chkInfo.version); + pTask->chkInfo.currentVer); streamProcessRunReq(pTask); } else { atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); tqDebug("vgId:%d s-task:%s ignore run req since not in ready state, status:%s, sched-status:%d", vgId, - pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus); + pTask->id.idStr, streamGetTaskStatusStr(st), pTask->status.schedStatus); } streamMetaReleaseTask(pTq->pStreamMeta, pTask); - tqStartStreamTasks(pTq); + tqScanWalAsync(pTq, false); return 0; - } else { // NOTE: pTask->status.schedStatus is not updated since it is not be handled by the run exec. + } else { // NOTE: pTask->status.schedStatus is not updated since it is not be handled by the run exec. 
// todo add one function to handle this tqError("vgId:%d failed to found s-task, taskId:0x%x may have been dropped", vgId, taskId); return -1; @@ -1497,6 +1384,7 @@ int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) { SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); tDecodeStreamDispatchReq(&decoder, &req); + tDecoderClear(&decoder); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.taskId); if (pTask) { @@ -1505,6 +1393,8 @@ int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) { streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; } else { + tqError("vgId:%d failed to find task:0x%x to handle the dispatch req, it may have been destroyed already", + pTq->pStreamMeta->vgId, req.taskId); tDeleteStreamDispatchReq(&req); return -1; } @@ -1513,18 +1403,21 @@ int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) { int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) { SStreamDispatchRsp* pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t vgId = pTq->pStreamMeta->vgId; - int32_t taskId = htonl(pRsp->upstreamTaskId); - int64_t streamId = htobe64(pRsp->streamId); - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, streamId, taskId); + int32_t vgId = pTq->pStreamMeta->vgId; + pRsp->upstreamTaskId = htonl(pRsp->upstreamTaskId); + pRsp->streamId = htobe64(pRsp->streamId); + pRsp->downstreamTaskId = htonl(pRsp->downstreamTaskId); + pRsp->downstreamNodeId = htonl(pRsp->downstreamNodeId); + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pRsp->streamId, pRsp->upstreamTaskId); if (pTask) { streamProcessDispatchRsp(pTask, pRsp, pMsg->code); streamMetaReleaseTask(pTq->pStreamMeta, pTask); return TSDB_CODE_SUCCESS; } else { - tqDebug("vgId:%d failed to handle the dispatch rsp, since find task:0x%x failed", vgId, taskId); - return TSDB_CODE_INVALID_MSG; + tqDebug("vgId:%d failed to handle the dispatch rsp, since find task:0x%x failed", vgId, pRsp->upstreamTaskId); + terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; + return terrno; } } @@ -1555,7 +1448,7 @@ int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } tqDebug("s-task:%s receive pause msg from mnode", pTask->id.idStr); - streamTaskPause(pTask); + streamTaskPause(pTask, pMeta); SStreamTask* pHistoryTask = NULL; if (pTask->historyTaskId.taskId != 0) { @@ -1571,7 +1464,7 @@ int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg tqDebug("s-task:%s fill-history task handle paused along with related stream task", pHistoryTask->id.idStr); - streamTaskPause(pHistoryTask); + streamTaskPause(pHistoryTask, pMeta); streamMetaReleaseTask(pMeta, pHistoryTask); } @@ -1586,9 +1479,14 @@ int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, } // todo: handle the case: resume from halt to pause/ from halt to normal/ from pause to normal - streamTaskResume(pTask); + streamTaskResume(pTask, pTq->pStreamMeta); int32_t level = pTask->info.taskLevel; + if (level == TASK_LEVEL__SINK) { + streamMetaReleaseTask(pTq->pStreamMeta, pTask); + return 0; + } + int8_t status = pTask->status.taskStatus; if (status == TASK_STATUS__NORMAL || status == TASK_STATUS__SCAN_HISTORY) { // no lock needs to secure the access of the version @@ -1603,10 +1501,11 @@ int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus); } - if (level == TASK_LEVEL__SOURCE && 
pTask->info.fillHistory && pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { + if (level == TASK_LEVEL__SOURCE && pTask->info.fillHistory && + pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { streamStartScanHistoryAsync(pTask, igUntreated); - } else if (level == TASK_LEVEL__SOURCE && (taosQueueItemSize(pTask->inputQueue->queue) == 0)) { - tqStartStreamTasks(pTq); + } else if (level == TASK_LEVEL__SOURCE && (taosQueueItemSize(pTask->inputInfo.queue->pQueue) == 0)) { + tqScanWalAsync(pTq, false); } else { streamSchedExec(pTask); } @@ -1618,13 +1517,14 @@ int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg; - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->streamId, pReq->taskId); - int32_t code = tqProcessTaskResumeImpl(pTq, pTask, sversion, pReq->igUntreated); + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->streamId, pReq->taskId); + int32_t code = tqProcessTaskResumeImpl(pTq, pTask, sversion, pReq->igUntreated); if (code != 0) { return code; } - SStreamTask* pHistoryTask = streamMetaAcquireTask(pTq->pStreamMeta, pTask->historyTaskId.streamId, pTask->historyTaskId.taskId); + SStreamTask* pHistoryTask = + streamMetaAcquireTask(pTq->pStreamMeta, pTask->historyTaskId.streamId, pTask->historyTaskId.taskId); if (pHistoryTask) { code = tqProcessTaskResumeImpl(pTq, pHistoryTask, sversion, pReq->igUntreated); } @@ -1644,18 +1544,17 @@ int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) { tDecoderClear(&decoder); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.dstTaskId); - - if (pTask) { - SRpcMsg rsp = {.info = pMsg->info, .code = 0}; - streamProcessRetrieveReq(pTask, &req, &rsp); - - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - tDeleteStreamRetrieveReq(&req); - return 0; - } else { - tDeleteStreamRetrieveReq(&req); + if (pTask == NULL) { + // tDeleteStreamDispatchReq(&req); return -1; } + + SRpcMsg rsp = {.info = pMsg->info, .code = 0}; + streamProcessRetrieveReq(pTask, &req, &rsp); + + streamMetaReleaseTask(pTq->pStreamMeta, pTask); + tDeleteStreamRetrieveReq(&req); + return 0; } int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) { @@ -1663,8 +1562,9 @@ int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) { return 0; } +// todo refactor. 
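+// Decodes an incoming stream dispatch request and locates the target s-task in this vnode; the FAIL path at the
+// end of this function replies with an error dispatch rsp when the request cannot be handled.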
int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { - STQ* pTq = pVnode->pTq; + STQ* pTq = pVnode->pTq; int32_t vgId = pVnode->config.vgId; SMsgHead* msgStr = pMsg->pCont; @@ -1683,7 +1583,7 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { tDecoderClear(&decoder); int32_t taskId = req.taskId; - tqDebug("vgId:%d receive dispatch msg to s-task:0x%"PRIx64"-0x%x", vgId, req.streamId, taskId); + tqDebug("vgId:%d receive dispatch msg to s-task:0x%" PRIx64 "-0x%x", vgId, req.streamId, taskId); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, taskId); if (pTask != NULL) { @@ -1694,7 +1594,6 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { taosFreeQitem(pMsg); return 0; } else { - tDeleteStreamDispatchReq(&req); } @@ -1726,7 +1625,7 @@ FAIL: pRsp->inputStatus = TASK_OUTPUT_STATUS__NORMAL; int32_t len = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp); - SRpcMsg rsp = { .code = code, .info = pMsg->info, .contLen = len, .pCont = pRspHead}; + SRpcMsg rsp = {.code = code, .info = pMsg->info, .contLen = len, .pCont = pRspHead}; tqError("s-task:0x%x send dispatch error rsp, code:%s", taskId, tstrerror(code)); tmsgSendRsp(&rsp); @@ -1737,3 +1636,199 @@ FAIL: int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; } +// todo error code cannot be return, since this is invoked by an mnode-launched transaction. +int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg) { + int32_t vgId = TD_VID(pTq->pVnode); + SStreamMeta* pMeta = pTq->pStreamMeta; + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + int32_t code = 0; + + SStreamCheckpointSourceReq req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, len); + if (tDecodeStreamCheckpointSourceReq(&decoder, &req) < 0) { + code = TSDB_CODE_MSG_DECODE_ERROR; + tDecoderClear(&decoder); + tqError("vgId:%d failed to decode checkpoint-source msg, code:%s", vgId, tstrerror(code)); + return code; + } + tDecoderClear(&decoder); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.taskId); + if (pTask == NULL) { + tqError("vgId:%d failed to find s-task:0x%x, ignore checkpoint msg. it may have been destroyed already", vgId, + req.taskId); + return TSDB_CODE_SUCCESS; + } + + // downstream not ready, current the stream tasks are not all ready. Ignore this checkpoint req. 
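+  // e.g. a freshly deployed task whose downstream-readiness check has not finished yet lands in this branch: reply
+  // to the mnode-launched transaction immediately with a failed checkpoint-source rsp instead of starting a checkpoint.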
+ if (pTask->status.downstreamReady != 1) { + qError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpoint:%" PRId64 + ", set it failure", pTask->id.idStr, req.checkpointId); + streamMetaReleaseTask(pMeta, pTask); + + SRpcMsg rsp = {0}; + buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); + tmsgSendRsp(&rsp); // error occurs + return TSDB_CODE_SUCCESS; + } + + int32_t total = 0; + taosWLockLatch(&pMeta->lock); + + // set the initial value for generating check point + // set the mgmt epset info according to the checkout source msg from mnode, todo update mgmt epset if needed + if (pMeta->chkptNotReadyTasks == 0) { + pMeta->chkptNotReadyTasks = streamMetaGetNumOfStreamTasks(pMeta); + pMeta->totalTasks = pMeta->chkptNotReadyTasks; + } + + total = taosArrayGetSize(pMeta->pTaskList); + taosWUnLockLatch(&pMeta->lock); + + qDebug("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg, chkpt:%" PRId64 ", total checkpoint req:%d", + pTask->id.idStr, vgId, pTask->info.taskLevel, req.checkpointId, total); + + code = streamAddCheckpointSourceRspMsg(&req, &pMsg->info, pTask, 1); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // todo: when generating checkpoint, no new tasks are allowed to add into current Vnode + // todo: when generating checkpoint, leader of mnode has transfer to other DNode? + streamProcessCheckpointSourceReq(pTask, &req); + streamMetaReleaseTask(pMeta, pTask); + return code; +} + +// downstream task has complete the stream task checkpoint procedure, let's start the handle the rsp by execute task +int32_t tqProcessStreamTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg) { + int32_t vgId = TD_VID(pTq->pVnode); + SStreamMeta* pMeta = pTq->pStreamMeta; + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + int32_t code = 0; + + SStreamCheckpointReadyMsg req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, len); + if (tDecodeStreamCheckpointReadyMsg(&decoder, &req) < 0) { + code = TSDB_CODE_MSG_DECODE_ERROR; + tDecoderClear(&decoder); + return code; + } + tDecoderClear(&decoder); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.upstreamTaskId); + if (pTask == NULL) { + tqError("vgId:%d failed to find s-task:0x%x, it may have been destroyed already", vgId, req.downstreamTaskId); + return code; + } + + tqDebug("vgId:%d s-task:%s received the checkpoint ready msg from task:0x%x (vgId:%d), handle it", vgId, + pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId); + + streamProcessCheckpointReadyMsg(pTask); + streamMetaReleaseTask(pMeta, pTask); + return code; +} + +int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { + SStreamMeta* pMeta = pTq->pStreamMeta; + int32_t vgId = TD_VID(pTq->pVnode); + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + SRpcMsg rsp = {.info = pMsg->info, .code = TSDB_CODE_SUCCESS}; + + SStreamTaskNodeUpdateMsg req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, len); + if (tDecodeStreamTaskUpdateMsg(&decoder, &req) < 0) { + rsp.code = TSDB_CODE_MSG_DECODE_ERROR; + tqError("vgId:%d failed to decode task update msg, code:%s", vgId, tstrerror(rsp.code)); + goto _end; + } + + // update the nodeEpset when it exists + taosWLockLatch(&pMeta->lock); + + // when replay the WAL, we should update the task epset one again and again, the task may be in stop status. 
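+  // the task entry is read straight from the meta hash table below, so even a task that is already in stop status
+  // still has its epSet refreshed while the WAL is being replayed.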
+ int64_t keys[2] = {req.streamId, req.taskId}; + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + + if (ppTask == NULL || *ppTask == NULL) { + tqError("vgId:%d failed to acquire task:0x%x when handling update, it may have been dropped already", pMeta->vgId, + req.taskId); + rsp.code = TSDB_CODE_SUCCESS; + taosWUnLockLatch(&pMeta->lock); + goto _end; + } + + SStreamTask* pTask = *ppTask; + + tqDebug("s-task:%s receive task nodeEp update msg from mnode", pTask->id.idStr); + streamTaskUpdateEpsetInfo(pTask, req.pNodeList); + + { + streamSetStatusNormal(pTask); + streamMetaSaveTask(pMeta, pTask); + if (streamMetaCommit(pMeta) < 0) { + // persist to disk + } + } + + streamTaskStop(pTask); + tqDebug("s-task:%s task nodeEp update completed", pTask->id.idStr); + + pMeta->closedTask += 1; + + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + bool allStopped = (pMeta->closedTask == numOfTasks); + if (allStopped) { + pMeta->closedTask = 0; + } else { + tqDebug("vgId:%d closed tasks:%d, not closed:%d", vgId, pMeta->closedTask, (numOfTasks - pMeta->closedTask)); + } + + taosWUnLockLatch(&pMeta->lock); + +_end: + tDecoderClear(&decoder); + + if (allStopped) { + + if (!pTq->pVnode->restored) { + tqDebug("vgId:%d vnode restore not completed, not restart the tasks", vgId); + } else { + tqDebug("vgId:%d all tasks are stopped, restart them", vgId); + taosWLockLatch(&pMeta->lock); + + terrno = 0; + int32_t code = streamMetaReopen(pMeta, 0); + if (code != 0) { + tqError("vgId:%d failed to reopen stream meta", vgId); + taosWUnLockLatch(&pMeta->lock); + return -1; + } + + if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) { + tqError("vgId:%d failed to load stream tasks", vgId); + taosWUnLockLatch(&pMeta->lock); + return -1; + } + + taosWUnLockLatch(&pMeta->lock); + if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { + vInfo("vgId:%d, restart all stream tasks", vgId); + tqCheckAndRunStreamTaskAsync(pTq); + } + } + } + + return rsp.code; +} + diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 070a8ecf6f..62952078bc 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -30,7 +30,7 @@ int32_t tqProcessSubmitReqForSubscribe(STQ* pTq) { return 0; } -int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) { +int32_t tqPushMsg(STQ* pTq, tmsg_t msgType) { if (msgType == TDMT_VND_SUBMIT) { tqProcessSubmitReqForSubscribe(pTq); } @@ -39,20 +39,14 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v int32_t numOfTasks = streamMetaGetNumOfTasks(pTq->pStreamMeta); taosRUnLockLatch(&pTq->pStreamMeta->lock); - tqTrace("handle submit, restore:%d, size:%d", pTq->pVnode->restored, numOfTasks); + tqDebug("handle submit, restore:%d, numOfTasks:%d", pTq->pVnode->restored, numOfTasks); // push data for stream processing: // 1. the vnode has already been restored. // 2. the vnode should be the leader. // 3. the stream is not suspended yet. 
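+  // e.g. a submit that arrives while numOfTasks == 0, or a message type other than submit/delete, is ignored here;
+  // otherwise an asynchronous WAL scan is scheduled via tqScanWalAsync.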
- if (!tsDisableStream && vnodeIsRoleLeader(pTq->pVnode) && pTq->pVnode->restored) { - if (numOfTasks == 0) { - return 0; - } - - if (msgType == TDMT_VND_SUBMIT || msgType == TDMT_VND_DELETE) { - tqStartStreamTasks(pTq); - } + if ((!tsDisableStream) && (numOfTasks > 0) && (msgType == TDMT_VND_SUBMIT || msgType == TDMT_VND_DELETE)) { + tqScanWalAsync(pTq, true); } return 0; diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index cce31688bc..f7132ff6c4 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -21,9 +21,14 @@ typedef struct STableSinkInfo { uint64_t uid; - char tbName[TSDB_TABLE_NAME_LEN]; + tstr name; } STableSinkInfo; +static int32_t doSinkResultBlock(SVnode* pVnode, int32_t blockIndex, char* stbFullName, int64_t suid, + SSDataBlock* pDataBlock, SStreamTask* pTask); +static int32_t doSinkDeleteBlock(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, + int64_t suid); + int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, const char* pIdStr) { int32_t totalRows = pDataBlock->info.rows; @@ -97,17 +102,17 @@ end: return ret; } -static int32_t tqGetTableInfo(SSHashObj* pTableInfoMap,uint64_t groupId, STableSinkInfo** pInfo) { +static bool tqGetTableInfo(SSHashObj* pTableInfoMap,uint64_t groupId, STableSinkInfo** pInfo) { void* pVal = tSimpleHashGet(pTableInfoMap, &groupId, sizeof(uint64_t)); if (pVal) { *pInfo = *(STableSinkInfo**)pVal; - return TSDB_CODE_SUCCESS; + return true; } - return TSDB_CODE_FAILED; + return false; } -int32_t tqPutTableInfo(SSHashObj* tblInfo ,uint64_t groupId, STableSinkInfo* pTbl) { +static int32_t tqPutTableInfo(SSHashObj* tblInfo ,uint64_t groupId, STableSinkInfo* pTbl) { if (tSimpleHashGetSize(tblInfo) > MAX_CACHE_TABLE_INFO_NUM) { return TSDB_CODE_FAILED; } @@ -115,7 +120,7 @@ int32_t tqPutTableInfo(SSHashObj* tblInfo ,uint64_t groupId, STableSinkInfo* pTb return tSimpleHashPut(tblInfo, &groupId, sizeof(uint64_t), &pTbl, POINTER_BYTES); } -int32_t tqPutReqToQueue(SVnode* pVnode, SVCreateTbBatchReq* pReqs) { +static int32_t tqPutReqToQueue(SVnode* pVnode, SVCreateTbBatchReq* pReqs) { void* buf = NULL; int32_t tlen = 0; encodeCreateChildTableForRPC(pReqs, TD_VID(pVnode), &buf, &tlen); @@ -128,66 +133,40 @@ int32_t tqPutReqToQueue(SVnode* pVnode, SVCreateTbBatchReq* pReqs) { return TSDB_CODE_SUCCESS; } -void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* data) { + +void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, void* data) { const SArray* pBlocks = (const SArray*)data; SVnode* pVnode = (SVnode*)vnode; int64_t suid = pTask->tbSink.stbUid; char* stbFullName = pTask->tbSink.stbFullName; STSchema* pTSchema = pTask->tbSink.pTSchema; + int32_t vgId = TD_VID(pVnode); + int32_t numOfBlocks = taosArrayGetSize(pBlocks); + int32_t code = TSDB_CODE_SUCCESS; - int32_t blockSz = taosArrayGetSize(pBlocks); + tqDebug("vgId:%d, s-task:%s write %d stream resBlock(s) into table", vgId, pTask->id.idStr, numOfBlocks); - tqDebug("vgId:%d, s-task:%s write results %d blocks into table", TD_VID(pVnode), pTask->id.idStr, blockSz); - - void* pBuf = NULL; SArray* tagArray = NULL; SArray* pVals = NULL; SArray* crTblArray = NULL; - for (int32_t i = 0; i < blockSz; i++) { + for (int32_t i = 0; i < numOfBlocks; i++) { SSDataBlock* pDataBlock = taosArrayGet(pBlocks, i); int32_t rows = pDataBlock->info.rows; if (pDataBlock->info.type == STREAM_DELETE_RESULT) { - SBatchDeleteReq deleteReq = 
{.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))}; - - tqBuildDeleteReq(stbFullName, pDataBlock, &deleteReq, pTask->id.idStr); - if (taosArrayGetSize(deleteReq.deleteReqs) == 0) { - taosArrayDestroy(deleteReq.deleteReqs); - continue; - } - - int32_t len; - int32_t code; - tEncodeSize(tEncodeSBatchDeleteReq, &deleteReq, len, code); - if (code != TSDB_CODE_SUCCESS) { - qError("s-task:%s failed to encode delete request", pTask->id.idStr); - } - - SEncoder encoder; - void* serializedDeleteReq = rpcMallocCont(len + sizeof(SMsgHead)); - void* abuf = POINTER_SHIFT(serializedDeleteReq, sizeof(SMsgHead)); - tEncoderInit(&encoder, abuf, len); - tEncodeSBatchDeleteReq(&encoder, &deleteReq); - tEncoderClear(&encoder); - taosArrayDestroy(deleteReq.deleteReqs); - - ((SMsgHead*)serializedDeleteReq)->vgId = pVnode->config.vgId; - - SRpcMsg msg = { .msgType = TDMT_VND_BATCH_DEL, .pCont = serializedDeleteReq, .contLen = len + sizeof(SMsgHead) }; - if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) { - tqDebug("failed to put delete req into write-queue since %s", terrstr()); - } + code = doSinkDeleteBlock(pVnode, stbFullName, pDataBlock, pTask, suid); } else if (pDataBlock->info.type == STREAM_CREATE_CHILD_TABLE) { + tqDebug("s-task:%s build create table msg", pTask->id.idStr); + SVCreateTbBatchReq reqs = {0}; - crTblArray = reqs.pArray = taosArrayInit(1, sizeof(struct SVCreateTbReq)); + crTblArray = reqs.pArray = taosArrayInit(1, sizeof(SVCreateTbReq)); if (NULL == reqs.pArray) { goto _end; } for (int32_t rowId = 0; rowId < rows; rowId++) { - SVCreateTbReq createTbReq = {0}; - SVCreateTbReq* pCreateTbReq = &createTbReq; + SVCreateTbReq* pCreateTbReq = &((SVCreateTbReq){0}); // set const pCreateTbReq->flags = 0; @@ -203,16 +182,14 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d int32_t size = taosArrayGetSize(pDataBlock->pDataBlock); if (size == 2) { tagArray = taosArrayInit(1, sizeof(STagVal)); + if (!tagArray) { tdDestroySVCreateTbReq(pCreateTbReq); goto _end; } STagVal tagVal = { - .cid = pTSchema->numOfCols + 1, - .type = TSDB_DATA_TYPE_UBIGINT, - .i64 = (int64_t)pDataBlock->info.id.groupId, - }; + .cid = pTSchema->numOfCols + 1, .type = TSDB_DATA_TYPE_UBIGINT, .i64 = pDataBlock->info.id.groupId}; taosArrayPush(tagArray, &tagVal); @@ -227,6 +204,7 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d tdDestroySVCreateTbReq(pCreateTbReq); goto _end; } + for (int32_t tagId = UD_TAG_COLUMN_INDEX, step = 1; tagId < size; tagId++, step++) { SColumnInfoData* pTagData = taosArrayGet(pDataBlock->pDataBlock, tagId); @@ -236,12 +214,13 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d continue; } else if (IS_VAR_DATA_TYPE(pTagData->info.type)) { tagVal.nData = varDataLen(pData); - tagVal.pData = varDataVal(pData); + tagVal.pData = (uint8_t*) varDataVal(pData); } else { memcpy(&tagVal.i64, pData, pTagData->info.bytes); } taosArrayPush(tagArray, &tagVal); } + } pCreateTbReq->ctb.tagNum = TMAX(size - UD_TAG_COLUMN_INDEX, 1); @@ -254,7 +233,6 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d goto _end; } - pCreateTbReq->ctb.pTag = (uint8_t*)pTag; // set table name @@ -265,232 +243,27 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d } else { pCreateTbReq->name = taosStrdup(pDataBlock->info.parTbName); } + taosArrayPush(reqs.pArray, pCreateTbReq); + tqDebug("s-task:%s build create table:%s msg complete", pTask->id.idStr, 
pCreateTbReq->name); } + reqs.nReqs = taosArrayGetSize(reqs.pArray); if (tqPutReqToQueue(pVnode, &reqs) != TSDB_CODE_SUCCESS) { goto _end; } + tagArray = taosArrayDestroy(tagArray); taosArrayDestroyEx(crTblArray, (FDelete)tdDestroySVCreateTbReq); crTblArray = NULL; + } else if (pDataBlock->info.type == STREAM_CHECKPOINT) { + continue; } else { - SSubmitTbData tbData = {0}; - tqDebug("tq sink pipe, convert block:%d, rows:%d", i, rows); - - if (!(tbData.aRowP = taosArrayInit(rows, sizeof(SRow*)))) { - goto _end; - } - - tbData.suid = suid; - tbData.uid = 0; // uid is assigned by vnode - tbData.sver = pTSchema->version; - - STableSinkInfo* pTableSinkInfo = NULL; - int32_t res = tqGetTableInfo(pTask->tbSink.pTblInfo, pDataBlock->info.id.groupId, &pTableSinkInfo); - if (res != TSDB_CODE_SUCCESS) { - pTableSinkInfo = taosMemoryCalloc(1, sizeof(STableSinkInfo)); - } - - char* ctbName = pDataBlock->info.parTbName; - if (!ctbName[0]) { - memset(ctbName, 0, TSDB_TABLE_NAME_LEN); - if (res == TSDB_CODE_SUCCESS) { - memcpy(ctbName, pTableSinkInfo->tbName, strlen(pTableSinkInfo->tbName)); - } else { - buildCtbNameByGroupIdImpl(stbFullName, pDataBlock->info.id.groupId, ctbName); - memcpy(pTableSinkInfo->tbName, ctbName, strlen(ctbName)); - tqDebug("vgId:%d, gropuId:%" PRIu64 " datablock table name is null", TD_VID(pVnode), - pDataBlock->info.id.groupId); - } - } - - if (res == TSDB_CODE_SUCCESS) { - tbData.uid = pTableSinkInfo->uid; - } else { - SMetaReader mr = {0}; - metaReaderDoInit(&mr, pVnode->pMeta, 0); - if (metaGetTableEntryByName(&mr, ctbName) < 0) { - metaReaderClear(&mr); - taosMemoryFree(pTableSinkInfo); - tqDebug("vgId:%d, stream write into %s, table auto created", TD_VID(pVnode), ctbName); - - SVCreateTbReq* pCreateTbReq = NULL; - - if (!(pCreateTbReq = taosMemoryCalloc(1, sizeof(SVCreateStbReq)))) { - goto _end; - }; - - // set const - pCreateTbReq->flags = 0; - pCreateTbReq->type = TSDB_CHILD_TABLE; - pCreateTbReq->ctb.suid = suid; - - // set super table name - SName name = {0}; - tNameFromString(&name, stbFullName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - pCreateTbReq->ctb.stbName = taosStrdup((char*)tNameGetTableName(&name)); // taosStrdup(stbFullName); - - // set tag content - tagArray = taosArrayInit(1, sizeof(STagVal)); - if (!tagArray) { - tdDestroySVCreateTbReq(pCreateTbReq); - taosMemoryFreeClear(pCreateTbReq); - goto _end; - } - STagVal tagVal = { - .cid = pTSchema->numOfCols + 1, - .type = TSDB_DATA_TYPE_UBIGINT, - .i64 = (int64_t)pDataBlock->info.id.groupId, - }; - taosArrayPush(tagArray, &tagVal); - pCreateTbReq->ctb.tagNum = taosArrayGetSize(tagArray); - - STag* pTag = NULL; - tTagNew(tagArray, 1, false, &pTag); - tagArray = taosArrayDestroy(tagArray); - if (pTag == NULL) { - tdDestroySVCreateTbReq(pCreateTbReq); - taosMemoryFreeClear(pCreateTbReq); - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _end; - } - pCreateTbReq->ctb.pTag = (uint8_t*)pTag; - - // set tag name - SArray* tagName = taosArrayInit(1, TSDB_COL_NAME_LEN); - char tagNameStr[TSDB_COL_NAME_LEN] = {0}; - strcpy(tagNameStr, "group_id"); - taosArrayPush(tagName, tagNameStr); - pCreateTbReq->ctb.tagName = tagName; - - // set table name - pCreateTbReq->name = taosStrdup(ctbName); - - tbData.pCreateTbReq = pCreateTbReq; - tbData.flags = SUBMIT_REQ_AUTO_CREATE_TABLE; - } else { - if (mr.me.type != TSDB_CHILD_TABLE) { - tqError("vgId:%d, failed to write into %s, since table type incorrect, type %d", TD_VID(pVnode), ctbName, - mr.me.type); - metaReaderClear(&mr); - taosMemoryFree(pTableSinkInfo); - continue; - } - - if 
(mr.me.ctbEntry.suid != suid) { - tqError("vgId:%d, failed to write into %s, since suid mismatch, expect suid: %" PRId64 - ", actual suid %" PRId64 "", - TD_VID(pVnode), ctbName, suid, mr.me.ctbEntry.suid); - metaReaderClear(&mr); - taosMemoryFree(pTableSinkInfo); - continue; - } - - tbData.uid = mr.me.uid; - pTableSinkInfo->uid = mr.me.uid; - int32_t code = tqPutTableInfo(pTask->tbSink.pTblInfo, pDataBlock->info.id.groupId, pTableSinkInfo); - if (code != TSDB_CODE_SUCCESS) { - taosMemoryFreeClear(pTableSinkInfo); - } - metaReaderClear(&mr); - } - } - - // rows - if (!pVals && !(pVals = taosArrayInit(pTSchema->numOfCols, sizeof(SColVal)))) { - taosArrayDestroy(tbData.aRowP); - tdDestroySVCreateTbReq(tbData.pCreateTbReq); - goto _end; - } - - for (int32_t j = 0; j < rows; j++) { - taosArrayClear(pVals); - int32_t dataIndex = 0; - for (int32_t k = 0; k < pTSchema->numOfCols; k++) { - const STColumn* pCol = &pTSchema->columns[k]; - if (k == 0) { - SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); - void* colData = colDataGetData(pColData, j); - tqDebug("tq sink pipe2, row %d, col %d ts %" PRId64, j, k, *(int64_t*)colData); - } - if (IS_SET_NULL(pCol)) { - SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type); - taosArrayPush(pVals, &cv); - } else { - SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); - if (colDataIsNull_s(pColData, j)) { - SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type); - taosArrayPush(pVals, &cv); - dataIndex++; - } else { - void* colData = colDataGetData(pColData, j); - if (IS_STR_DATA_TYPE(pCol->type)) { - // address copy, no value - SValue sv = (SValue){.nData = varDataLen(colData), .pData = varDataVal(colData)}; - SColVal cv = COL_VAL_VALUE(pCol->colId, pCol->type, sv); - taosArrayPush(pVals, &cv); - } else { - SValue sv; - memcpy(&sv.val, colData, tDataTypes[pCol->type].bytes); - SColVal cv = COL_VAL_VALUE(pCol->colId, pCol->type, sv); - taosArrayPush(pVals, &cv); - } - dataIndex++; - } - } - } - SRow* pRow = NULL; - if ((terrno = tRowBuild(pVals, (STSchema*)pTSchema, &pRow)) < 0) { - tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); - goto _end; - } - ASSERT(pRow); - taosArrayPush(tbData.aRowP, &pRow); - } - - SSubmitReq2 submitReq = {0}; - if (!(submitReq.aSubmitTbData = taosArrayInit(1, sizeof(SSubmitTbData)))) { - tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); - goto _end; - } - - taosArrayPush(submitReq.aSubmitTbData, &tbData); - - // encode - int32_t len; - int32_t code; - tEncodeSize(tEncodeSubmitReq, &submitReq, len, code); - SEncoder encoder; - len += sizeof(SSubmitReq2Msg); - pBuf = rpcMallocCont(len); - if (NULL == pBuf) { - tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); - goto _end; - } - ((SSubmitReq2Msg*)pBuf)->header.vgId = TD_VID(pVnode); - ((SSubmitReq2Msg*)pBuf)->header.contLen = htonl(len); - ((SSubmitReq2Msg*)pBuf)->version = htobe64(1); - tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SSubmitReq2Msg)), len - sizeof(SSubmitReq2Msg)); - if (tEncodeSubmitReq(&encoder, &submitReq) < 0) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - tqError("failed to encode submit req since %s", terrstr()); - tEncoderClear(&encoder); - rpcFreeCont(pBuf); - tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); - continue; - } - tEncoderClear(&encoder); - tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); - - SRpcMsg msg = { .msgType = TDMT_VND_SUBMIT, .pCont = pBuf, .contLen = len }; - if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) { - tqDebug("failed to put into write-queue since %s", terrstr()); 
- } + code = doSinkResultBlock(pVnode, i, stbFullName, suid, pDataBlock, pTask); } } - tqDebug("vgId:%d, s-task:%s write results completed", TD_VID(pVnode), pTask->id.idStr); + tqDebug("vgId:%d, s-task:%s write results completed", vgId, pTask->id.idStr); _end: taosArrayDestroy(tagArray); @@ -498,3 +271,380 @@ _end: taosArrayDestroyEx(crTblArray, (FDelete)tdDestroySVCreateTbReq); // TODO: change } + +int32_t doSinkDeleteBlock(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, + int64_t suid) { + SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))}; + + int32_t code = tqBuildDeleteReq(stbFullName, pDataBlock, &deleteReq, pTask->id.idStr); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (taosArrayGetSize(deleteReq.deleteReqs) == 0) { + taosArrayDestroy(deleteReq.deleteReqs); + return TSDB_CODE_SUCCESS; + } + + int32_t len; + tEncodeSize(tEncodeSBatchDeleteReq, &deleteReq, len, code); + if (code != TSDB_CODE_SUCCESS) { + qError("s-task:%s failed to encode delete request", pTask->id.idStr); + return code; + } + + SEncoder encoder; + void* serializedDeleteReq = rpcMallocCont(len + sizeof(SMsgHead)); + void* abuf = POINTER_SHIFT(serializedDeleteReq, sizeof(SMsgHead)); + tEncoderInit(&encoder, abuf, len); + tEncodeSBatchDeleteReq(&encoder, &deleteReq); + tEncoderClear(&encoder); + taosArrayDestroy(deleteReq.deleteReqs); + + ((SMsgHead*)serializedDeleteReq)->vgId = TD_VID(pVnode); + + SRpcMsg msg = {.msgType = TDMT_VND_BATCH_DEL, .pCont = serializedDeleteReq, .contLen = len + sizeof(SMsgHead)}; + if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) { + tqDebug("failed to put delete req into write-queue since %s", terrstr()); + } + + return TSDB_CODE_SUCCESS; +} + +static bool isValidDestChildTable(SMetaReader* pReader, int32_t vgId, char* ctbName, int64_t suid) { + if (pReader->me.type != TSDB_CHILD_TABLE) { + tqError("vgId:%d, failed to write into %s, since table type:%d incorrect", vgId, ctbName, pReader->me.type); + return false; + } + + if (pReader->me.ctbEntry.suid != suid) { + tqError("vgId:%d, failed to write into %s, since suid mismatch, expect suid:%" PRId64 ", actual:%" PRId64, + vgId, ctbName, suid, pReader->me.ctbEntry.suid); + return false; + } + + return true; +} + +static SVCreateTbReq* buildAutoCreateTableReq(char* stbFullName, int64_t suid, int32_t numOfCols, SSDataBlock* pDataBlock) { + char* ctbName = pDataBlock->info.parTbName; + + SVCreateTbReq* pCreateTbReq = taosMemoryCalloc(1, sizeof(SVCreateStbReq)); + if (pCreateTbReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + // set tag content + SArray* tagArray = taosArrayInit(1, sizeof(STagVal)); + if (tagArray == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tdDestroySVCreateTbReq(pCreateTbReq); + taosMemoryFreeClear(pCreateTbReq); + return NULL; + } + + // set const + pCreateTbReq->flags = 0; + pCreateTbReq->type = TSDB_CHILD_TABLE; + pCreateTbReq->ctb.suid = suid; + + // set super table name + SName name = {0}; + tNameFromString(&name, stbFullName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); + pCreateTbReq->ctb.stbName = taosStrdup((char*)tNameGetTableName(&name)); + + STagVal tagVal = { .cid = numOfCols, .type = TSDB_DATA_TYPE_UBIGINT, .i64 = pDataBlock->info.id.groupId}; + taosArrayPush(tagArray, &tagVal); + pCreateTbReq->ctb.tagNum = taosArrayGetSize(tagArray); + + STag* pTag = NULL; + tTagNew(tagArray, 1, false, &pTag); + taosArrayDestroy(tagArray); + + if (pTag == NULL) { + 
tdDestroySVCreateTbReq(pCreateTbReq); + taosMemoryFreeClear(pCreateTbReq); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + pCreateTbReq->ctb.pTag = (uint8_t*)pTag; + + // set tag name + SArray* tagName = taosArrayInit(1, TSDB_COL_NAME_LEN); + char k[TSDB_COL_NAME_LEN] = "group_id"; + taosArrayPush(tagName, k); + + pCreateTbReq->ctb.tagName = tagName; + + // set table name + pCreateTbReq->name = taosStrdup(ctbName); + return pCreateTbReq; +} + +static int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, uint64_t groupId, uint64_t uid, + const char* id) { + pTableSinkInfo->uid = uid; + + int32_t code = tqPutTableInfo(pSinkTableMap, groupId, pTableSinkInfo); + if (code != TSDB_CODE_SUCCESS) { + taosMemoryFreeClear(pTableSinkInfo); + tqError("s-task:%s failed to put tableSinkInfo in to cache, code:%s", id, tstrerror(code)); + } else { + tqDebug("s-task:%s new dst table:%s(uid:%" PRIu64 ") added into cache, total:%d", id, pTableSinkInfo->name.data, + pTableSinkInfo->uid, tSimpleHashGetSize(pSinkTableMap)); + } + + return code; +} + +int32_t doSinkResultBlock(SVnode* pVnode, int32_t blockIndex, char* stbFullName, int64_t suid, SSDataBlock* pDataBlock, + SStreamTask* pTask) { + int32_t numOfRows = pDataBlock->info.rows; + int32_t vgId = TD_VID(pVnode); + uint64_t groupId = pDataBlock->info.id.groupId; + STSchema* pTSchema = pTask->tbSink.pTSchema; + int32_t code = TSDB_CODE_SUCCESS; + void* pBuf = NULL; + SArray* pVals = NULL; + const char* id = pTask->id.idStr; + + SSubmitTbData tbData = {.suid = suid, .uid = 0, .sver = pTSchema->version}; + tqDebug("s-task:%s sink data pipeline, build submit msg from %d-th resBlock, including %d rows, dst suid:%" PRId64, + id, blockIndex + 1, numOfRows, suid); + + tbData.aRowP = taosArrayInit(numOfRows, sizeof(SRow*)); + pVals = taosArrayInit(pTSchema->numOfCols, sizeof(SColVal)); + + if (tbData.aRowP == NULL || pVals == NULL) { + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + + code = TSDB_CODE_OUT_OF_MEMORY; + tqError("s-task:%s vgId:%d failed to prepare write stream res blocks, code:%s", id, vgId, tstrerror(code)); + return code; + } + + STableSinkInfo* pTableSinkInfo = NULL; + bool exist = tqGetTableInfo(pTask->tbSink.pTblInfo, groupId, &pTableSinkInfo); + + char* dstTableName = pDataBlock->info.parTbName; + if (exist) { + if (dstTableName[0] == 0) { + tstrncpy(dstTableName, pTableSinkInfo->name.data, pTableSinkInfo->name.len + 1); + tqDebug("s-task:%s vgId:%d, gropuId:%" PRIu64 " datablock table name is null, set name:%s", id, vgId, groupId, + dstTableName); + } else { + if (pTableSinkInfo->uid != 0) { + tqDebug("s-task:%s write %d rows into groupId:%" PRIu64 " dstTable:%s(uid:%" PRIu64 ")", id, numOfRows, groupId, + dstTableName, pTableSinkInfo->uid); + } else { + tqDebug("s-task:%s write %d rows into groupId:%" PRIu64 " dstTable:%s(not set uid yet for the secondary block)", + id, numOfRows, groupId, dstTableName); + } + } + } else { // not exist + if (dstTableName[0] == 0) { + memset(dstTableName, 0, TSDB_TABLE_NAME_LEN); + buildCtbNameByGroupIdImpl(stbFullName, groupId, dstTableName); + } + + int32_t nameLen = strlen(dstTableName); + pTableSinkInfo = taosMemoryCalloc(1, sizeof(STableSinkInfo) + nameLen); + + pTableSinkInfo->name.len = nameLen; + memcpy(pTableSinkInfo->name.data, dstTableName, nameLen); + tqDebug("s-task:%s build new sinkTableInfo to add cache, dstTable:%s", id, dstTableName); + } + + if (exist) { + tbData.uid = pTableSinkInfo->uid; + + if (tbData.uid == 0) { + tqDebug("s-task:%s 
cached tableInfo uid is invalid, acquire it from meta", id); + } + + while (pTableSinkInfo->uid == 0) { + // wait for the table to be created + SMetaReader mr = {0}; + metaReaderDoInit(&mr, pVnode->pMeta, 0); + + code = metaGetTableEntryByName(&mr, dstTableName); + if (code == 0) { // table alreay exists, check its type and uid + bool isValid = isValidDestChildTable(&mr, vgId, dstTableName, suid); + if (!isValid) { // not valid table, ignore it + metaReaderClear(&mr); + + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + + return TSDB_CODE_SUCCESS; + } else { + tqDebug("s-task:%s set uid:%"PRIu64" for dstTable:%s from meta", id, mr.me.uid, pTableSinkInfo->name.data); + + tbData.uid = mr.me.uid; + pTableSinkInfo->uid = mr.me.uid; + metaReaderClear(&mr); + } + } else { // not exist, wait and retry + metaReaderClear(&mr); + taosMsleep(100); + tqDebug("s-task:%s wait for the table:%s ready before insert data", id, dstTableName); + } + } + + } else { + // todo: this check is not safe, and results in losing of submit message from WAL. + // The auto-create option will always set to be open for those submit messages, which arrive during the period + // the creating of the destination table, due to the absence of the user-specified table in TSDB. When scanning + // data from WAL, those submit messages, with auto-created table option, will be discarded expect the first, for + // those mismatched table uids. Only the FIRST table has the correct table uid, and those remain all have + // randomly generated false table uid in the WAL. + SMetaReader mr = {0}; + metaReaderDoInit(&mr, pVnode->pMeta, 0); + + // table not in cache, let's try the extract it from tsdb meta + if (metaGetTableEntryByName(&mr, dstTableName) < 0) { + metaReaderClear(&mr); + + tqDebug("s-task:%s stream write into table:%s, table auto created", id, dstTableName); + + tbData.flags = SUBMIT_REQ_AUTO_CREATE_TABLE; + tbData.pCreateTbReq = buildAutoCreateTableReq(stbFullName, suid, pTSchema->numOfCols + 1, pDataBlock); + if (tbData.pCreateTbReq == NULL) { + tqError("s-task:%s failed to build auto create table req, code:%s", id, tstrerror(terrno)); + + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + + return terrno; + } + + doPutIntoCache(pTask->tbSink.pTblInfo, pTableSinkInfo, groupId, 0, id); + } else { + bool isValid = isValidDestChildTable(&mr, vgId, dstTableName, suid); + if (!isValid) { + metaReaderClear(&mr); + taosMemoryFree(pTableSinkInfo); + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + return TSDB_CODE_SUCCESS; + } else { + tbData.uid = mr.me.uid; + metaReaderClear(&mr); + + doPutIntoCache(pTask->tbSink.pTblInfo, pTableSinkInfo, groupId, tbData.uid, id); + } + } + } + + // rows + for (int32_t j = 0; j < numOfRows; j++) { + taosArrayClear(pVals); + + int32_t dataIndex = 0; + for (int32_t k = 0; k < pTSchema->numOfCols; k++) { + const STColumn* pCol = &pTSchema->columns[k]; + if (k == 0) { + SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); + void* colData = colDataGetData(pColData, j); + tqDebug("s-task:%s tq sink pipe2, row %d, col %d ts %" PRId64, id, j, k, *(int64_t*)colData); + } + + if (IS_SET_NULL(pCol)) { + SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type); + taosArrayPush(pVals, &cv); + } else { + SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); + if (colDataIsNull_s(pColData, j)) { + SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type); + taosArrayPush(pVals, &cv); + dataIndex++; + } else { + void* colData = 
colDataGetData(pColData, j); + if (IS_STR_DATA_TYPE(pCol->type)) { + // address copy, no value + SValue sv = (SValue){.nData = varDataLen(colData), .pData = varDataVal(colData)}; + SColVal cv = COL_VAL_VALUE(pCol->colId, pCol->type, sv); + taosArrayPush(pVals, &cv); + } else { + SValue sv; + memcpy(&sv.val, colData, tDataTypes[pCol->type].bytes); + SColVal cv = COL_VAL_VALUE(pCol->colId, pCol->type, sv); + taosArrayPush(pVals, &cv); + } + dataIndex++; + } + } + } + + SRow* pRow = NULL; + code = tRowBuild(pVals, (STSchema*)pTSchema, &pRow); + if (code != TSDB_CODE_SUCCESS) { + tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); + + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + return code; + } + + ASSERT(pRow); + taosArrayPush(tbData.aRowP, &pRow); + } + + SSubmitReq2 submitReq = {0}; + if (!(submitReq.aSubmitTbData = taosArrayInit(1, sizeof(SSubmitTbData)))) { + tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); + + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + return TSDB_CODE_OUT_OF_MEMORY; + } + + taosArrayPush(submitReq.aSubmitTbData, &tbData); + + // encode + int32_t len = 0; + tEncodeSize(tEncodeSubmitReq, &submitReq, len, code); + + SEncoder encoder; + len += sizeof(SSubmitReq2Msg); + + pBuf = rpcMallocCont(len); + if (NULL == pBuf) { + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); + taosArrayDestroy(tbData.aRowP); + taosArrayDestroy(pVals); + } + + ((SSubmitReq2Msg*)pBuf)->header.vgId = vgId; + ((SSubmitReq2Msg*)pBuf)->header.contLen = htonl(len); + ((SSubmitReq2Msg*)pBuf)->version = htobe64(1); + + tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SSubmitReq2Msg)), len - sizeof(SSubmitReq2Msg)); + if (tEncodeSubmitReq(&encoder, &submitReq) < 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("failed to encode submit req, code:%s, ignore and continue", terrstr()); + tEncoderClear(&encoder); + rpcFreeCont(pBuf); + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); + + return code; + } + + tEncoderClear(&encoder); + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); + + SRpcMsg msg = { .msgType = TDMT_VND_SUBMIT, .pCont = pBuf, .contLen = len }; + code = tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg); + + if(code == TSDB_CODE_SUCCESS) { + tqDebug("s-task:%s send submit msg to dstTable:%s, numOfRows:%d", id, pTableSinkInfo->name.data, numOfRows); + } else { + tqError("s-task:%s failed to put into write-queue since %s", id, terrstr()); + } + + taosArrayDestroy(pVals); + return code; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index ab7093a701..61fc3c7ae9 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -14,128 +14,122 @@ */ #include "meta.h" +#include "streamSnapshot.h" #include "tdbInt.h" #include "tq.h" // STqSnapReader ======================================== -struct STqSnapReader { +struct SStreamStateReader { STQ* pTq; int64_t sver; int64_t ever; TBC* pCur; + + SStreamSnapReader* pReaderImpl; + int32_t complete; // open reader or not }; -int32_t tqSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapReader** ppReader) { - int32_t code = 0; - STqSnapReader* pReader = NULL; +int32_t streamStateSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamStateReader** ppReader) { + int32_t code = 0; + SStreamStateReader* pReader = NULL; + + char tdir[TSDB_FILENAME_LEN * 2] = {0}; // alloc - pReader = (STqSnapReader*)taosMemoryCalloc(1, sizeof(STqSnapReader)); + pReader = 
(SStreamStateReader*)taosMemoryCalloc(1, sizeof(SStreamStateReader)); if (pReader == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } + + SStreamMeta* meta = pTq->pStreamMeta; pReader->pTq = pTq; pReader->sver = sver; pReader->ever = ever; - // impl - code = tdbTbcOpen(pTq->pExecStore, &pReader->pCur, NULL); - if (code) { + int64_t chkpId = meta ? meta->chkpId : 0; + + SStreamSnapReader* pSnapReader = NULL; + + if (streamSnapReaderOpen(pTq, sver, chkpId, pTq->path, &pSnapReader) == 0) { + pReader->complete = 1; + } else { + code = -1; taosMemoryFree(pReader); goto _err; } + pReader->pReaderImpl = pSnapReader; - code = tdbTbcMoveToFirst(pReader->pCur); - if (code) { - taosMemoryFree(pReader); - goto _err; - } - - tqInfo("vgId:%d, vnode snapshot tq reader opened", TD_VID(pTq->pVnode)); + tqDebug("vgId:%d, vnode %s snapshot reader opened", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER); *ppReader = pReader; return code; _err: - tqError("vgId:%d, vnode snapshot tq reader open failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); + tqError("vgId:%d, vnode %s snapshot reader failed to open since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, + tstrerror(code)); *ppReader = NULL; return code; } -int32_t tqSnapReaderClose(STqSnapReader** ppReader) { +int32_t streamStateSnapReaderClose(SStreamStateReader* pReader) { int32_t code = 0; - - tdbTbcClose((*ppReader)->pCur); - taosMemoryFree(*ppReader); - *ppReader = NULL; - + tqDebug("vgId:%d, vnode %s snapshot reader closed", TD_VID(pReader->pTq->pVnode), STREAM_STATE_TRANSFER); + streamSnapReaderClose(pReader->pReaderImpl); + taosMemoryFree(pReader); return code; } -int32_t tqSnapRead(STqSnapReader* pReader, uint8_t** ppData) { - int32_t code = 0; - const void* pKey = NULL; - const void* pVal = NULL; - int32_t kLen = 0; - int32_t vLen = 0; - SDecoder decoder; - STqHandle handle; +int32_t streamStateSnapRead(SStreamStateReader* pReader, uint8_t** ppData) { + tqDebug("vgId:%d, vnode %s snapshot read data", TD_VID(pReader->pTq->pVnode), STREAM_STATE_TRANSFER); - *ppData = NULL; - for (;;) { - if (tdbTbcGet(pReader->pCur, &pKey, &kLen, &pVal, &vLen)) { - goto _exit; - } - - tDecoderInit(&decoder, (uint8_t*)pVal, vLen); - tDecodeSTqHandle(&decoder, &handle); - tDecoderClear(&decoder); - - if (handle.snapshotVer <= pReader->sver && handle.snapshotVer >= pReader->ever) { - tdbTbcMoveToNext(pReader->pCur); - break; - } else { - tdbTbcMoveToNext(pReader->pCur); - } + int32_t code = 0; + if (pReader->complete == 0) { + return 0; } - *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + vLen); + uint8_t* rowData = NULL; + int64_t len; + code = streamSnapRead(pReader->pReaderImpl, &rowData, &len); + if (rowData == NULL || len == 0) { + return code; + } + *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + len); if (*ppData == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - + // refactor later, avoid mem/free freq SSnapDataHdr* pHdr = (SSnapDataHdr*)(*ppData); - pHdr->type = SNAP_DATA_TQ_HANDLE; - pHdr->size = vLen; - memcpy(pHdr->data, pVal, vLen); - - tqInfo("vgId:%d, vnode snapshot tq read data, version:%" PRId64 " subKey: %s vLen:%d", TD_VID(pReader->pTq->pVnode), - handle.snapshotVer, handle.subKey, vLen); - -_exit: + pHdr->type = SNAP_DATA_STREAM_STATE_BACKEND; + pHdr->size = len; + memcpy(pHdr->data, rowData, len); + tqDebug("vgId:%d, vnode stream-state snapshot read data success", TD_VID(pReader->pTq->pVnode)); return code; _err: - tqError("vgId:%d, vnode snapshot tq read data failed since %s", TD_VID(pReader->pTq->pVnode), tstrerror(code)); + 
tqError("vgId:%d, vnode stream-state snapshot failed to read since %s", TD_VID(pReader->pTq->pVnode), + tstrerror(code)); return code; } // STqSnapWriter ======================================== -struct STqSnapWriter { +struct SStreamStateWriter { STQ* pTq; int64_t sver; int64_t ever; TXN* txn; + + SStreamSnapWriter* pWriterImpl; }; -int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** ppWriter) { - int32_t code = 0; - STqSnapWriter* pWriter; +int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamStateWriter** ppWriter) { + int32_t code = 0; + SStreamStateWriter* pWriter; + char tdir[TSDB_FILENAME_LEN * 2] = {0}; // alloc - pWriter = (STqSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); + pWriter = (SStreamStateWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); if (pWriter == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -144,68 +138,48 @@ int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** p pWriter->sver = sver; pWriter->ever = ever; - if (tdbBegin(pTq->pMetaDB, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { - code = -1; - taosMemoryFree(pWriter); + sprintf(tdir, "%s%s%s%s%s", pTq->path, TD_DIRSEP, VNODE_TQ_STREAM, TD_DIRSEP, "received"); + taosMkDir(tdir); + + SStreamSnapWriter* pSnapWriter = NULL; + if (streamSnapWriterOpen(pTq, sver, ever, tdir, &pSnapWriter) < 0) { goto _err; } + tqDebug("vgId:%d, vnode %s snapshot writer opened, path:%s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, tdir); + pWriter->pWriterImpl = pSnapWriter; + *ppWriter = pWriter; return code; - _err: - tqError("vgId:%d, tq snapshot writer open failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); - *ppWriter = NULL; - return code; -} - -int32_t tqSnapWriterClose(STqSnapWriter** ppWriter, int8_t rollback) { - int32_t code = 0; - STqSnapWriter* pWriter = *ppWriter; - STQ* pTq = pWriter->pTq; - - if (rollback) { - tdbAbort(pWriter->pTq->pMetaDB, pWriter->txn); - } else { - code = tdbCommit(pWriter->pTq->pMetaDB, pWriter->txn); - if (code) goto _err; - code = tdbPostCommit(pWriter->pTq->pMetaDB, pWriter->txn); - if (code) goto _err; - } - + tqError("vgId:%d, vnode %s snapshot writer failed to open since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, + tstrerror(code)); taosMemoryFree(pWriter); *ppWriter = NULL; + return -1; +} - // restore from metastore - if (tqMetaRestoreHandle(pTq) < 0) { - goto _err; +int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback) { + int32_t code = 0; + tqDebug("vgId:%d, vnode %s snapshot writer closed", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); + code = streamSnapWriterClose(pWriter->pWriterImpl, rollback); + + return code; +} +int32_t streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId) { + tqDebug("vgId:%d, vnode %s start to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); + int32_t code = streamMetaReopen(pWriter->pTq->pStreamMeta, chkpId); + if (code == 0) { + code = streamStateLoadTasks(pWriter); } - - return code; - -_err: - tqError("vgId:%d, tq snapshot writer close failed since %s", TD_VID(pWriter->pTq->pVnode), tstrerror(code)); + tqDebug("vgId:%d, vnode %s succ to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); + taosMemoryFree(pWriter); return code; } -int32_t tqSnapWrite(STqSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { - int32_t code = 0; - STQ* pTq = pWriter->pTq; - SDecoder decoder = {0}; - SDecoder* pDecoder = &decoder; - STqHandle handle; +int32_t 
streamStateLoadTasks(SStreamStateWriter* pWriter) { return streamMetaLoadAllTasks(pWriter->pTq->pStreamMeta); } - tDecoderInit(pDecoder, pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr)); - code = tDecodeSTqHandle(pDecoder, &handle); - if (code) goto _err; - code = tqMetaSaveHandle(pTq, handle.subKey, &handle); - if (code < 0) goto _err; - tDecoderClear(pDecoder); - - return code; - -_err: - tDecoderClear(pDecoder); - tqError("vgId:%d, vnode snapshot tq write failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); - return code; +int32_t streamStateSnapWrite(SStreamStateWriter* pWriter, uint8_t* pData, uint32_t nData) { + tqDebug("vgId:%d, vnode %s snapshot write data", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); + return streamSnapWrite(pWriter->pWriterImpl, pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr)); } diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqStreamTask.c similarity index 67% rename from source/dnode/vnode/src/tq/tqRestore.c rename to source/dnode/vnode/src/tq/tqStreamTask.c index ed612587f5..3c0321f300 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -14,13 +14,14 @@ */ #include "tq.h" +#include "vnd.h" -static int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle); -static int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId); +static int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle); +static int32_t setWalReaderStartOffset(SStreamTask* pTask, int32_t vgId); +static void handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver); -// this function should be executed by stream threads. -// extract submit block from WAL, and add them into the input queue for the sources tasks. -int32_t tqStreamTasksScanWal(STQ* pTq) { +// extract data blocks(submit/delete) from WAL, and add them into the input queue for all the sources tasks. 
+int32_t tqScanWal(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; int64_t st = taosGetTimestampMs(); @@ -29,9 +30,9 @@ int32_t tqStreamTasksScanWal(STQ* pTq) { int32_t scan = pMeta->walScanCounter; tqDebug("vgId:%d continue check if data in wal are available, walScanCounter:%d", vgId, scan); - // check all restore tasks + // check all tasks bool shouldIdle = true; - createStreamTaskRunReq(pTq->pStreamMeta, &shouldIdle); + doScanWalForAllTasks(pTq->pStreamMeta, &shouldIdle); int32_t times = 0; @@ -56,12 +57,12 @@ int32_t tqStreamTasksScanWal(STQ* pTq) { return 0; } -int32_t tqStreamTasksStatusCheck(STQ* pTq) { +int32_t tqCheckAndRunStreamTask(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - tqDebug("vgId:%d start to check all (%d) stream tasks downstream status", vgId, numOfTasks); + tqDebug("vgId:%d start to check all %d stream task(s) downstream status", vgId, numOfTasks); if (numOfTasks == 0) { return TSDB_CODE_SUCCESS; } @@ -71,20 +72,31 @@ int32_t tqStreamTasksStatusCheck(STQ* pTq) { pTaskList = taosArrayDup(pMeta->pTaskList, NULL); taosWUnLockLatch(&pMeta->lock); + // broadcast the check downstream tasks msg for (int32_t i = 0; i < numOfTasks; ++i) { - SStreamId* pTaskId = taosArrayGet(pTaskList, i); - SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); + SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); if (pTask == NULL) { continue; } + // fill-history task can only be launched by related stream tasks. if (pTask->info.fillHistory == 1) { - tqDebug("s-task:%s fill-history task, wait for related stream task:0x%x to launch it", pTask->id.idStr, - pTask->streamTaskId.taskId); + streamMetaReleaseTask(pMeta, pTask); continue; } - streamTaskDoCheckDownstreamTasks(pTask); + if (pTask->status.downstreamReady == 1) { + tqDebug("s-task:%s downstream ready, no need to check downstream, check only related fill-history task", + pTask->id.idStr); + streamLaunchFillHistoryTask(pTask); + streamMetaReleaseTask(pMeta, pTask); + continue; + } + + streamSetStatusNormal(pTask); + streamTaskCheckDownstream(pTask); + streamMetaReleaseTask(pMeta, pTask); } @@ -92,7 +104,7 @@ int32_t tqStreamTasksStatusCheck(STQ* pTq) { return 0; } -int32_t tqCheckStreamStatus(STQ* pTq) { +int32_t tqCheckAndRunStreamTaskAsync(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; @@ -113,10 +125,10 @@ int32_t tqCheckStreamStatus(STQ* pTq) { return -1; } - tqDebug("vgId:%d check for stream tasks status, numOfTasks:%d", vgId, numOfTasks); + tqDebug("vgId:%d check %d stream task(s) status async", vgId, numOfTasks); pRunReq->head.vgId = vgId; pRunReq->streamId = 0; - pRunReq->taskId = STREAM_TASK_STATUS_CHECK_ID; + pRunReq->taskId = STREAM_EXEC_TASK_STATUS_CHECK_ID; SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); @@ -125,10 +137,15 @@ int32_t tqCheckStreamStatus(STQ* pTq) { return 0; } -int32_t tqStartStreamTasks(STQ* pTq) { +int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; + // do not launch the stream tasks, if it is a follower or not restored vnode. 
+ if (!(vnodeIsRoleLeader(pTq->pVnode) && pTq->pVnode->restored)) { + return TSDB_CODE_SUCCESS; + } + taosWLockLatch(&pMeta->lock); int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); @@ -146,6 +163,16 @@ int32_t tqStartStreamTasks(STQ* pTq) { return 0; } + int32_t numOfPauseTasks = pTq->pStreamMeta->pauseTaskNum; + if (ckPause && numOfTasks == numOfPauseTasks) { + tqDebug("vgId:%d ignore all submit, all streams had been paused, reset the walScanCounter", vgId); + + // reset the counter value, since we do not launch the scan wal operation. + pMeta->walScanCounter = 0; + taosWUnLockLatch(&pMeta->lock); + return 0; + } + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); if (pRunReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -157,7 +184,7 @@ int32_t tqStartStreamTasks(STQ* pTq) { tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d", vgId, numOfTasks); pRunReq->head.vgId = vgId; pRunReq->streamId = 0; - pRunReq->taskId = EXTRACT_DATA_FROM_WAL_ID; + pRunReq->taskId = STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID; SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); @@ -166,12 +193,43 @@ int32_t tqStartStreamTasks(STQ* pTq) { return 0; } -int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) { +int32_t tqStopStreamTasks(STQ* pTq) { + SStreamMeta* pMeta = pTq->pStreamMeta; + int32_t vgId = TD_VID(pTq->pVnode); + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + + tqDebug("vgId:%d start to stop all %d stream task(s)", vgId, numOfTasks); + + if (numOfTasks == 0) { + return TSDB_CODE_SUCCESS; + } + + SArray* pTaskList = NULL; + taosWLockLatch(&pMeta->lock); + pTaskList = taosArrayDup(pMeta->pTaskList, NULL); + taosWUnLockLatch(&pMeta->lock); + + for (int32_t i = 0; i < numOfTasks; ++i) { + SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); + if (pTask == NULL) { + continue; + } + + streamTaskStop(pTask); + streamMetaReleaseTask(pMeta, pTask); + } + + taosArrayDestroy(pTaskList); + return 0; +} + +int32_t setWalReaderStartOffset(SStreamTask* pTask, int32_t vgId) { // seek the stored version and extract data from WAL int64_t firstVer = walReaderGetValidFirstVer(pTask->exec.pWalReader); if (pTask->chkInfo.currentVer < firstVer) { - tqWarn("vgId:%d s-task:%s ver:%"PRId64" earlier than the first ver of wal range %" PRId64 ", forward to %" PRId64, vgId, - pTask->id.idStr, pTask->chkInfo.currentVer, firstVer, firstVer); + tqWarn("vgId:%d s-task:%s ver:%" PRId64 " earlier than the first ver of wal range %" PRId64 ", forward to %" PRId64, + vgId, pTask->id.idStr, pTask->chkInfo.currentVer, firstVer, firstVer); pTask->chkInfo.currentVer = firstVer; @@ -192,7 +250,8 @@ int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) { } // append the data for the stream - tqDebug("vgId:%d s-task:%s wal reader initial seek to ver:%" PRId64, vgId, pTask->id.idStr, pTask->chkInfo.currentVer); + tqDebug("vgId:%d s-task:%s wal reader initial seek to ver:%" PRId64, vgId, pTask->id.idStr, + pTask->chkInfo.currentVer); } } @@ -209,7 +268,8 @@ int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) { return TSDB_CODE_SUCCESS; } -static void checkForFillHistoryVerRange(SStreamTask* pTask, int64_t ver) { +// todo handle memory error +void handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver) { const char* id = pTask->id.idStr; 
int64_t maxVer = pTask->dataRange.range.maxVer; @@ -221,8 +281,8 @@ static void checkForFillHistoryVerRange(SStreamTask* pTask, int64_t ver) { double el = (taosGetTimestampMs() - pTask->tsInfo.step2Start) / 1000.0; qDebug("s-task:%s scan-history from WAL stage(step 2) ended, elapsed time:%.2fs", id, el); - appendTranstateIntoInputQ(pTask); - /*int32_t code = */streamSchedExec(pTask); + /*int32_t code = */streamTaskPutTranstateIntoInputQ(pTask); + /*int32_t code = */ streamSchedExec(pTask); } else { qWarn("s-task:%s fill-history scan WAL, currentVer:%" PRId64 " reach the maximum ver:%" PRId64 ", not scan wal", id, ver, maxVer); @@ -230,7 +290,7 @@ static void checkForFillHistoryVerRange(SStreamTask* pTask, int64_t ver) { } } -int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { +int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle) { *pScanIdle = true; bool noDataInWal = true; int32_t vgId = pStreamMeta->vgId; @@ -252,7 +312,7 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { numOfTasks = taosArrayGetSize(pTaskList); for (int32_t i = 0; i < numOfTasks; ++i) { - SStreamId* pTaskId = taosArrayGet(pTaskList, i); + SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); SStreamTask* pTask = streamMetaAcquireTask(pStreamMeta, pTaskId->streamId, pTaskId->taskId); if (pTask == NULL) { continue; @@ -266,8 +326,9 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { continue; } + const char* pStatus = streamGetTaskStatusStr(status); if (status != TASK_STATUS__NORMAL) { - tqDebug("s-task:%s not ready for new submit block from wal, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status)); + tqDebug("s-task:%s not ready for new submit block from wal, status:%s", pTask->id.idStr, pStatus); streamMetaReleaseTask(pStreamMeta, pTask); continue; } @@ -276,19 +337,19 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { ASSERT(status == TASK_STATUS__NORMAL); // the maximum version of data in the WAL has reached already, the step2 is done tqDebug("s-task:%s fill-history reach the maximum ver:%" PRId64 ", not scan wal anymore", pTask->id.idStr, - pTask->dataRange.range.maxVer); + pTask->dataRange.range.maxVer); streamMetaReleaseTask(pStreamMeta, pTask); continue; } - if (tInputQueueIsFull(pTask)) { + if (streamQueueIsFull(pTask->inputInfo.queue->pQueue)) { tqTrace("s-task:%s input queue is full, do nothing", pTask->id.idStr); streamMetaReleaseTask(pStreamMeta, pTask); continue; } // downstream task has blocked the output, stopped for a while - if (pTask->inputStatus == TASK_INPUT_STATUS__BLOCKED) { + if (pTask->inputInfo.status == TASK_INPUT_STATUS__BLOCKED) { tqDebug("s-task:%s inputQ is blocked, do nothing", pTask->id.idStr); streamMetaReleaseTask(pStreamMeta, pTask); continue; @@ -297,31 +358,41 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { *pScanIdle = false; // seek the stored version and extract data from WAL - int32_t code = doSetOffsetForWalReader(pTask, vgId); + int32_t code = setWalReaderStartOffset(pTask, vgId); if (code != TSDB_CODE_SUCCESS) { streamMetaReleaseTask(pStreamMeta, pTask); continue; } - int32_t numOfItemsInQ = taosQueueItemSize(pTask->inputQueue->queue); - int64_t maxVer = (pTask->info.fillHistory == 1)? pTask->dataRange.range.maxVer:INT64_MAX; + int32_t numOfItems = streamTaskGetInputQItems(pTask); + int64_t maxVer = (pTask->info.fillHistory == 1) ? 
pTask->dataRange.range.maxVer : INT64_MAX; SStreamQueueItem* pItem = NULL; - code = extractMsgFromWal(pTask->exec.pWalReader, (void**) &pItem, maxVer, pTask->id.idStr); + code = extractMsgFromWal(pTask->exec.pWalReader, (void**)&pItem, maxVer, pTask->id.idStr); - if ((code != TSDB_CODE_SUCCESS || pItem == NULL) && (numOfItemsInQ == 0)) { // failed, continue - checkForFillHistoryVerRange(pTask, walReaderGetCurrentVer(pTask->exec.pWalReader)); + if ((code != TSDB_CODE_SUCCESS || pItem == NULL) && (numOfItems == 0)) { // failed, continue + handleFillhistoryScanComplete(pTask, walReaderGetCurrentVer(pTask->exec.pWalReader)); + streamMetaReleaseTask(pStreamMeta, pTask); + continue; + } + + taosThreadMutexLock(&pTask->lock); + pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); + + if (pTask->status.taskStatus != TASK_STATUS__NORMAL) { + tqDebug("s-task:%s not ready for submit block from wal, status:%s", pTask->id.idStr, pStatus); + taosThreadMutexUnlock(&pTask->lock); streamMetaReleaseTask(pStreamMeta, pTask); continue; } if (pItem != NULL) { noDataInWal = false; - code = tAppendDataToInputQueue(pTask, pItem); + code = streamTaskPutDataIntoInputQ(pTask, pItem); if (code == TSDB_CODE_SUCCESS) { int64_t ver = walReaderGetCurrentVer(pTask->exec.pWalReader); pTask->chkInfo.currentVer = ver; - checkForFillHistoryVerRange(pTask, ver); + handleFillhistoryScanComplete(pTask, ver); tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, ver); } else { tqError("s-task:%s append input queue failed, too many in inputQ, ver:%" PRId64, pTask->id.idStr, @@ -329,7 +400,9 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { } } - if ((code == TSDB_CODE_SUCCESS) || (numOfItemsInQ > 0)) { + taosThreadMutexUnlock(&pTask->lock); + + if ((code == TSDB_CODE_SUCCESS) || (numOfItems > 0)) { code = streamSchedExec(pTask); if (code != TSDB_CODE_SUCCESS) { streamMetaReleaseTask(pStreamMeta, pTask); @@ -348,4 +421,3 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { taosArrayDestroy(pTaskList); return 0; } - diff --git a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c index ab7093a701..2d58a10e51 100644 --- a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c @@ -18,19 +18,26 @@ #include "tq.h" // STqSnapReader ======================================== -struct STqSnapReader { + +typedef struct { + int8_t type; + TTB* tbl; +} STablePair; +struct SStreamTaskReader { STQ* pTq; int64_t sver; int64_t ever; TBC* pCur; + SArray* tdbTbList; + int8_t pos; }; -int32_t tqSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapReader** ppReader) { - int32_t code = 0; - STqSnapReader* pReader = NULL; +int32_t streamTaskSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTaskReader** ppReader) { + int32_t code = 0; + SStreamTaskReader* pReader = NULL; // alloc - pReader = (STqSnapReader*)taosMemoryCalloc(1, sizeof(STqSnapReader)); + pReader = (SStreamTaskReader*)taosMemoryCalloc(1, sizeof(SStreamTaskReader)); if (pReader == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -38,68 +45,100 @@ int32_t tqSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapReader** p pReader->pTq = pTq; pReader->sver = sver; pReader->ever = ever; + pReader->tdbTbList = taosArrayInit(4, sizeof(STablePair)); - // impl - code = tdbTbcOpen(pTq->pExecStore, &pReader->pCur, NULL); + STablePair pair1 = {.tbl = pTq->pStreamMeta->pTaskDb, .type = 
SNAP_DATA_STREAM_TASK}; + taosArrayPush(pReader->tdbTbList, &pair1); + + STablePair pair2 = {.tbl = pTq->pStreamMeta->pCheckpointDb, .type = SNAP_DATA_STREAM_TASK_CHECKPOINT}; + taosArrayPush(pReader->tdbTbList, &pair2); + + pReader->pos = 0; + + STablePair* pPair = taosArrayGet(pReader->tdbTbList, pReader->pos); + code = tdbTbcOpen(pPair->tbl, &pReader->pCur, NULL); if (code) { + tqInfo("vgId:%d, vnode stream-task snapshot reader failed to open, reason: %s", TD_VID(pTq->pVnode), + tstrerror(code)); taosMemoryFree(pReader); goto _err; } code = tdbTbcMoveToFirst(pReader->pCur); if (code) { + tqInfo("vgId:%d, vnode stream-task snapshot reader failed to iterate, reason: %s", TD_VID(pTq->pVnode), + tstrerror(code)); taosMemoryFree(pReader); goto _err; } - tqInfo("vgId:%d, vnode snapshot tq reader opened", TD_VID(pTq->pVnode)); + tqDebug("vgId:%d, vnode stream-task snapshot reader opened", TD_VID(pTq->pVnode)); *ppReader = pReader; return code; _err: - tqError("vgId:%d, vnode snapshot tq reader open failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); + tqError("vgId:%d, vnode stream-task snapshot reader open failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); *ppReader = NULL; return code; } -int32_t tqSnapReaderClose(STqSnapReader** ppReader) { +int32_t streamTaskSnapReaderClose(SStreamTaskReader* pReader) { int32_t code = 0; - - tdbTbcClose((*ppReader)->pCur); - taosMemoryFree(*ppReader); - *ppReader = NULL; - + tqInfo("vgId:%d, vnode stream-task snapshot reader closed", TD_VID(pReader->pTq->pVnode)); + taosArrayDestroy(pReader->tdbTbList); + tdbTbcClose(pReader->pCur); + taosMemoryFree(pReader); return code; } -int32_t tqSnapRead(STqSnapReader* pReader, uint8_t** ppData) { +int32_t streamTaskSnapRead(SStreamTaskReader* pReader, uint8_t** ppData) { int32_t code = 0; const void* pKey = NULL; - const void* pVal = NULL; + void* pVal = NULL; int32_t kLen = 0; int32_t vLen = 0; SDecoder decoder; STqHandle handle; *ppData = NULL; + int8_t except = 0; + tqDebug("vgId:%d, vnode stream-task snapshot start read data", TD_VID(pReader->pTq->pVnode)); + + STablePair* pPair = taosArrayGet(pReader->tdbTbList, pReader->pos); +NextTbl: + except = 0; for (;;) { - if (tdbTbcGet(pReader->pCur, &pKey, &kLen, &pVal, &vLen)) { - goto _exit; - } - - tDecoderInit(&decoder, (uint8_t*)pVal, vLen); - tDecodeSTqHandle(&decoder, &handle); - tDecoderClear(&decoder); - - if (handle.snapshotVer <= pReader->sver && handle.snapshotVer >= pReader->ever) { - tdbTbcMoveToNext(pReader->pCur); + const void* tVal = NULL; + int32_t tLen = 0; + if (tdbTbcGet(pReader->pCur, &pKey, &kLen, &tVal, &tLen)) { + except = 1; break; } else { - tdbTbcMoveToNext(pReader->pCur); + pVal = taosMemoryCalloc(1, tLen); + memcpy(pVal, tVal, tLen); + vLen = tLen; + } + tdbTbcMoveToNext(pReader->pCur); + break; + } + if (except == 1) { + if (pReader->pos + 1 < taosArrayGetSize(pReader->tdbTbList)) { + tdbTbcClose(pReader->pCur); + + pReader->pos += 1; + pPair = taosArrayGet(pReader->tdbTbList, pReader->pos); + code = tdbTbcOpen(pPair->tbl, &pReader->pCur, NULL); + tdbTbcMoveToFirst(pReader->pCur); + + goto NextTbl; } } - + if (pVal == NULL || vLen == 0) { + *ppData = NULL; + tqDebug("vgId:%d, vnode stream-task snapshot finished read data", TD_VID(pReader->pTq->pVnode)); + return code; + } *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + vLen); if (*ppData == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -107,35 +146,34 @@ int32_t tqSnapRead(STqSnapReader* pReader, uint8_t** ppData) { } SSnapDataHdr* pHdr = (SSnapDataHdr*)(*ppData); - pHdr->type = 
SNAP_DATA_TQ_HANDLE; + pHdr->type = pPair->type; pHdr->size = vLen; memcpy(pHdr->data, pVal, vLen); + taosMemoryFree(pVal); - tqInfo("vgId:%d, vnode snapshot tq read data, version:%" PRId64 " subKey: %s vLen:%d", TD_VID(pReader->pTq->pVnode), - handle.snapshotVer, handle.subKey, vLen); + tqDebug("vgId:%d, vnode stream-task snapshot read data vLen:%d", TD_VID(pReader->pTq->pVnode), vLen); -_exit: return code; - _err: - tqError("vgId:%d, vnode snapshot tq read data failed since %s", TD_VID(pReader->pTq->pVnode), tstrerror(code)); + tqError("vgId:%d, vnode stream-task snapshot read data failed since %s", TD_VID(pReader->pTq->pVnode), + tstrerror(code)); return code; } // STqSnapWriter ======================================== -struct STqSnapWriter { +struct SStreamTaskWriter { STQ* pTq; int64_t sver; int64_t ever; TXN* txn; }; -int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** ppWriter) { - int32_t code = 0; - STqSnapWriter* pWriter; +int32_t streamTaskSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTaskWriter** ppWriter) { + int32_t code = 0; + SStreamTaskWriter* pWriter; // alloc - pWriter = (STqSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); + pWriter = (SStreamTaskWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); if (pWriter == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -144,68 +182,84 @@ int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** p pWriter->sver = sver; pWriter->ever = ever; - if (tdbBegin(pTq->pMetaDB, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { + if (tdbBegin(pTq->pStreamMeta->db, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { code = -1; taosMemoryFree(pWriter); goto _err; } *ppWriter = pWriter; + tqDebug("vgId:%d, vnode stream-task snapshot writer opened", TD_VID(pTq->pVnode)); return code; _err: - tqError("vgId:%d, tq snapshot writer open failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); + tqError("vgId:%d, vnode stream-task snapshot writer failed to write since %s", TD_VID(pTq->pVnode), tstrerror(code)); *ppWriter = NULL; return code; + return 0; } -int32_t tqSnapWriterClose(STqSnapWriter** ppWriter, int8_t rollback) { - int32_t code = 0; - STqSnapWriter* pWriter = *ppWriter; - STQ* pTq = pWriter->pTq; +int32_t streamTaskSnapWriterClose(SStreamTaskWriter* pWriter, int8_t rollback) { + int32_t code = 0; + STQ* pTq = pWriter->pTq; + tqDebug("vgId:%d, vnode stream-task snapshot writer closed", TD_VID(pTq->pVnode)); if (rollback) { - tdbAbort(pWriter->pTq->pMetaDB, pWriter->txn); + tdbAbort(pWriter->pTq->pStreamMeta->db, pWriter->txn); } else { - code = tdbCommit(pWriter->pTq->pMetaDB, pWriter->txn); + code = tdbCommit(pWriter->pTq->pStreamMeta->db, pWriter->txn); if (code) goto _err; - code = tdbPostCommit(pWriter->pTq->pMetaDB, pWriter->txn); + code = tdbPostCommit(pWriter->pTq->pStreamMeta->db, pWriter->txn); if (code) goto _err; } taosMemoryFree(pWriter); - *ppWriter = NULL; // restore from metastore - if (tqMetaRestoreHandle(pTq) < 0) { - goto _err; + // if (tqMetaRestoreHandle(pTq) < 0) { + // goto _err; + // } + + return code; + +_err: + tqError("vgId:%d, vnode stream-task snapshot writer failed to close since %s", TD_VID(pWriter->pTq->pVnode), + tstrerror(code)); + return code; + return 0; +} + +int32_t streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t nData) { + int32_t code = 0; + STQ* pTq = pWriter->pTq; + STqHandle handle; + SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; + if (pHdr->type == SNAP_DATA_STREAM_TASK) { + 
SStreamTaskId task = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr)); + + code = tDecodeStreamTaskId(&decoder, &task); + if (code < 0) { + tDecoderClear(&decoder); + goto _err; + } + tDecoderClear(&decoder); + // tdbTbInsert(TTB *pTb, const void *pKey, int keyLen, const void *pVal, int valLen, TXN *pTxn) + int64_t key[2] = {task.streamId, task.taskId}; + if (tdbTbUpsert(pTq->pStreamMeta->pTaskDb, key, sizeof(int64_t) << 1, (uint8_t*)pData + sizeof(SSnapDataHdr), + nData - sizeof(SSnapDataHdr), pWriter->txn) < 0) { + return -1; + } + } else if (pHdr->type == SNAP_DATA_STREAM_TASK_CHECKPOINT) { + // do nothing } + tqDebug("vgId:%d, vnode stream-task snapshot write", TD_VID(pTq->pVnode)); return code; _err: - tqError("vgId:%d, tq snapshot writer close failed since %s", TD_VID(pWriter->pTq->pVnode), tstrerror(code)); - return code; -} - -int32_t tqSnapWrite(STqSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { - int32_t code = 0; - STQ* pTq = pWriter->pTq; - SDecoder decoder = {0}; - SDecoder* pDecoder = &decoder; - STqHandle handle; - - tDecoderInit(pDecoder, pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr)); - code = tDecodeSTqHandle(pDecoder, &handle); - if (code) goto _err; - code = tqMetaSaveHandle(pTq, handle.subKey, &handle); - if (code < 0) goto _err; - tDecoderClear(pDecoder); - - return code; - -_err: - tDecoderClear(pDecoder); - tqError("vgId:%d, vnode snapshot tq write failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); + tqError("vgId:%d, vnode stream-task snapshot failed to write since %s", TD_VID(pTq->pVnode), tstrerror(code)); return code; } diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index b7fd505784..60d23663d0 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -36,6 +36,12 @@ int32_t tqInitDataRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset) { return 0; } +void tqUpdateNodeStage(STQ* pTq) { + SSyncState state = syncGetState(pTq->pVnode->sync); + pTq->pStreamMeta->stage = state.term; + tqDebug("vgId:%d update the meta stage to be:%"PRId64, pTq->pStreamMeta->vgId, pTq->pStreamMeta->stage); +} + static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, STqOffsetVal pOffset) { pRsp->reqOffset = pOffset; pRsp->rspOffset = pOffset; @@ -400,3 +406,56 @@ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* tmsgSendRsp(&rsp); return 0; } + +int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock) { + SDecoder* pCoder = &(SDecoder){0}; + SDeleteRes* pRes = &(SDeleteRes){0}; + + *pRefBlock = NULL; + + pRes->uidList = taosArrayInit(0, sizeof(tb_uid_t)); + if (pRes->uidList == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + tDecoderInit(pCoder, (uint8_t*)pData, len); + tDecodeDeleteRes(pCoder, pRes); + tDecoderClear(pCoder); + + int32_t numOfTables = taosArrayGetSize(pRes->uidList); + if (numOfTables == 0 || pRes->affectedRows == 0) { + taosArrayDestroy(pRes->uidList); + return TSDB_CODE_SUCCESS; + } + + SSDataBlock* pDelBlock = createSpecialDataBlock(STREAM_DELETE_DATA); + blockDataEnsureCapacity(pDelBlock, numOfTables); + pDelBlock->info.rows = numOfTables; + pDelBlock->info.version = ver; + + for (int32_t i = 0; i < numOfTables; i++) { + // start key column + SColumnInfoData* pStartCol = taosArrayGet(pDelBlock->pDataBlock, START_TS_COLUMN_INDEX); + colDataSetVal(pStartCol, i, (const char*)&pRes->skey, false); // end key column + SColumnInfoData* 
pEndCol = taosArrayGet(pDelBlock->pDataBlock, END_TS_COLUMN_INDEX); + colDataSetVal(pEndCol, i, (const char*)&pRes->ekey, false); + // uid column + SColumnInfoData* pUidCol = taosArrayGet(pDelBlock->pDataBlock, UID_COLUMN_INDEX); + int64_t* pUid = taosArrayGet(pRes->uidList, i); + colDataSetVal(pUidCol, i, (const char*)pUid, false); + + colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, GROUPID_COLUMN_INDEX), i); + colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX), i); + colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX), i); + } + + taosArrayDestroy(pRes->uidList); + *pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); + if (*pRefBlock == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + (*pRefBlock)->type = STREAM_INPUT__REF_DATA_BLOCK; + (*pRefBlock)->pBlock = pDelBlock; + return TSDB_CODE_SUCCESS; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbOpen.c b/source/dnode/vnode/src/tsdb/tsdbOpen.c index c684ad5184..14aa2a84a9 100644 --- a/source/dnode/vnode/src/tsdb/tsdbOpen.c +++ b/source/dnode/vnode/src/tsdb/tsdbOpen.c @@ -88,6 +88,9 @@ _err: int tsdbClose(STsdb **pTsdb) { if (*pTsdb) { + STsdb *pdb = *pTsdb; + tsdbDebug("vgId:%d, tsdb is close at %s, days:%d, keep:%d,%d,%d", TD_VID(pdb->pVnode), pdb->path, pdb->keepCfg.days, + pdb->keepCfg.keep0, pdb->keepCfg.keep1, pdb->keepCfg.keep2); taosThreadRwlockWrlock(&(*pTsdb)->rwLock); tsdbMemTableDestroy((*pTsdb)->mem, true); (*pTsdb)->mem = NULL; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index c10d8c628d..fa3e00ce0f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -1102,10 +1102,10 @@ static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* p (pVerRange->maxVer < pBlock->record.maxVer && pVerRange->maxVer >= pBlock->record.minVer); } -static bool getNeighborBlockOfSameTable(SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pTableBlockScanInfo, +static bool getNeighborBlockOfSameTable(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pTableBlockScanInfo, int32_t* nextIndex, int32_t order, SBrinRecord* pRecord) { bool asc = ASCENDING_TRAVERSE(order); - if (asc && pBlockInfo->tbBlockIdx >= taosArrayGetSize(pTableBlockScanInfo->pBlockList) - 1) { + if (asc && pBlockInfo->tbBlockIdx >= taosArrayGetSize(pTableBlockScanInfo->pBlockIdxList) - 1) { return false; } @@ -1116,7 +1116,8 @@ static bool getNeighborBlockOfSameTable(SFileDataBlockInfo* pBlockInfo, STableBl int32_t step = asc ? 
1 : -1; // *nextIndex = pBlockInfo->tbBlockIdx + step; // *pBlockIndex = *(SBlockIndex*)taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex); - SBrinRecord* p = taosArrayGet(pTableBlockScanInfo->pBlockList, pBlockInfo->tbBlockIdx + step); + STableDataBlockIdx* pTableDataBlockIdx = taosArrayGet(pTableBlockScanInfo->pBlockIdxList, pBlockInfo->tbBlockIdx + step); + SBrinRecord* p = taosArrayGet(pBlockIter->blockList, pTableDataBlockIdx->globalIndex); memcpy(pRecord, p, sizeof(SBrinRecord)); *nextIndex = pBlockInfo->tbBlockIdx + step; @@ -1141,7 +1142,7 @@ static int32_t findFileBlockInfoIndex(SDataBlockIter* pBlockIter, SFileDataBlock return -1; } -static int32_t setFileBlockActiveInBlockIter(SDataBlockIter* pBlockIter, int32_t index, int32_t step) { +static int32_t setFileBlockActiveInBlockIter(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t index, int32_t step) { if (index < 0 || index >= pBlockIter->numOfBlocks) { return -1; } @@ -1149,12 +1150,34 @@ static int32_t setFileBlockActiveInBlockIter(SDataBlockIter* pBlockIter, int32_t SFileDataBlockInfo fblock = *(SFileDataBlockInfo*)taosArrayGet(pBlockIter->blockList, index); pBlockIter->index += step; - if (index != pBlockIter->index) { - taosArrayRemove(pBlockIter->blockList, index); - taosArrayInsert(pBlockIter->blockList, pBlockIter->index, &fblock); + if (index != pBlockIter->index) { + if (index > pBlockIter->index) { + for (int32_t i = index - 1; i >= pBlockIter->index; --i) { + SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, i); - SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index); - ASSERT(pBlockInfo->uid == fblock.uid && pBlockInfo->tbBlockIdx == fblock.tbBlockIdx); + STableBlockScanInfo* pBlockScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr); + STableDataBlockIdx* pTableDataBlockIdx = taosArrayGet(pBlockScanInfo->pBlockIdxList, pBlockInfo->tbBlockIdx); + pTableDataBlockIdx->globalIndex = i + 1; + + taosArraySet(pBlockIter->blockList, i + 1, pBlockInfo); + } + } else if (index < pBlockIter->index) { + for (int32_t i = index + 1; i <= pBlockIter->index; ++i) { + SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, i); + + STableBlockScanInfo* pBlockScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr); + STableDataBlockIdx* pTableDataBlockIdx = taosArrayGet(pBlockScanInfo->pBlockIdxList, pBlockInfo->tbBlockIdx); + pTableDataBlockIdx->globalIndex = i - 1; + + taosArraySet(pBlockIter->blockList, i - 1, pBlockInfo); + } + + } + + taosArraySet(pBlockIter->blockList, pBlockIter->index, &fblock); + STableBlockScanInfo* pBlockScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, fblock.uid, pReader->idStr); + STableDataBlockIdx* pTableDataBlockIdx = taosArrayGet(pBlockScanInfo->pBlockIdxList, fblock.tbBlockIdx); + pTableDataBlockIdx->globalIndex = pBlockIter->index; } return TSDB_CODE_SUCCESS; @@ -1260,7 +1283,7 @@ static void getBlockToLoadInfo(SDataBlockToLoadInfo* pInfo, SFileDataBlockInfo* int32_t neighborIndex = 0; SBrinRecord rec = {0}; - bool hasNeighbor = getNeighborBlockOfSameTable(pBlockInfo, pScanInfo, &neighborIndex, pReader->info.order, &rec); + bool hasNeighbor = getNeighborBlockOfSameTable(&pReader->status.blockIter, pBlockInfo, pScanInfo, &neighborIndex, pReader->info.order, &rec); // overlap with neighbor if (hasNeighbor) { @@ -2232,7 +2255,7 @@ static int32_t loadNeighborIfOverlap(SFileDataBlockInfo* pBlockInfo, STableBlock *loadNeighbor = false; 
SBrinRecord rec = {0}; - bool hasNeighbor = getNeighborBlockOfSameTable(pBlockInfo, pBlockScanInfo, &nextIndex, pReader->info.order, &rec); + bool hasNeighbor = getNeighborBlockOfSameTable(&pReader->status.blockIter, pBlockInfo, pBlockScanInfo, &nextIndex, pReader->info.order, &rec); if (!hasNeighbor) { // do nothing return code; } @@ -2242,11 +2265,11 @@ static int32_t loadNeighborIfOverlap(SFileDataBlockInfo* pBlockInfo, STableBlock SDataBlockIter* pBlockIter = &pStatus->blockIter; // 1. find the next neighbor block in the scan block list - SFileDataBlockInfo fb = {.uid = pBlockInfo->uid, .tbBlockIdx = nextIndex}; - int32_t neighborIndex = findFileBlockInfoIndex(pBlockIter, &fb); + STableDataBlockIdx* tableDataBlockIdx = taosArrayGet(pBlockScanInfo->pBlockIdxList, nextIndex); + int32_t neighborIndex = tableDataBlockIdx->globalIndex; // 2. remove it from the scan block list - setFileBlockActiveInBlockIter(pBlockIter, neighborIndex, step); + setFileBlockActiveInBlockIter(pReader, pBlockIter, neighborIndex, step); // 3. load the neighbor block, and set it to be the currently accessed file data block code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData, pBlockInfo->uid); @@ -4178,6 +4201,7 @@ int32_t tsdbReaderSuspend2(STsdbReader* pReader) { } pBlockScanInfo->pBlockList = taosArrayDestroy(pBlockScanInfo->pBlockList); + pBlockScanInfo->pBlockIdxList = taosArrayDestroy(pBlockScanInfo->pBlockIdxList); // TODO: keep skyline for reuse pBlockScanInfo->delSkyline = taosArrayDestroy(pBlockScanInfo->delSkyline); } diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c index 809e00cc79..1f3c8b54ec 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c @@ -220,6 +220,7 @@ void clearBlockScanInfo(STableBlockScanInfo* p) { p->delSkyline = taosArrayDestroy(p->delSkyline); p->pBlockList = taosArrayDestroy(p->pBlockList); + p->pBlockIdxList = taosArrayDestroy(p->pBlockIdxList); p->pMemDelData = taosArrayDestroy(p->pMemDelData); p->pFileDelData = taosArrayDestroy(p->pFileDelData); } @@ -238,6 +239,7 @@ void destroyAllBlockScanInfo(SSHashObj* pTableMap) { static void doCleanupInfoForNextFileset(STableBlockScanInfo* pScanInfo) { // reset the index in last block when handing a new file taosArrayClear(pScanInfo->pBlockList); + taosArrayClear(pScanInfo->pBlockIdxList); taosArrayClear(pScanInfo->pFileDelData); // del data from each file set } @@ -384,12 +386,21 @@ int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int3 // since there is only one table qualified, blocks are not sorted if (sup.numOfTables == 1) { + STableBlockScanInfo* pTableScanInfo = taosArrayGetP(pTableList, 0); + if (pTableScanInfo->pBlockIdxList == NULL) { + pTableScanInfo->pBlockIdxList = taosArrayInit(numOfBlocks, sizeof(STableDataBlockIdx)); + } for (int32_t i = 0; i < numOfBlocks; ++i) { SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[0][i].uid, .tbBlockIdx = i}; blockInfo.record = *(SBrinRecord*)taosArrayGet(sup.pDataBlockInfo[0][i].pInfo->pBlockList, i); taosArrayPush(pBlockIter->blockList, &blockInfo); + + STableDataBlockIdx tableDataBlockIdx = {.globalIndex = i}; + taosArrayPush(pTableScanInfo->pBlockIdxList, &tableDataBlockIdx); } + taosArrayDestroy(pTableScanInfo->pBlockList); + pTableScanInfo->pBlockList = NULL; int64_t et = taosGetTimestampUs(); tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted, elapsed time:%.2f ms %s", @@ -420,7 +431,13 @@ int32_t 
initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int3 blockInfo.record = *(SBrinRecord*)taosArrayGet(sup.pDataBlockInfo[pos][index].pInfo->pBlockList, index); taosArrayPush(pBlockIter->blockList, &blockInfo); - + STableBlockScanInfo *pTableScanInfo = sup.pDataBlockInfo[pos][index].pInfo; + if (pTableScanInfo->pBlockIdxList == NULL) { + size_t szTableDataBlocks = taosArrayGetSize(pTableScanInfo->pBlockList); + pTableScanInfo->pBlockIdxList = taosArrayInit(szTableDataBlocks, sizeof(STableDataBlockIdx)); + } + STableDataBlockIdx tableDataBlockIdx = {.globalIndex = numOfTotal}; + taosArrayPush(pTableScanInfo->pBlockIdxList, &tableDataBlockIdx); // set data block index overflow, in order to disable the offset comparator if (sup.indexPerTable[pos] >= sup.numOfBlocksPerTable[pos]) { sup.indexPerTable[pos] = sup.numOfBlocksPerTable[pos] + 1; @@ -430,6 +447,12 @@ int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int3 tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree)); } + for (int32_t i = 0; i < numOfTables; ++i) { + STableBlockScanInfo* pTableScanInfo = taosArrayGetP(pTableList, i); + taosArrayDestroy(pTableScanInfo->pBlockList); + pTableScanInfo->pBlockList = NULL; + } + int64_t et = taosGetTimestampUs(); tsdbDebug("%p %d data blocks access order completed, elapsed time:%.2f ms %s", pReader, numOfBlocks, (et - st) / 1000.0, pReader->idStr); diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h index 5fe7d2f679..7cda8c71e2 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h @@ -59,11 +59,16 @@ typedef struct { bool hasVal; } SIterInfo; +typedef struct STableDataBlockIdx { + int32_t globalIndex; +} STableDataBlockIdx; + typedef struct STableBlockScanInfo { uint64_t uid; TSKEY lastKey; TSKEY lastKeyInStt; // last accessed key in stt SArray* pBlockList; // block data index list, SArray + SArray* pBlockIdxList; // SArray SArray* pMemDelData; // SArray SArray* pFileDelData; // SArray from each file set SIterInfo iter; // mem buffer skip list iterator diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 0b45ff5c4d..db94f32459 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -422,6 +422,15 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC // open tq sprintf(tdir, "%s%s%s", dir, TD_DIRSEP, VNODE_TQ_DIR); taosRealPath(tdir, NULL, sizeof(tdir)); + + // open query + if (vnodeQueryOpen(pVnode)) { + vError("vgId:%d, failed to open vnode query since %s", TD_VID(pVnode), tstrerror(terrno)); + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + // sma required the tq is initialized before the vnode open pVnode->pTq = tqOpen(tdir, pVnode); if (pVnode->pTq == NULL) { vError("vgId:%d, failed to open vnode tq since %s", TD_VID(pVnode), tstrerror(terrno)); @@ -434,13 +443,6 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC goto _err; } - // open query - if (vnodeQueryOpen(pVnode)) { - vError("vgId:%d, failed to open vnode query since %s", TD_VID(pVnode), tstrerror(terrno)); - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - // vnode begin if (vnodeBegin(pVnode) < 0) { vError("vgId:%d, failed to begin since %s", TD_VID(pVnode), tstrerror(terrno)); diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index d559783c2f..f19068ea88 100644 --- 
a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -87,11 +87,12 @@ void vnodeSnapReaderClose(SVSnapReader *pReader) { int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData) { int32_t code = 0; SVnode *pVnode = pReader->pVnode; + int32_t vgId = TD_VID(pReader->pVnode); // CONFIG ============== // FIXME: if commit multiple times and the config changed? if (!pReader->cfgDone) { - char fName[TSDB_FILENAME_LEN]; + char fName[TSDB_FILENAME_LEN]; int32_t offset = 0; vnodeGetPrimaryDir(pVnode->path, pVnode->diskPrimary, pVnode->pTfs, fName, TSDB_FILENAME_LEN); @@ -220,9 +221,57 @@ int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData) } // STREAM ============ + vInfo("vgId:%d stream task start", vgId); if (!pReader->streamTaskDone) { + if (pReader->pStreamTaskReader == NULL) { + code = streamTaskSnapReaderOpen(pReader->pVnode->pTq, pReader->sver, pReader->sver, &pReader->pStreamTaskReader); + if (code) { + vError("vgId:%d open streamtask snapshot reader failed, code:%s", vgId, tstrerror(code)); + goto _err; + } + } + + code = streamTaskSnapRead(pReader->pStreamTaskReader, ppData); + if (code) { + vError("vgId:%d error happens during read data from streatask snapshot, code:%s", vgId, tstrerror(code)); + goto _err; + } else { + if (*ppData) { + vInfo("vgId:%d no streamTask snapshot", vgId); + goto _exit; + } else { + pReader->streamTaskDone = 1; + code = streamTaskSnapReaderClose(pReader->pStreamTaskReader); + if (code) { + goto _err; + } + pReader->pStreamTaskReader = NULL; + } + } } if (!pReader->streamStateDone) { + if (pReader->pStreamStateReader == NULL) { + code = + streamStateSnapReaderOpen(pReader->pVnode->pTq, pReader->sver, pReader->sver, &pReader->pStreamStateReader); + if (code) { + pReader->streamStateDone = 1; + pReader->pStreamStateReader = NULL; + goto _err; + } + } + code = streamStateSnapRead(pReader->pStreamStateReader, ppData); + if (code) { + goto _err; + } else { + if (*ppData) { + goto _exit; + } else { + pReader->streamStateDone = 1; + code = streamStateSnapReaderClose(pReader->pStreamStateReader); + if (code) goto _err; + pReader->pStreamStateReader = NULL; + } + } } // RSMA ============== @@ -257,15 +306,15 @@ _exit: pReader->index++; *nData = sizeof(SSnapDataHdr) + pHdr->size; pHdr->index = pReader->index; - vDebug("vgId:%d, vnode snapshot read data, index:%" PRId64 " type:%d blockLen:%d ", TD_VID(pReader->pVnode), - pReader->index, pHdr->type, *nData); + vDebug("vgId:%d, vnode snapshot read data, index:%" PRId64 " type:%d blockLen:%d ", vgId, pReader->index, + pHdr->type, *nData); } else { - vInfo("vgId:%d, vnode snapshot read data end, index:%" PRId64, TD_VID(pReader->pVnode), pReader->index); + vInfo("vgId:%d, vnode snapshot read data end, index:%" PRId64, vgId, pReader->index); } return code; _err: - vError("vgId:%d, vnode snapshot read failed since %s", TD_VID(pReader->pVnode), tstrerror(code)); + vError("vgId:%d, vnode snapshot read failed since %s", vgId, tstrerror(code)); return code; } @@ -362,6 +411,20 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot * if (code) goto _exit; } + if (pWriter->pStreamTaskWriter) { + code = streamTaskSnapWriterClose(pWriter->pStreamTaskWriter, rollback); + if (code) goto _exit; + } + + if (pWriter->pStreamStateWriter) { + code = streamStateSnapWriterClose(pWriter->pStreamStateWriter, rollback); + if (code) goto _exit; + + code = streamStateRebuildFromSnap(pWriter->pStreamStateWriter, 0); + 
pWriter->pStreamStateWriter = NULL; + if (code) goto _exit; + } + if (pWriter->pRsmaSnapWriter) { code = rsmaSnapWriterClose(&pWriter->pRsmaSnapWriter, rollback); if (code) goto _exit; @@ -381,7 +444,7 @@ _exit: } static int32_t vnodeSnapWriteInfo(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { - int32_t code = 0; + int32_t code = 0; SVnode *pVnode = pWriter->pVnode; SSnapDataHdr *pHdr = (SSnapDataHdr *)pData; @@ -459,9 +522,23 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { } break; case SNAP_DATA_TQ_OFFSET: { } break; - case SNAP_DATA_STREAM_TASK: { + case SNAP_DATA_STREAM_TASK: + case SNAP_DATA_STREAM_TASK_CHECKPOINT: { + if (pWriter->pStreamTaskWriter == NULL) { + code = streamTaskSnapWriterOpen(pVnode->pTq, pWriter->sver, pWriter->ever, &pWriter->pStreamTaskWriter); + if (code) goto _err; + } + code = streamTaskSnapWrite(pWriter->pStreamTaskWriter, pData, nData); + if (code) goto _err; } break; - case SNAP_DATA_STREAM_STATE: { + case SNAP_DATA_STREAM_STATE_BACKEND: { + if (pWriter->pStreamStateWriter == NULL) { + code = streamStateSnapWriterOpen(pVnode->pTq, pWriter->sver, pWriter->ever, &pWriter->pStreamStateWriter); + if (code) goto _err; + } + code = streamStateSnapWrite(pWriter->pStreamStateWriter, pData, nData); + if (code) goto _err; + } break; case SNAP_DATA_RSMA1: case SNAP_DATA_RSMA2: diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index ccdde8ade4..0b7f969ed7 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -377,7 +377,7 @@ static int32_t vnodePreProcessDeleteMsg(SVnode *pVnode, SRpcMsg *pMsg) { SEncoder *pCoder = &(SEncoder){0}; SDeleteRes res = {0}; - SReadHandle handle = {.config = &pVnode->config, .vnode = pVnode, .pMsgCb = &pVnode->msgCb}; + SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb}; initStorageAPI(&handle.api); code = qWorkerProcessDeleteMsg(&handle, pVnode->pQuery, pMsg, &res); @@ -561,7 +561,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg } break; case TDMT_STREAM_TASK_DEPLOY: { - if (pVnode->restored && tqProcessTaskDeployReq(pVnode->pTq, ver, pReq, len) < 0) { + if (tqProcessTaskDeployReq(pVnode->pTq, ver, pReq, len) < 0) { goto _err; } } break; @@ -571,12 +571,14 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg } } break; case TDMT_STREAM_TASK_PAUSE: { - if (pVnode->restored && tqProcessTaskPauseReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) { + if (pVnode->restored && vnodeIsLeader(pVnode) && + tqProcessTaskPauseReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) { goto _err; } } break; case TDMT_STREAM_TASK_RESUME: { - if (pVnode->restored && tqProcessTaskResumeReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) { + if (pVnode->restored && vnodeIsLeader(pVnode) && + tqProcessTaskResumeReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) { goto _err; } } break; @@ -586,6 +588,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg goto _err; } break; + case TDMT_VND_ALTER_CONFIG: vnodeProcessAlterConfigReq(pVnode, ver, pReq, len, pRsp); break; @@ -598,6 +601,12 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg case TDMT_VND_DROP_INDEX: vnodeProcessDropIndexReq(pVnode, ver, pReq, len, pRsp); break; + case TDMT_VND_STREAM_CHECK_POINT_SOURCE: + tqProcessStreamCheckPointSourceReq(pVnode->pTq, pMsg); + break; + case TDMT_VND_STREAM_TASK_UPDATE: + 
tqProcessTaskUpdateReq(pVnode->pTq, pMsg); + break; case TDMT_VND_COMPACT: vnodeProcessCompactVnodeReq(pVnode, ver, pReq, len, pRsp); goto _exit; @@ -614,7 +623,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg walApplyVer(pVnode->pWal, ver); - if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, ver) < 0) { + if (tqPushMsg(pVnode->pTq, pMsg->msgType) < 0) { vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); return -1; } @@ -665,7 +674,7 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { return 0; } - SReadHandle handle = {.config = &pVnode->config, .vnode = pVnode, .pMsgCb = &pVnode->msgCb}; + SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb}; initStorageAPI(&handle.api); switch (pMsg->msgType) { @@ -744,9 +753,9 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) return tqProcessTaskDispatchReq(pVnode->pTq, pMsg, true); case TDMT_STREAM_TASK_DISPATCH_RSP: return tqProcessTaskDispatchRsp(pVnode->pTq, pMsg); - case TDMT_STREAM_TASK_CHECK: + case TDMT_VND_STREAM_TASK_CHECK: return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg); - case TDMT_STREAM_TASK_CHECK_RSP: + case TDMT_VND_STREAM_TASK_CHECK_RSP: return tqProcessStreamTaskCheckRsp(pVnode->pTq, 0, pMsg); case TDMT_STREAM_RETRIEVE: return tqProcessTaskRetrieveReq(pVnode->pTq, pMsg); @@ -754,10 +763,12 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) return tqProcessTaskRetrieveRsp(pVnode->pTq, pMsg); case TDMT_VND_STREAM_SCAN_HISTORY: return tqProcessTaskScanHistory(pVnode->pTq, pMsg); - case TDMT_STREAM_SCAN_HISTORY_FINISH: + case TDMT_VND_STREAM_SCAN_HISTORY_FINISH: return tqProcessTaskScanHistoryFinishReq(pVnode->pTq, pMsg); - case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP: + case TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP: return tqProcessTaskScanHistoryFinishRsp(pVnode->pTq, pMsg); + case TDMT_STREAM_TASK_CHECKPOINT_READY: + return tqProcessStreamTaskCheckpointReadyMsg(pVnode->pTq, pMsg); default: vError("unknown msg type:%d in stream queue", pMsg->msgType); return TSDB_CODE_APP_ERROR; @@ -765,7 +776,6 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) } void smaHandleRes(void *pVnode, int64_t smaId, const SArray *data) { - // blockDebugShowDataBlocks(data, __func__); tdProcessTSmaInsert(((SVnode *)pVnode)->pSma, smaId, (const char *)data); } @@ -938,7 +948,10 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, sprintf(detail, "btime:%" PRId64 ", flags:%d, ttl:%d, type:%d", pCreateReq->btime, pCreateReq->flags, pCreateReq->ttl, pCreateReq->type); - auditRecord(pReq, clusterId, "createTable", pVnode->config.dbname, pCreateReq->name, detail); + SName name = {0}; + tNameFromString(&name, pVnode->config.dbname, T_NAME_ACCT | T_NAME_DB); + + auditRecord(pReq, clusterId, "createTable", name.dbname, pCreateReq->name, detail); } vDebug("vgId:%d, add %d new created tables into query table list", TD_VID(pVnode), (int32_t)taosArrayGetSize(tbUids)); @@ -1668,7 +1681,7 @@ static int32_t vnodeConsolidateAlterHashRange(SVnode *pVnode, int64_t ver) { } static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { - vInfo("vgId:%d, vnode handle msgType:alter-confirm, alter confim msg is processed", TD_VID(pVnode)); + vInfo("vgId:%d, vnode handle msgType:alter-confirm, alter confirm msg is processed", TD_VID(pVnode)); int32_t code = TSDB_CODE_SUCCESS; if 
(!pVnode->config.hashChange) { goto _exit; diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index d140c4a122..d580b41093 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -14,6 +14,7 @@ */ #define _DEFAULT_SOURCE +#include "tq.h" #include "vnd.h" #define BATCH_ENABLE 0 @@ -216,7 +217,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) isWeak, isBlock, msg, numOfMsgs, arrayPos, pMsg->info.handle); if (!pVnode->restored) { - vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, TMSG_INFO(pMsg->msgType)); + vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, + TMSG_INFO(pMsg->msgType)); terrno = TSDB_CODE_SYN_RESTORING; vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING); rpcFreeCont(pMsg->pCont); @@ -279,7 +281,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) vnodeIsMsgBlock(pMsg->msgType), msg, numOfMsgs, pMsg->info.handle); if (!pVnode->restored) { - vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, TMSG_INFO(pMsg->msgType)); + vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, + TMSG_INFO(pMsg->msgType)); vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING); rpcFreeCont(pMsg->pCont); taosFreeQitem(pMsg); @@ -526,7 +529,8 @@ static int32_t vnodeSnapshotDoWrite(const SSyncFSM *pFsm, void *pWriter, void *p } static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) { - SVnode *pVnode = pFsm->data; + SVnode *pVnode = pFsm->data; + int32_t vgId = pVnode->config.vgId; SyncIndex appliedIdx = -1; do { @@ -538,7 +542,7 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) } else { vInfo("vgId:%d, restore not finish since %" PRId64 " items to be applied. 
commit-index:%" PRId64 ", applied-index:%" PRId64, - pVnode->config.vgId, commitIdx - appliedIdx, commitIdx, appliedIdx); + vgId, commitIdx - appliedIdx, commitIdx, appliedIdx); taosMsleep(10); } } while (true); @@ -547,14 +551,19 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) walApplyVer(pVnode->pWal, commitIdx); pVnode->restored = true; - vInfo("vgId:%d, sync restore finished, start to restore stream tasks by replay wal", pVnode->config.vgId); - // start to restore all stream tasks - if (tsDisableStream) { - vInfo("vgId:%d, not launch stream tasks, since stream tasks are disabled", pVnode->config.vgId); + if (vnodeIsRoleLeader(pVnode)) { + vInfo("vgId:%d, sync restore finished, start to launch stream tasks", vgId); + + // start to restore all stream tasks + if (tsDisableStream) { + vInfo("vgId:%d, not launch stream tasks, since stream tasks are disabled", vgId); + } else { + vInfo("vgId:%d start to launch stream tasks", pVnode->config.vgId); + tqCheckAndRunStreamTaskAsync(pVnode->pTq); + } } else { - vInfo("vgId:%d start to launch stream tasks", pVnode->config.vgId); - tqCheckStreamStatus(pVnode->pTq); + vInfo("vgId:%d, sync restore finished, not launch stream tasks since not leader", vgId); } } @@ -569,6 +578,8 @@ static void vnodeBecomeFollower(const SSyncFSM *pFsm) { tsem_post(&pVnode->syncSem); } taosThreadMutexUnlock(&pVnode->lock); + + tqStopStreamTasks(pVnode->pTq); } static void vnodeBecomeLearner(const SSyncFSM *pFsm) { @@ -586,6 +597,9 @@ static void vnodeBecomeLearner(const SSyncFSM *pFsm) { static void vnodeBecomeLeader(const SSyncFSM *pFsm) { SVnode *pVnode = pFsm->data; + if (pVnode->pTq) { + tqUpdateNodeStage(pVnode->pTq); + } vDebug("vgId:%d, become leader", pVnode->config.vgId); } @@ -660,8 +674,8 @@ int32_t vnodeSyncOpen(SVnode *pVnode, char *path, int32_t vnodeVersion) { vInfo("vgId:%d, start to open sync, replica:%d selfIndex:%d", pVnode->config.vgId, pCfg->replicaNum, pCfg->myIndex); for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) { SNodeInfo *pNode = &pCfg->nodeInfo[i]; - vInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pVnode->config.vgId, i, pNode->nodeFqdn, pNode->nodePort, - pNode->nodeId, pNode->clusterId); + vInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pVnode->config.vgId, i, pNode->nodeFqdn, + pNode->nodePort, pNode->nodeId, pNode->clusterId); } pVnode->sync = syncOpen(&syncInfo, vnodeVersion); diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index 0bd35353e0..8726f57977 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -21,11 +21,11 @@ extern "C" { #include "os.h" #include "tcommon.h" +#include "theap.h" #include "tlosertree.h" #include "tsort.h" #include "ttszip.h" #include "tvariant.h" -#include "theap.h" #include "dataSinkMgt.h" #include "executil.h" @@ -39,22 +39,16 @@ extern "C" { #include "tlockfree.h" #include "tmsg.h" #include "tpagedbuf.h" -//#include "tstream.h" -//#include "tstreamUpdate.h" +// #include "tstream.h" +// #include "tstreamUpdate.h" #include "tlrucache.h" typedef int32_t (*__block_search_fn_t)(char* data, int32_t num, int64_t key, int32_t order); typedef struct STsdbReader STsdbReader; -typedef struct STqReader STqReader; - - -typedef enum SOperatorParamType{ - OP_GET_PARAM = 1, - OP_NOTIFY_PARAM -} SOperatorParamType; - +typedef struct STqReader STqReader; +typedef enum SOperatorParamType { OP_GET_PARAM = 1, OP_NOTIFY_PARAM } SOperatorParamType; #define 
IS_VALID_SESSION_WIN(winInfo) ((winInfo).sessionWin.win.skey > 0) #define SET_SESSION_WIN_INVALID(winInfo) ((winInfo).sessionWin.win.skey = INT64_MIN) @@ -114,17 +108,17 @@ typedef struct SExchangeOpStopInfo { } SExchangeOpStopInfo; typedef struct SGcOperatorParam { - int64_t sessionId; - int32_t downstreamIdx; - int32_t vgId; - int64_t tbUid; - bool needCache; + int64_t sessionId; + int32_t downstreamIdx; + int32_t vgId; + int64_t tbUid; + bool needCache; } SGcOperatorParam; typedef struct SGcNotifyOperatorParam { - int32_t downstreamIdx; - int32_t vgId; - int64_t tbUid; + int32_t downstreamIdx; + int32_t vgId; + int64_t tbUid; } SGcNotifyOperatorParam; typedef struct SExprSupp { @@ -166,15 +160,15 @@ typedef struct SSortMergeJoinOperatorParam { } SSortMergeJoinOperatorParam; typedef struct SExchangeOperatorBasicParam { - int32_t vgId; - int32_t srcOpType; - bool tableSeq; - SArray* uidList; + int32_t vgId; + int32_t srcOpType; + bool tableSeq; + SArray* uidList; } SExchangeOperatorBasicParam; typedef struct SExchangeOperatorBatchParam { - bool multiParams; - SSHashObj* pBatchs; // SExchangeOperatorBasicParam + bool multiParams; + SSHashObj* pBatchs; // SExchangeOperatorBasicParam } SExchangeOperatorBatchParam; typedef struct SExchangeOperatorParam { @@ -259,7 +253,7 @@ typedef struct STableScanBase { SLimitInfo limitInfo; // there are more than one table list exists in one task, if only one vnode exists. STableListInfo* pTableListInfo; - TsdReader readerAPI; + TsdReader readerAPI; } STableScanBase; typedef struct STableScanInfo { @@ -275,7 +269,7 @@ typedef struct STableScanInfo { int8_t assignBlockUid; bool hasGroupByTag; bool countOnly; -// TsdReader readerAPI; + // TsdReader readerAPI; } STableScanInfo; typedef struct STableMergeScanInfo { @@ -309,21 +303,21 @@ typedef struct STagScanFilterContext { } STagScanFilterContext; typedef struct STagScanInfo { - SColumnInfo* pCols; - SSDataBlock* pRes; - SColMatchInfo matchInfo; - int32_t curPos; - SLimitNode* pSlimit; - SReadHandle readHandle; - STableListInfo* pTableListInfo; - uint64_t suid; - void* pCtbCursor; - SNode* pTagCond; - SNode* pTagIndexCond; + SColumnInfo* pCols; + SSDataBlock* pRes; + SColMatchInfo matchInfo; + int32_t curPos; + SLimitNode* pSlimit; + SReadHandle readHandle; + STableListInfo* pTableListInfo; + uint64_t suid; + void* pCtbCursor; + SNode* pTagCond; + SNode* pTagIndexCond; STagScanFilterContext filterCtx; - SArray* aUidTags; // SArray - SArray* aFilterIdxs; // SArray - SStorageAPI* pStorageAPI; + SArray* aUidTags; // SArray + SArray* aFilterIdxs; // SArray + SStorageAPI* pStorageAPI; } STagScanInfo; typedef enum EStreamScanMode { @@ -383,8 +377,6 @@ typedef struct STimeWindowAggSupp { int64_t waterMark; TSKEY maxTs; TSKEY minTs; - TSKEY checkPointTs; - TSKEY checkPointInterval; SColumnInfoData timeWindowData; // query time window info for scalar function execution. } STimeWindowAggSupp; @@ -407,20 +399,18 @@ typedef struct SStreamScanInfo { uint64_t numOfExec; // execution times STqReader* tqReader; - uint64_t groupId; + uint64_t groupId; struct SUpdateInfo* pUpdateInfo; EStreamScanMode scanMode; - struct SOperatorInfo* pStreamScanOp; - struct SOperatorInfo* pTableScanOp; + struct SOperatorInfo* pStreamScanOp; + struct SOperatorInfo* pTableScanOp; SArray* childIds; SWindowSupporter windowSup; SPartitionBySupporter partitionSup; SExprSupp* pPartScalarSup; bool assignBlockUid; // assign block uid to groupId, temporarily used for generating rollup SMA. 
int32_t scanWinIndex; // for state operator - int32_t pullDataResIndex; - SSDataBlock* pPullDataRes; // pull data SSDataBlock SSDataBlock* pDeleteDataRes; // delete data SSDataBlock int32_t deleteDataIndex; STimeWindow updateWin; @@ -435,12 +425,13 @@ typedef struct SStreamScanInfo { int32_t blockRecoverTotCnt; SSDataBlock* pRecoverRes; - SSDataBlock* pCreateTbRes; - int8_t igCheckUpdate; - int8_t igExpired; - void* pState; //void + SSDataBlock* pCreateTbRes; + int8_t igCheckUpdate; + int8_t igExpired; + void* pState; // void SStoreTqReader readerFn; - SStateStore stateStore; + SStateStore stateStore; + SSDataBlock* pCheckpointRes; } SStreamScanInfo; typedef struct { @@ -488,7 +479,7 @@ typedef struct SIntervalAggOperatorInfo { int64_t limit; bool slimited; int64_t slimit; - uint64_t curGroupId; // initialize to UINT64_MAX + uint64_t curGroupId; // initialize to UINT64_MAX uint64_t handledGroupNum; BoundedQueue* pBQ; } SIntervalAggOperatorInfo; @@ -502,6 +493,11 @@ typedef struct SMergeAlignedIntervalAggOperatorInfo { SResultRow* pResultRow; } SMergeAlignedIntervalAggOperatorInfo; +typedef struct SOpCheckPointInfo { + uint16_t checkPointId; + SHashObj* children; // key:child id +} SOpCheckPointInfo; + typedef struct SStreamIntervalOperatorInfo { SOptrBasicInfo binfo; // basic info SAggSupporter aggSup; // aggregate supporter @@ -523,15 +519,18 @@ typedef struct SStreamIntervalOperatorInfo { SSDataBlock* pPullDataRes; SArray* pChildren; int32_t numOfChild; - SStreamState* pState; // void + SStreamState* pState; // void SWinKey delKey; uint64_t numOfDatapack; SArray* pUpdated; SSHashObj* pUpdatedMap; int64_t dataVersion; - SStateStore statestore; + SStateStore stateStore; bool recvGetAll; SHashObj* pFinalPullDataMap; + SOpCheckPointInfo checkPointInfo; + bool reCkBlock; + SSDataBlock* pCheckpointRes; } SStreamIntervalOperatorInfo; typedef struct SDataGroupInfo { @@ -578,6 +577,8 @@ typedef struct SStreamSessionAggOperatorInfo { int64_t dataVersion; SArray* historyWins; bool isHistoryOp; + bool reCkBlock; + SSDataBlock* pCheckpointRes; } SStreamSessionAggOperatorInfo; typedef struct SStreamStateAggOperatorInfo { @@ -599,6 +600,8 @@ typedef struct SStreamStateAggOperatorInfo { int64_t dataVersion; bool isHistoryOp; SArray* historyWins; + bool reCkBlock; + SSDataBlock* pCheckpointRes; } SStreamStateAggOperatorInfo; typedef struct SStreamPartitionOperatorInfo { @@ -652,7 +655,9 @@ typedef struct SStreamFillOperatorInfo { #define OPTR_SET_OPENED(_optr) ((_optr)->status |= OP_OPENED) SSchemaWrapper* extractQueriedColumnSchema(SScanPhysiNode* pScanNode); -int32_t initQueriedTableSchemaInfo(SReadHandle* pHandle, SScanPhysiNode* pScanNode, const char* dbName, SExecTaskInfo* pTaskInfo); + +int32_t initQueriedTableSchemaInfo(SReadHandle* pHandle, SScanPhysiNode* pScanNode, const char* dbName, + SExecTaskInfo* pTaskInfo); void cleanupQueriedTableScanInfo(void* p); void initBasicInfo(SOptrBasicInfo* pInfo, SSDataBlock* pBlock); @@ -724,7 +729,8 @@ bool isInTimeWindow(STimeWindow* pWin, TSKEY ts, int64_t gap); bool functionNeedToExecute(SqlFunctionCtx* pCtx); bool isOverdue(TSKEY ts, STimeWindowAggSupp* pSup); bool isCloseWindow(STimeWindow* pWin, STimeWindowAggSupp* pSup); -bool isDeletedStreamWindow(STimeWindow* pWin, uint64_t groupId, void* pState, STimeWindowAggSupp* pTwSup, SStateStore* pStore); +bool isDeletedStreamWindow(STimeWindow* pWin, uint64_t groupId, void* pState, STimeWindowAggSupp* pTwSup, + SStateStore* pStore); void appendOneRowToStreamSpecialBlock(SSDataBlock* pBlock, TSKEY* pStartTs, 
TSKEY* pEndTs, uint64_t* pUid, uint64_t* pGp, void* pTbName); uint64_t calGroupIdByData(SPartitionBySupporter* pParSup, SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t rowId); @@ -736,8 +742,8 @@ bool groupbyTbname(SNodeList* pGroupList); int32_t buildDataBlockFromGroupRes(struct SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup, SGroupResInfo* pGroupResInfo); int32_t saveSessionDiscBuf(void* pState, SSessionKey* key, void* buf, int32_t size, SStateStore* pAPI); -int32_t buildSessionResultDataBlock(struct SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, - SExprSupp* pSup, SGroupResInfo* pGroupResInfo); +int32_t buildSessionResultDataBlock(struct SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup, + SGroupResInfo* pGroupResInfo); int32_t releaseOutputBuf(void* pState, SWinKey* pKey, SResultRow* pResult, SStateStore* pAPI); void getNextIntervalWindow(SInterval* pInterval, STimeWindow* tw, int32_t order); int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int32_t pos, int32_t order, @@ -755,15 +761,17 @@ void doUpdateNumOfRows(SqlFunctionCtx* pCtx, SResultRow* pRow, int32_t numOfExpr void doClearBufferedBlocks(SStreamScanInfo* pInfo); uint64_t calcGroupId(char* pData, int32_t len); -void streamOpReleaseState(struct SOperatorInfo* pOperator); -void streamOpReloadState(struct SOperatorInfo* pOperator); +void streamOpReleaseState(struct SOperatorInfo* pOperator); +void streamOpReloadState(struct SOperatorInfo* pOperator); -void destroyOperatorParamValue(void* pValues); -int32_t mergeOperatorParams(SOperatorParam* pDst, SOperatorParam* pSrc); -int32_t buildTableScanOperatorParam(SOperatorParam** ppRes, SArray* pUidList, int32_t srcOpType, bool tableSeq); -void freeExchangeGetBasicOperatorParam(void* pParam); -void freeOperatorParam(SOperatorParam* pParam, SOperatorParamType type); -void freeResetOperatorParams(struct SOperatorInfo* pOperator, SOperatorParamType type, bool allFree); +int32_t encodeSTimeWindowAggSupp(void** buf, STimeWindowAggSupp* pTwAggSup); +void* decodeSTimeWindowAggSupp(void* buf, STimeWindowAggSupp* pTwAggSup); +void destroyOperatorParamValue(void* pValues); +int32_t mergeOperatorParams(SOperatorParam* pDst, SOperatorParam* pSrc); +int32_t buildTableScanOperatorParam(SOperatorParam** ppRes, SArray* pUidList, int32_t srcOpType, bool tableSeq); +void freeExchangeGetBasicOperatorParam(void* pParam); +void freeOperatorParam(SOperatorParam* pParam, SOperatorParamType type); +void freeResetOperatorParams(struct SOperatorInfo* pOperator, SOperatorParamType type, bool allFree); SSDataBlock* getNextBlockFromDownstreamImpl(struct SOperatorInfo* pOperator, int32_t idx, bool clearParam); bool inSlidingWindow(SInterval* pInterval, STimeWindow* pWin, SDataBlockInfo* pBlockInfo); @@ -771,7 +779,7 @@ bool inCalSlidingWindow(SInterval* pInterval, STimeWindow* pWin, TSKEY calStart, bool compareVal(const char* v, const SStateKeys* pKey); int32_t getNextQualifiedWindow(SInterval* pInterval, STimeWindow* pNext, SDataBlockInfo* pDataBlockInfo, - TSKEY* primaryKeys, int32_t prevPosition, int32_t order); + TSKEY* primaryKeys, int32_t prevPosition, int32_t order); #ifdef __cplusplus } diff --git a/source/libs/executor/inc/operator.h b/source/libs/executor/inc/operator.h index 6335ac8181..13da9f7238 100644 --- a/source/libs/executor/inc/operator.h +++ b/source/libs/executor/inc/operator.h @@ -116,7 +116,7 @@ SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SMerge 
SOperatorInfo* createMergeAlignedIntervalOperatorInfo(SOperatorInfo* downstream, SMergeAlignedIntervalPhysiNode* pNode, SExecTaskInfo* pTaskInfo); -SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild); +SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild, SReadHandle* pHandle); SOperatorInfo* createSessionAggOperatorInfo(SOperatorInfo* downstream, SSessionWinodwPhysiNode* pSessionNode, SExecTaskInfo* pTaskInfo); @@ -146,7 +146,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild, SReadHandle* pHandle); -SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); +SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle); SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle); diff --git a/source/libs/executor/inc/querytask.h b/source/libs/executor/inc/querytask.h index 3690e8d234..fcafd5a4e3 100644 --- a/source/libs/executor/inc/querytask.h +++ b/source/libs/executor/inc/querytask.h @@ -70,8 +70,6 @@ typedef struct { SVersionRange fillHistoryVer; STimeWindow fillHistoryWindow; SStreamState* pState; - int64_t dataVersion; - int64_t checkPointId; } SStreamTaskInfo; struct SExecTaskInfo { diff --git a/source/libs/executor/src/dataDispatcher.c b/source/libs/executor/src/dataDispatcher.c index 53c7c073ed..abe566473f 100644 --- a/source/libs/executor/src/dataDispatcher.c +++ b/source/libs/executor/src/dataDispatcher.c @@ -54,8 +54,8 @@ typedef struct SDataDispatchHandle { // clang-format off // data format: // +----------------+------------------+--------------+--------------+------------------+--------------------------------------------+------------------------------------+-------------+-----------+-------------+-----------+ -// |SDataCacheEntry | version | total length | numOfRows | group id | col1_schema | col2_schema | col3_schema... | column#1 length, column#2 length...| col1 bitmap | col1 data | col2 bitmap | col2 data | .... | | (4 bytes) |(8 bytes) -// | | sizeof(int32_t) |sizeof(int32) | sizeof(int32)| sizeof(uint64_t) | (sizeof(int8_t)+sizeof(int32_t))*numOfCols | sizeof(int32_t) * numOfCols | actual size | | +// |SDataCacheEntry | version | total length | numOfRows | group id | col1_schema | col2_schema | col3_schema... 
| column#1 length, column#2 length...| col1 bitmap | col1 data | col2 bitmap | col2 data | +// | | sizeof(int32_t) |sizeof(int32) | sizeof(int32)| sizeof(uint64_t) | (sizeof(int8_t)+sizeof(int32_t))*numOfCols | sizeof(int32_t) * numOfCols | actual size | | | // +----------------+------------------+--------------+--------------+------------------+--------------------------------------------+------------------------------------+-------------+-----------+-------------+-----------+ // The length of bitmap is decided by number of rows of this data block, and the length of each column data is // recorded in the first segment, next to the struct header diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 8d4b1c0ea0..60dc6f0185 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -14,9 +14,9 @@ */ #include "executor.h" -#include -#include #include "executorInt.h" +#include "trpc.h" +#include "wal.h" #include "operator.h" #include "planner.h" #include "querytask.h" @@ -149,11 +149,15 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu } else if (type == STREAM_INPUT__DATA_BLOCK) { for (int32_t i = 0; i < numOfBlocks; ++i) { SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; - SPackedData tmp = { .pDataBlock = pDataBlock }; + SPackedData tmp = {.pDataBlock = pDataBlock}; taosArrayPush(pInfo->pBlockLists, &tmp); } pInfo->blockType = STREAM_INPUT__DATA_BLOCK; + } else if (type == STREAM_INPUT__CHECKPOINT) { + SPackedData tmp = {.pDataBlock = input}; + taosArrayPush(pInfo->pBlockLists, &tmp); + pInfo->blockType = STREAM_INPUT__CHECKPOINT; } else { ASSERT(0); } @@ -162,7 +166,7 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu } } -void doSetTaskId(SOperatorInfo* pOperator, SStorageAPI *pAPI) { +void doSetTaskId(SOperatorInfo* pOperator, SStorageAPI* pAPI) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { SStreamScanInfo* pStreamScanInfo = pOperator->info; @@ -203,13 +207,6 @@ int32_t qSetStreamOpOpen(qTaskInfo_t tinfo) { return code; } -void qGetCheckpointVersion(qTaskInfo_t tinfo, int64_t* dataVer, int64_t* ckId) { - SExecTaskInfo* pTaskInfo = tinfo; - *dataVer = pTaskInfo->streamInfo.dataVersion; - *ckId = pTaskInfo->streamInfo.checkPointId; -} - - int32_t qSetMultiStreamInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, int32_t type) { if (tinfo == NULL) { return TSDB_CODE_APP_ERROR; @@ -330,7 +327,7 @@ qTaskInfo_t qCreateStreamExecTaskInfo(void* msg, SReadHandle* readers, int32_t v } static SArray* filterUnqualifiedTables(const SStreamScanInfo* pScanInfo, const SArray* tableIdList, const char* idstr, - SStorageAPI* pAPI) { + SStorageAPI* pAPI) { SArray* qa = taosArrayInit(4, sizeof(tb_uid_t)); int32_t numOfUids = taosArrayGetSize(tableIdList); if (numOfUids == 0) { @@ -341,7 +338,7 @@ static SArray* filterUnqualifiedTables(const SStreamScanInfo* pScanInfo, const S uint64_t suid = 0; uint64_t uid = 0; - int32_t type = 0; + int32_t type = 0; tableListGetSourceTableInfo(pTableScanInfo->base.pTableListInfo, &suid, &uid, &type); // let's discard the tables those are not created according to the queried super table. 
@@ -1156,7 +1153,7 @@ void qStreamSetOpen(qTaskInfo_t tinfo) { int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subType) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; - SStorageAPI* pAPI = &pTaskInfo->storageAPI; + SStorageAPI* pAPI = &pTaskInfo->storageAPI; SOperatorInfo* pOperator = pTaskInfo->pRoot; const char* id = GET_TASKID(pTaskInfo); @@ -1193,7 +1190,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT pScanBaseInfo->dataReader = NULL; SStoreTqReader* pReaderAPI = &pTaskInfo->storageAPI.tqReaderFn; - SWalReader* pWalReader = pReaderAPI->tqReaderGetWalReader(pInfo->tqReader); + SWalReader* pWalReader = pReaderAPI->tqReaderGetWalReader(pInfo->tqReader); walReaderVerifyOffset(pWalReader, pOffset); if (pReaderAPI->tqReaderSeek(pInfo->tqReader, pOffset->version, id) < 0) { qError("tqReaderSeek failed ver:%" PRId64 ", %s", pOffset->version, id); @@ -1251,8 +1248,9 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT pScanInfo->scanTimes = 0; if (pScanBaseInfo->dataReader == NULL) { - int32_t code = pTaskInfo->storageAPI.tsdReader.tsdReaderOpen(pScanBaseInfo->readHandle.vnode, &pScanBaseInfo->cond, &keyInfo, 1, - pScanInfo->pResBlock, (void**) &pScanBaseInfo->dataReader, id, false, NULL); + int32_t code = pTaskInfo->storageAPI.tsdReader.tsdReaderOpen( + pScanBaseInfo->readHandle.vnode, &pScanBaseInfo->cond, &keyInfo, 1, pScanInfo->pResBlock, + (void**)&pScanBaseInfo->dataReader, id, false, NULL); if (code != TSDB_CODE_SUCCESS) { qError("prepare read tsdb snapshot failed, uid:%" PRId64 ", code:%s %s", pOffset->uid, tstrerror(code), id); terrno = code; @@ -1310,8 +1308,8 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT STableKeyInfo* pList = tableListGetInfo(pTableListInfo, 0); int32_t size = tableListGetSize(pTableListInfo); - pTaskInfo->storageAPI.tsdReader.tsdReaderOpen(pInfo->vnode, &pTaskInfo->streamInfo.tableCond, pList, size, NULL, (void**) &pInfo->dataReader, NULL, - false, NULL); + pTaskInfo->storageAPI.tsdReader.tsdReaderOpen(pInfo->vnode, &pTaskInfo->streamInfo.tableCond, pList, size, NULL, + (void**)&pInfo->dataReader, NULL, false, NULL); cleanupQueryTableDataCond(&pTaskInfo->streamInfo.tableCond); strcpy(pTaskInfo->streamInfo.tbName, mtInfo.tbName); @@ -1369,7 +1367,7 @@ void qProcessRspMsg(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo) { SExecTaskInfo* pTaskInfo = tinfo; - SArray* plist = getTableListInfo(pTaskInfo); + SArray* plist = getTableListInfo(pTaskInfo); // only extract table in the first elements STableListInfo* pTableListInfo = taosArrayGetP(plist, 0); @@ -1377,7 +1375,7 @@ SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo) { SArray* pUidList = taosArrayInit(10, sizeof(uint64_t)); int32_t numOfTables = tableListGetSize(pTableListInfo); - for(int32_t i = 0; i < numOfTables; ++i) { + for (int32_t i = 0; i < numOfTables; ++i) { STableKeyInfo* pKeyInfo = tableListGetInfo(pTableListInfo, i); taosArrayPush(pUidList, &pKeyInfo->uid); } diff --git a/source/libs/executor/src/executorInt.c b/source/libs/executor/src/executorInt.c index dda8b133ca..519a308c3a 100644 --- a/source/libs/executor/src/executorInt.c +++ b/source/libs/executor/src/executorInt.c @@ -30,10 +30,10 @@ #include "operator.h" #include "query.h" #include "querytask.h" +#include "storageapi.h" #include "tcompare.h" #include "thash.h" #include "ttypes.h" -#include "storageapi.h" #define SET_REVERSE_SCAN_FLAG(runtime) 
((runtime)->scanFlag = REVERSE_SCAN) #define GET_FORWARD_DIRECTION_FACTOR(ord) (((ord) == TSDB_ORDER_ASC) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP) @@ -547,6 +547,7 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const SColumnInfoD if (status == FILTER_RESULT_ALL_QUALIFIED) { // here nothing needs to be done } else if (status == FILTER_RESULT_NONE_QUALIFIED) { + trimDataBlock(pBlock, pBlock->info.rows, NULL); pBlock->info.rows = 0; } else if (status == FILTER_RESULT_PARTIAL_QUALIFIED) { trimDataBlock(pBlock, pBlock->info.rows, (bool*)pIndicator); @@ -697,8 +698,8 @@ int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprS if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) { uint32_t newSize = pBlock->info.rows + pRow->numOfRows + ((numOfRows - i) > 1 ? 1 : 0); blockDataEnsureCapacity(pBlock, newSize); - qDebug("datablock capacity not sufficient, expand to required:%d, current capacity:%d, %s", - newSize, pBlock->info.capacity, GET_TASKID(pTaskInfo)); + qDebug("datablock capacity not sufficient, expand to required:%d, current capacity:%d, %s", newSize, + pBlock->info.capacity, GET_TASKID(pTaskInfo)); // todo set the pOperator->resultInfo size } @@ -722,9 +723,9 @@ int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprS void doBuildStreamResBlock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo, SDiskbasedBuf* pBuf) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pTaskInfo->storageAPI; + SStorageAPI* pAPI = &pTaskInfo->storageAPI; - SSDataBlock* pBlock = pbInfo->pRes; + SSDataBlock* pBlock = pbInfo->pRes; // set output datablock version pBlock->info.version = pTaskInfo->version; @@ -737,10 +738,12 @@ void doBuildStreamResBlock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGr // clear the existed group id pBlock->info.id.groupId = 0; ASSERT(!pbInfo->mergeResultBlock); - doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, false); + doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, + false); void* tbname = NULL; - if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, &tbname) < 0) { + if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, &tbname) < + 0) { pBlock->info.parTbName[0] = 0; } else { memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN); @@ -765,10 +768,12 @@ void doBuildResultDatablock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SG // clear the existed group id pBlock->info.id.groupId = 0; if (!pbInfo->mergeResultBlock) { - doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, false); + doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, + false); } else { while (hasRemainResults(pGroupResInfo)) { - doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, true); + doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, + true); if (pBlock->info.rows >= pOperator->resultInfo.threshold) { break; } @@ -966,10 +971,10 @@ int32_t saveSessionDiscBuf(void* pState, SSessionKey* key, void* buf, int32_t si return TSDB_CODE_SUCCESS; } -int32_t 
buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, - SExprSupp* pSup, SGroupResInfo* pGroupResInfo) { - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pTaskInfo->storageAPI; +int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup, + SGroupResInfo* pGroupResInfo) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pTaskInfo->storageAPI; SExprInfo* pExprInfo = pSup->pExprInfo; int32_t numOfExprs = pSup->numOfExprs; @@ -986,8 +991,8 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa // ASSERT(code == 0); if (code == -1) { // for history - qWarn("===stream===not found session result key:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, pKey->win.skey, - pKey->win.ekey, pKey->groupId); + qWarn("===stream===not found session result key:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64 "", + pKey->win.skey, pKey->win.ekey, pKey->groupId); pGroupResInfo->index += 1; continue; } @@ -1004,7 +1009,8 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa pBlock->info.id.groupId = pKey->groupId; void* tbname = NULL; - if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, &tbname) < 0) { + if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, + &tbname) < 0) { pBlock->info.parTbName[0] = 0; } else { memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN); diff --git a/source/libs/executor/src/filloperator.c b/source/libs/executor/src/filloperator.c index f836e71bc9..9fce058c4c 100644 --- a/source/libs/executor/src/filloperator.c +++ b/source/libs/executor/src/filloperator.c @@ -1367,6 +1367,7 @@ static SSDataBlock* doStreamFill(SOperatorInfo* pOperator) { memcpy(pInfo->pSrcBlock->info.parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN); pInfo->srcRowIndex = -1; } break; + case STREAM_CHECKPOINT: case STREAM_CREATE_CHILD_TABLE: { return pBlock; } break; diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index 9ec95faa38..fb2204eae8 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -1130,9 +1130,13 @@ static SSDataBlock* doStreamHashPartition(SOperatorInfo* pOperator) { printDataBlock(pInfo->pDelRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); return pInfo->pDelRes; } break; - default: - ASSERTS(pBlock->info.type == STREAM_CREATE_CHILD_TABLE || pBlock->info.type == STREAM_RETRIEVE, "invalid SSDataBlock type"); + case STREAM_CREATE_CHILD_TABLE: + case STREAM_RETRIEVE: + case STREAM_CHECKPOINT: { return pBlock; + } + default: + ASSERTS(0, "invalid SSDataBlock type"); } // there is an scalar expression that needs to be calculated right before apply the group aggregation. 
@@ -1185,8 +1189,8 @@ void initParDownStream(SOperatorInfo* downstream, SPartitionBySupporter* pParSup SStreamScanInfo* pScanInfo = downstream->info; pScanInfo->partitionSup = *pParSup; pScanInfo->pPartScalarSup = pExpr; - if (!pScanInfo->igCheckUpdate && !pScanInfo->pUpdateInfo) { - pScanInfo->pUpdateInfo = pAPI->stateStore.updateInfoInit(60000, TSDB_TIME_PRECISION_MILLI, 0); + if (!pScanInfo->pUpdateInfo) { + pScanInfo->pUpdateInfo = pAPI->stateStore.updateInfoInit(60000, TSDB_TIME_PRECISION_MILLI, 0, pScanInfo->igCheckUpdate); } } diff --git a/source/libs/executor/src/operator.c b/source/libs/executor/src/operator.c index d80cf812f8..6f9aac7595 100644 --- a/source/libs/executor/src/operator.c +++ b/source/libs/executor/src/operator.c @@ -479,7 +479,7 @@ SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SR SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; pOptr = createIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type) { - pOptr = createStreamIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo); + pOptr = createStreamIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, pHandle); } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_ALIGNED_INTERVAL == type) { SMergeAlignedIntervalPhysiNode* pIntervalPhyNode = (SMergeAlignedIntervalPhysiNode*)pPhyNode; pOptr = createMergeAlignedIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo); @@ -488,10 +488,10 @@ SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SR pOptr = createMergeIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL == type) { int32_t children = 0; - pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); + pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children, pHandle); } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL == type) { int32_t children = pHandle->numOfVgroups; - pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); + pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children, pHandle); } else if (QUERY_NODE_PHYSICAL_PLAN_SORT == type) { pOptr = createSortOperatorInfo(ops[0], (SSortPhysiNode*)pPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_GROUP_SORT == type) { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 8d35a02c57..d0b892e0f1 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -38,11 +38,12 @@ int32_t scanDebug = 0; -#define MULTI_READER_MAX_TABLE_NUM 5000 -#define SET_REVERSE_SCAN_FLAG(_info) ((_info)->scanFlag = REVERSE_SCAN) -#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC)) -#define STREAM_SCAN_OP_NAME "StreamScanOperator" -#define STREAM_SCAN_OP_STATE_NAME "StreamScanFillHistoryState" +#define MULTI_READER_MAX_TABLE_NUM 5000 +#define SET_REVERSE_SCAN_FLAG(_info) ((_info)->scanFlag = REVERSE_SCAN) +#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? 
TSDB_ORDER_DESC : TSDB_ORDER_ASC)) +#define STREAM_SCAN_OP_NAME "StreamScanOperator" +#define STREAM_SCAN_OP_STATE_NAME "StreamScanFillHistoryState" +#define STREAM_SCAN_OP_CHECKPOINT_NAME "StreamScanOperator_Checkpoint" typedef struct STableMergeScanExecInfo { SFileBlockLoadRecorder blockRecorder; @@ -1958,23 +1959,46 @@ static void doCheckUpdate(SStreamScanInfo* pInfo, TSKEY endKey, SSDataBlock* pBl } } -//int32_t streamScanOperatorEncode(SStreamScanInfo* pInfo, void** pBuff) { -// int32_t len = updateInfoSerialize(NULL, 0, pInfo->pUpdateInfo); -// *pBuff = taosMemoryCalloc(1, len); -// updateInfoSerialize(*pBuff, len, pInfo->pUpdateInfo); -// return len; -//} +int32_t streamScanOperatorEncode(SStreamScanInfo* pInfo, void** pBuff) { + int32_t len = pInfo->stateStore.updateInfoSerialize(NULL, 0, pInfo->pUpdateInfo); + len += encodeSTimeWindowAggSupp(NULL, &pInfo->twAggSup); + *pBuff = taosMemoryCalloc(1, len); + void* buf = *pBuff; + encodeSTimeWindowAggSupp(&buf, &pInfo->twAggSup); + pInfo->stateStore.updateInfoSerialize(buf, len, pInfo->pUpdateInfo); + return len; +} + +void streamScanOperatorSaveCheckpoint(SStreamScanInfo* pInfo) { + if (!pInfo->pState) { + return; + } + void* pBuf = NULL; + int32_t len = streamScanOperatorEncode(pInfo, &pBuf); + pInfo->stateStore.streamStateSaveInfo(pInfo->pState, STREAM_SCAN_OP_CHECKPOINT_NAME, strlen(STREAM_SCAN_OP_CHECKPOINT_NAME), pBuf, len); + taosMemoryFree(pBuf); + pInfo->stateStore.streamStateCommit(pInfo->pState); +} // other properties are recovered from the execution plan void streamScanOperatorDecode(void* pBuff, int32_t len, SStreamScanInfo* pInfo) { if (!pBuff || len == 0) { return; } + void* buf = pBuff; + buf = decodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + int32_t tlen = len - encodeSTimeWindowAggSupp(NULL, &pInfo->twAggSup); + if (tlen == 0) { + return; + } void* pUpInfo = taosMemoryCalloc(1, sizeof(SUpdateInfo)); - int32_t code = pInfo->stateStore.updateInfoDeserialize(pBuff, len, pUpInfo); + int32_t code = pInfo->stateStore.updateInfoDeserialize(buf, tlen, pUpInfo); if (code == TSDB_CODE_SUCCESS) { + pInfo->stateStore.updateInfoDestroy(pInfo->pUpdateInfo); pInfo->pUpdateInfo = pUpInfo; + } else { + taosMemoryFree(pUpInfo); } } @@ -2155,6 +2179,9 @@ FETCH_NEXT_BLOCK: } } } break; + case STREAM_CHECKPOINT: { + qError("stream check point error. 
msg type: STREAM_INPUT__DATA_BLOCK"); + } break; default: break; } @@ -2295,6 +2322,23 @@ FETCH_NEXT_BLOCK: } goto NEXT_SUBMIT_BLK; + } else if (pInfo->blockType == STREAM_INPUT__CHECKPOINT) { + if (pInfo->validBlockIndex >= total) { + doClearBufferedBlocks(pInfo); + return NULL; + } + + int32_t current = pInfo->validBlockIndex++; + qDebug("process %d/%d input data blocks, %s", current, (int32_t) total, id); + + SPackedData* pData = taosArrayGet(pInfo->pBlockLists, current); + SSDataBlock* pBlock = taosArrayGet(pData->pDataBlock, 0); + + if (pBlock->info.type == STREAM_CHECKPOINT) { + streamScanOperatorSaveCheckpoint(pInfo); + } + // printDataBlock(pBlock, "stream scan ck"); + return pInfo->pCheckpointRes; } return NULL; @@ -2458,11 +2502,12 @@ static void destroyStreamScanOperatorInfo(void* param) { pStreamScan->stateStore.updateInfoDestroy(pStreamScan->pUpdateInfo); blockDataDestroy(pStreamScan->pRes); blockDataDestroy(pStreamScan->pUpdateRes); - blockDataDestroy(pStreamScan->pPullDataRes); blockDataDestroy(pStreamScan->pDeleteDataRes); blockDataDestroy(pStreamScan->pUpdateDataRes); blockDataDestroy(pStreamScan->pCreateTbRes); taosArrayDestroy(pStreamScan->pBlockLists); + blockDataDestroy(pStreamScan->pCheckpointRes); + taosMemoryFree(pStreamScan); } @@ -2669,7 +2714,6 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys pInfo->scanMode = STREAM_SCAN_FROM_READERHANDLE; pInfo->windowSup = (SWindowSupporter){.pStreamAggSup = NULL, .gap = -1, .parentType = QUERY_NODE_PHYSICAL_PLAN}; pInfo->groupId = 0; - pInfo->pPullDataRes = createSpecialDataBlock(STREAM_RETRIEVE); pInfo->pStreamScanOp = pOperator; pInfo->deleteDataIndex = 0; pInfo->pDeleteDataRes = createSpecialDataBlock(STREAM_DELETE_DATA); @@ -2683,14 +2727,17 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys pInfo->pState = pTaskInfo->streamInfo.pState; pInfo->stateStore = pTaskInfo->storageAPI.stateStore; pInfo->readerFn = pTaskInfo->storageAPI.tqReaderFn; + pInfo->pCheckpointRes = createSpecialDataBlock(STREAM_CHECKPOINT); // for stream if (pTaskInfo->streamInfo.pState) { void* buff = NULL; int32_t len = 0; - pAPI->stateStore.streamStateGetInfo(pTaskInfo->streamInfo.pState, STREAM_SCAN_OP_NAME, strlen(STREAM_SCAN_OP_NAME), &buff, &len); - streamScanOperatorDecode(buff, len, pInfo); - taosMemoryFree(buff); + int32_t res = pAPI->stateStore.streamStateGetInfo(pTaskInfo->streamInfo.pState, STREAM_SCAN_OP_CHECKPOINT_NAME, strlen(STREAM_SCAN_OP_CHECKPOINT_NAME), &buff, &len); + if (res == TSDB_CODE_SUCCESS) { + streamScanOperatorDecode(buff, len, pInfo); + taosMemoryFree(buff); + } } setOperatorInfo(pOperator, STREAM_SCAN_OP_NAME, QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN, false, OP_NOT_OPENED, pInfo, diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 01514ea88a..c0e2a44153 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -18,6 +18,7 @@ #include "functionMgt.h" #include "operator.h" #include "querytask.h" +#include "tchecksum.h" #include "tcommon.h" #include "tcompare.h" #include "tdatablock.h" @@ -26,12 +27,15 @@ #include "tlog.h" #include "ttime.h" -#define IS_FINAL_INTERVAL_OP(op) ((op)->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL) -#define IS_FINAL_SESSION_OP(op) ((op)->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) -#define DEAULT_DELETE_MARK (1000LL * 60LL * 60LL * 24LL * 365LL * 10LL); 
-#define STREAM_INTERVAL_OP_STATE_NAME "StreamIntervalHistoryState" -#define STREAM_SESSION_OP_STATE_NAME "StreamSessionHistoryState" -#define STREAM_STATE_OP_STATE_NAME "StreamStateHistoryState" +#define IS_FINAL_INTERVAL_OP(op) ((op)->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL) +#define IS_FINAL_SESSION_OP(op) ((op)->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) +#define DEAULT_DELETE_MARK (1000LL * 60LL * 60LL * 24LL * 365LL * 10LL); +#define STREAM_INTERVAL_OP_STATE_NAME "StreamIntervalHistoryState" +#define STREAM_SESSION_OP_STATE_NAME "StreamSessionHistoryState" +#define STREAM_STATE_OP_STATE_NAME "StreamStateHistoryState" +#define STREAM_INTERVAL_OP_CHECKPOINT_NAME "StreamIntervalOperator_Checkpoint" +#define STREAM_SESSION_OP_CHECKPOINT_NAME "StreamSessionOperator_Checkpoint" +#define STREAM_STATE_OP_CHECKPOINT_NAME "StreamStateOperator_Checkpoint" typedef struct SStateWindowInfo { SResultWindowInfo winInfo; @@ -353,7 +357,7 @@ static void doBuildDeleteResult(SStreamIntervalOperatorInfo* pInfo, SArray* pWin for (int32_t i = *index; i < size; i++) { SWinKey* pWin = taosArrayGet(pWins, i); void* tbname = NULL; - pInfo->statestore.streamStateGetParName(pInfo->pState, pWin->groupId, &tbname); + pInfo->stateStore.streamStateGetParName(pInfo->pState, pWin->groupId, &tbname); if (tbname == NULL) { appendOneRowToStreamSpecialBlock(pBlock, &pWin->ts, &pWin->ts, &uid, &pWin->groupId, NULL); } else { @@ -361,7 +365,7 @@ static void doBuildDeleteResult(SStreamIntervalOperatorInfo* pInfo, SArray* pWin STR_WITH_MAXSIZE_TO_VARSTR(parTbName, tbname, sizeof(parTbName)); appendOneRowToStreamSpecialBlock(pBlock, &pWin->ts, &pWin->ts, &uid, &pWin->groupId, parTbName); } - pInfo->statestore.streamStateFreeVal(tbname); + pInfo->stateStore.streamStateFreeVal(tbname); (*index)++; } } @@ -381,7 +385,7 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { blockDataDestroy(pInfo->pPullDataRes); taosArrayDestroy(pInfo->pDelWins); blockDataDestroy(pInfo->pDelRes); - pInfo->statestore.streamFileStateDestroy(pInfo->pState->pFileState); + pInfo->stateStore.streamFileStateDestroy(pInfo->pState->pFileState); taosMemoryFreeClear(pInfo->pState); nodesDestroyNode((SNode*)pInfo->pPhyNode); @@ -392,6 +396,8 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { pInfo->pUpdatedMap = NULL; pInfo->pUpdated = taosArrayDestroy(pInfo->pUpdated); + blockDataDestroy(pInfo->pCheckpointRes); + taosMemoryFreeClear(param); } @@ -416,7 +422,8 @@ void initIntervalDownStream(SOperatorInfo* downstream, uint16_t type, SStreamInt pScanInfo->windowSup.parentType = type; pScanInfo->windowSup.pIntervalAggSup = &pInfo->aggSup; if (!pScanInfo->pUpdateInfo) { - pScanInfo->pUpdateInfo = pAPI->updateInfoInitP(&pInfo->interval, pInfo->twAggSup.waterMark); + pScanInfo->pUpdateInfo = + pAPI->updateInfoInitP(&pInfo->interval, pInfo->twAggSup.waterMark, pScanInfo->igCheckUpdate); } pScanInfo->interval = pInfo->interval; @@ -513,7 +520,7 @@ static void clearStreamIntervalOperator(SStreamIntervalOperatorInfo* pInfo) { clearDiskbasedBuf(pInfo->aggSup.pResultBuf); initResultRowInfo(&pInfo->binfo.resultRowInfo); pInfo->aggSup.currentPageId = -1; - pInfo->statestore.streamStateClear(pInfo->pState); + pInfo->stateStore.streamStateClear(pInfo->pState); } static void clearSpecialDataBlock(SSDataBlock* pBlock) { @@ -745,11 +752,6 @@ static int32_t getNextQualifiedFinalWindow(SInterval* pInterval, STimeWindow* pN return startPos; } -static void setStreamDataVersion(SExecTaskInfo* pTaskInfo, int64_t version, 
int64_t ckId) { - pTaskInfo->streamInfo.dataVersion = version; - pTaskInfo->streamInfo.checkPointId = ckId; -} - static void doStreamIntervalAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBlock, uint64_t groupId, SSHashObj* pUpdatedMap) { SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)pOperator->info; @@ -794,7 +796,7 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDat .groupId = groupId, }; void* chIds = taosHashGet(pInfo->pPullDataMap, &winRes, sizeof(SWinKey)); - if (isDeletedStreamWindow(&nextWin, groupId, pInfo->pState, &pInfo->twAggSup, &pInfo->statestore) && isClosed && + if (isDeletedStreamWindow(&nextWin, groupId, pInfo->pState, &pInfo->twAggSup, &pInfo->stateStore) && isClosed && !chIds) { SPullWindowInfo pull = { .window = nextWin, .groupId = groupId, .calWin.skey = nextWin.skey, .calWin.ekey = nextWin.skey}; @@ -826,7 +828,7 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDat } int32_t code = setIntervalOutputBuf(pInfo->pState, &nextWin, &pResPos, groupId, pSup->pCtx, numOfOutput, - pSup->rowEntryInfoOffset, &pInfo->aggSup, &pInfo->statestore); + pSup->rowEntryInfoOffset, &pInfo->aggSup, &pInfo->stateStore); pResult = (SResultRow*)pResPos->pRowBuff; if (code != TSDB_CODE_SUCCESS || pResult == NULL) { T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); @@ -914,6 +916,214 @@ static void resetUnCloseWinInfo(SSHashObj* winMap) { } } +int32_t encodeSWinKey(void** buf, SWinKey* key) { + int32_t tlen = 0; + tlen += taosEncodeFixedI64(buf, key->ts); + tlen += taosEncodeFixedU64(buf, key->groupId); + return tlen; +} + +void* decodeSWinKey(void* buf, SWinKey* key) { + buf = taosDecodeFixedI64(buf, &key->ts); + buf = taosDecodeFixedU64(buf, &key->groupId); + return buf; +} + +int32_t encodeSRowBuffPos(void** buf, SRowBuffPos* pos) { + int32_t tlen = 0; + tlen += encodeSWinKey(buf, pos->pKey); + return tlen; +} + +void* decodeSRowBuffPos(void* buf, SRowBuffPos* pos) { + buf = decodeSWinKey(buf, pos->pKey); + return buf; +} + +int32_t encodeSTimeWindowAggSupp(void** buf, STimeWindowAggSupp* pTwAggSup) { + int32_t tlen = 0; + tlen += taosEncodeFixedI64(buf, pTwAggSup->minTs); + tlen += taosEncodeFixedI64(buf, pTwAggSup->maxTs); + return tlen; +} + +void* decodeSTimeWindowAggSupp(void* buf, STimeWindowAggSupp* pTwAggSup) { + buf = taosDecodeFixedI64(buf, &pTwAggSup->minTs); + buf = taosDecodeFixedI64(buf, &pTwAggSup->maxTs); + return buf; +} + +int32_t encodeSTimeWindow(void** buf, STimeWindow* pWin) { + int32_t tlen = 0; + tlen += taosEncodeFixedI64(buf, pWin->skey); + tlen += taosEncodeFixedI64(buf, pWin->ekey); + return tlen; +} + +void* decodeSTimeWindow(void* buf, STimeWindow* pWin) { + buf = taosDecodeFixedI64(buf, &pWin->skey); + buf = taosDecodeFixedI64(buf, &pWin->ekey); + return buf; +} + +int32_t encodeSPullWindowInfo(void** buf, SPullWindowInfo* pPullInfo) { + int32_t tlen = 0; + tlen += encodeSTimeWindow(buf, &pPullInfo->calWin); + tlen += taosEncodeFixedU64(buf, pPullInfo->groupId); + tlen += encodeSTimeWindow(buf, &pPullInfo->window); + return tlen; +} + +void* decodeSPullWindowInfo(void* buf, SPullWindowInfo* pPullInfo) { + buf = decodeSTimeWindow(buf, &pPullInfo->calWin); + buf = taosDecodeFixedU64(buf, &pPullInfo->groupId); + buf = decodeSTimeWindow(buf, &pPullInfo->window); + return buf; +} + +int32_t encodeSPullWindowInfoArray(void** buf, SArray* pPullInfos) { + int32_t tlen = 0; + int32_t size = taosArrayGetSize(pPullInfos); + tlen += taosEncodeFixedI32(buf, size); + for (int32_t i 
= 0; i < size; i++) { + void* pItem = taosArrayGet(pPullInfos, i); + tlen += encodeSPullWindowInfo(buf, pItem); + } + return tlen; +} + +void* decodeSPullWindowInfoArray(void* buf, SArray* pPullInfos) { + int32_t size = 0; + buf = taosDecodeFixedI32(buf, &size); + for (int32_t i = 0; i < size; i++) { + SPullWindowInfo item = {0}; + buf = decodeSPullWindowInfo(buf, &item); + taosArrayPush(pPullInfos, &item); + } + return buf; +} + +int32_t doStreamIntervalEncodeOpState(void** buf, int32_t len, SOperatorInfo* pOperator) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + if (!pInfo) { + return 0; + } + + void* pData = (buf == NULL) ? NULL : *buf; + + // 1.pResultRowHashTable + int32_t tlen = 0; + int32_t mapSize = tSimpleHashGetSize(pInfo->aggSup.pResultRowHashTable); + tlen += taosEncodeFixedI32(buf, mapSize); + void* pIte = NULL; + size_t keyLen = 0; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pInfo->aggSup.pResultRowHashTable, pIte, &iter)) != NULL) { + void* key = tSimpleHashGetKey(pIte, &keyLen); + tlen += encodeSWinKey(buf, key); + } + + // 2.twAggSup + tlen += encodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + + // 3.pPullDataMap + int32_t size = taosHashGetSize(pInfo->pPullDataMap); + tlen += taosEncodeFixedI32(buf, size); + pIte = NULL; + keyLen = 0; + while ((pIte = taosHashIterate(pInfo->pPullDataMap, pIte)) != NULL) { + void* key = taosHashGetKey(pIte, &keyLen); + tlen += encodeSWinKey(buf, key); + SArray* pArray = (SArray*)pIte; + int32_t chSize = taosArrayGetSize(pArray); + tlen += taosEncodeFixedI32(buf, chSize); + for (int32_t i = 0; i < chSize; i++) { + void* pChItem = taosArrayGet(pArray, i); + tlen += taosEncodeFixedI32(buf, *(int32_t*)pChItem); + } + } + + // 4.pPullWins + tlen += encodeSPullWindowInfoArray(buf, pInfo->pPullWins); + + // 5.dataVersion + tlen += taosEncodeFixedI64(buf, pInfo->dataVersion); + + // 6.checksum + if (buf) { + uint32_t cksum = taosCalcChecksum(0, pData, len - sizeof(uint32_t)); + tlen += taosEncodeFixedU32(buf, cksum); + } else { + tlen += sizeof(uint32_t); + } + + return tlen; +} + +void doStreamIntervalDecodeOpState(void* buf, int32_t len, SOperatorInfo* pOperator) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + if (!pInfo) { + return; + } + + // 6.checksum + int32_t dataLen = len - sizeof(uint32_t); + void* pCksum = POINTER_SHIFT(buf, dataLen); + if (taosCheckChecksum(buf, dataLen, *(uint32_t*)pCksum) != TSDB_CODE_SUCCESS) { + ASSERT(0); // debug + qError("stream interval state is invalid"); + return; + } + + // 1.pResultRowHashTable + int32_t mapSize = 0; + buf = taosDecodeFixedI32(buf, &mapSize); + for (int32_t i = 0; i < mapSize; i++) { + SWinKey key = {0}; + buf = decodeSWinKey(buf, &key); + SRowBuffPos* pPos = NULL; + int32_t resSize = pInfo->aggSup.resultRowSize; + pInfo->stateStore.streamStateAddIfNotExist(pInfo->pState, &key, (void**)&pPos, &resSize); + tSimpleHashPut(pInfo->aggSup.pResultRowHashTable, &key, sizeof(SWinKey), &pPos, POINTER_BYTES); + } + + // 2.twAggSup + buf = decodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + + // 3.pPullDataMap + int32_t size = 0; + buf = taosDecodeFixedI32(buf, &size); + for (int32_t i = 0; i < size; i++) { + SWinKey key = {0}; + SArray* pArray = taosArrayInit(0, sizeof(int32_t)); + buf = decodeSWinKey(buf, &key); + int32_t chSize = 0; + buf = taosDecodeFixedI32(buf, &chSize); + for (int32_t i = 0; i < chSize; i++) { + int32_t chId = 0; + buf = taosDecodeFixedI32(buf, &chId); + taosArrayPush(pArray, &chId); + } + taosHashPut(pInfo->pPullDataMap, &key, 
sizeof(SWinKey), &pArray, POINTER_BYTES); + } + + // 4.pPullWins + buf = decodeSPullWindowInfoArray(buf, pInfo->pPullWins); + + // 5.dataVersion + buf = taosDecodeFixedI64(buf, &pInfo->dataVersion); +} + +void doStreamIntervalSaveCheckpoint(SOperatorInfo* pOperator) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + int32_t len = doStreamIntervalEncodeOpState(NULL, 0, pOperator); + void* buf = taosMemoryCalloc(1, len); + void* pBuf = buf; + len = doStreamIntervalEncodeOpState(&pBuf, len, pOperator); + pInfo->stateStore.streamStateSaveInfo(pInfo->pState, STREAM_INTERVAL_OP_CHECKPOINT_NAME, + strlen(STREAM_INTERVAL_OP_CHECKPOINT_NAME), buf, len); + taosMemoryFree(buf); +} static SSDataBlock* buildIntervalResult(SOperatorInfo* pOperator) { SStreamIntervalOperatorInfo* pInfo = pOperator->info; @@ -966,21 +1176,18 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { resetUnCloseWinInfo(pInfo->aggSup.pResultRowHashTable); } + if (pInfo->reCkBlock) { + pInfo->reCkBlock = false; + printDataBlock(pInfo->pCheckpointRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); + return pInfo->pCheckpointRes; + } + setOperatorCompleted(pOperator); if (!IS_FINAL_INTERVAL_OP(pOperator)) { clearFunctionContext(&pOperator->exprSupp); // semi interval operator clear disk buffer clearStreamIntervalOperator(pInfo); - setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId); - qDebug("stask:%s ===stream===%s clear", GET_TASKID(pTaskInfo), getStreamOpName(pOperator->operatorType)); - } else { - if (pInfo->twAggSup.maxTs > 0 && - pInfo->twAggSup.maxTs - pInfo->twAggSup.checkPointInterval > pInfo->twAggSup.checkPointTs) { - pAPI->stateStore.streamStateCommit(pInfo->pState); - pAPI->stateStore.streamStateDeleteCheckPoint(pInfo->pState, pInfo->twAggSup.maxTs - pInfo->twAggSup.deleteMark); - pInfo->twAggSup.checkPointTs = pInfo->twAggSup.maxTs; - } - qDebug("stask:%s ===stream===%s close", GET_TASKID(pTaskInfo), getStreamOpName(pOperator->operatorType)); + qDebug("===stream===clear semi operator"); } return NULL; } else { @@ -1075,6 +1282,11 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { continue; } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { return pBlock; + } else if (pBlock->info.type == STREAM_CHECKPOINT) { + pAPI->stateStore.streamStateCommit(pInfo->pState); + doStreamIntervalSaveCheckpoint(pOperator); + copyDataBlock(pInfo->pCheckpointRes, pBlock); + continue; } else { ASSERTS(pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); } @@ -1155,7 +1367,7 @@ static void streamIntervalReleaseState(SOperatorInfo* pOperator) { if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) { SStreamIntervalOperatorInfo* pInfo = pOperator->info; int32_t resSize = sizeof(TSKEY); - pInfo->statestore.streamStateSaveInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, + pInfo->stateStore.streamStateSaveInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, strlen(STREAM_INTERVAL_OP_STATE_NAME), &pInfo->twAggSup.maxTs, resSize); } SStreamIntervalOperatorInfo* pInfo = pOperator->info; @@ -1172,12 +1384,12 @@ void streamIntervalReloadState(SOperatorInfo* pOperator) { SStreamIntervalOperatorInfo* pInfo = pOperator->info; int32_t size = 0; void* pBuf = NULL; - int32_t code = pInfo->statestore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, + int32_t code = pInfo->stateStore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, strlen(STREAM_INTERVAL_OP_STATE_NAME), &pBuf, 
&size); TSKEY ts = *(TSKEY*)pBuf; taosMemoryFree(pBuf); pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts); - pInfo->statestore.streamStateReloadInfo(pInfo->pState, ts); + pInfo->stateStore.streamStateReloadInfo(pInfo->pState, ts); } SOperatorInfo* downstream = pOperator->pDownstream[0]; if (downstream->fpSet.reloadStreamStateFn) { @@ -1186,7 +1398,8 @@ void streamIntervalReloadState(SOperatorInfo* pOperator) { } SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, - SExecTaskInfo* pTaskInfo, int32_t numOfChild) { + SExecTaskInfo* pTaskInfo, int32_t numOfChild, + SReadHandle* pHandle) { SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; SStreamIntervalOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamIntervalOperatorInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); @@ -1211,9 +1424,6 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, .deleteMark = getDeleteMark(pIntervalPhyNode), .deleteMarkSaved = 0, .calTriggerSaved = 0, - .checkPointTs = 0, - .checkPointInterval = - convertTimePrecision(tsCheckpointInterval, TSDB_TIME_PRECISION_MILLI, pInfo->interval.precision), }; ASSERTS(pInfo->twAggSup.calTrigger != STREAM_TRIGGER_MAX_DELAY, "trigger type should not be max delay"); pInfo->primaryTsIndex = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId; @@ -1266,12 +1476,13 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, pInfo->pUpdated = NULL; pInfo->pUpdatedMap = NULL; int32_t funResSize = getMaxFunResSize(&pOperator->exprSupp, numOfCols); - pInfo->pState->pFileState = - pAPI->stateStore.streamFileStateInit(tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, - compareTs, pInfo->pState, pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo)); + pInfo->pState->pFileState = pAPI->stateStore.streamFileStateInit( + tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, compareTs, pInfo->pState, + pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo), pHandle->checkpointId); pInfo->dataVersion = 0; - pInfo->statestore = pTaskInfo->storageAPI.stateStore; + pInfo->stateStore = pTaskInfo->storageAPI.stateStore; pInfo->recvGetAll = false; + pInfo->pCheckpointRes = createSpecialDataBlock(STREAM_CHECKPOINT); pOperator->operatorType = pPhyNode->type; if (!IS_FINAL_INTERVAL_OP(pOperator) || numOfChild == 0) { @@ -1293,6 +1504,16 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, goto _error; } + // for stream + void* buff = NULL; + int32_t len = 0; + int32_t res = pAPI->stateStore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_CHECKPOINT_NAME, + strlen(STREAM_INTERVAL_OP_CHECKPOINT_NAME), &buff, &len); + if (res == TSDB_CODE_SUCCESS) { + doStreamIntervalDecodeOpState(buff, len, pOperator); + taosMemoryFree(buff); + } + return pOperator; _error: @@ -1314,6 +1535,7 @@ void destroyStreamSessionAggOperatorInfo(void* param) { SStreamSessionAggOperatorInfo* pInfo = (SStreamSessionAggOperatorInfo*)param; cleanupBasicInfo(&pInfo->binfo); destroyStreamAggSupporter(&pInfo->streamAggSup); + cleanupExprSupp(&pInfo->scalarSupp); if (pInfo->pChildren != NULL) { int32_t size = taosArrayGetSize(pInfo->pChildren); @@ -1327,11 +1549,13 @@ void destroyStreamSessionAggOperatorInfo(void* param) { colDataDestroy(&pInfo->twAggSup.timeWindowData); blockDataDestroy(pInfo->pDelRes); blockDataDestroy(pInfo->pWinBlock); - blockDataDestroy(pInfo->pUpdateRes); 
tSimpleHashCleanup(pInfo->pStUpdated); tSimpleHashCleanup(pInfo->pStDeleted); + pInfo->pUpdated = taosArrayDestroy(pInfo->pUpdated); taosArrayDestroy(pInfo->historyWins); + blockDataDestroy(pInfo->pCheckpointRes); + taosMemoryFreeClear(param); } @@ -1374,7 +1598,8 @@ void initDownStream(SOperatorInfo* downstream, SStreamAggSupporter* pAggSup, uin pScanInfo->windowSup = (SWindowSupporter){.pStreamAggSup = pAggSup, .gap = pAggSup->gap, .parentType = type}; pScanInfo->pState = pAggSup->pState; if (!pScanInfo->pUpdateInfo) { - pScanInfo->pUpdateInfo = pAggSup->stateStore.updateInfoInit(60000, TSDB_TIME_PRECISION_MILLI, pTwSup->waterMark); + pScanInfo->pUpdateInfo = pAggSup->stateStore.updateInfoInit(60000, TSDB_TIME_PRECISION_MILLI, pTwSup->waterMark, + pScanInfo->igCheckUpdate); } pScanInfo->twAggSup = *pTwSup; } @@ -1651,6 +1876,31 @@ static int32_t compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* return winNum; } +static void compactSessionSemiWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin) { + SExprSupp* pSup = &pOperator->exprSupp; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SResultRow* pCurResult = NULL; + int32_t numOfOutput = pOperator->exprSupp.numOfExprs; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + // Just look for the window behind StartIndex + while (1) { + SResultWindowInfo winInfo = {0}; + SStreamStateCur* pCur = getNextSessionWinInfo(pAggSup, NULL, pCurWin, &winInfo); + if (!IS_VALID_SESSION_WIN(winInfo) || !isInWindow(pCurWin, winInfo.sessionWin.win.skey, pAggSup->gap) || + !inWinRange(&pAggSup->winRange, &winInfo.sessionWin.win)) { + taosMemoryFree(winInfo.pOutputBuf); + pAPI->stateStore.streamStateFreeCur(pCur); + break; + } + pCurWin->sessionWin.win.ekey = TMAX(pCurWin->sessionWin.win.ekey, winInfo.sessionWin.win.ekey); + doDeleteSessionWindow(pAggSup, &winInfo.sessionWin); + pAPI->stateStore.streamStateFreeCur(pCur); + taosMemoryFree(winInfo.pOutputBuf); + } +} + int32_t saveSessionOutputBuf(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo) { saveSessionDiscBuf(pAggSup->pState, &pWinInfo->sessionWin, pWinInfo->pOutputBuf, pAggSup->resultRowSize, &pAggSup->stateStore); @@ -1858,6 +2108,7 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS int32_t code = getSessionWinBuf(pChAggSup, pCur, &childWin); if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &childWin.sessionWin.win)) { + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)childWin.pOutputBuf, &pAggSup->stateStore); continue; } @@ -1866,6 +2117,7 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS setSessionOutputBuf(pAggSup, pWinKey->win.skey, pWinKey->win.ekey, pWinKey->groupId, &parentWin); code = initSessionOutputBuf(&parentWin, &pResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset); if (code != TSDB_CODE_SUCCESS || pResult == NULL) { + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)childWin.pOutputBuf, &pAggSup->stateStore); break; } } @@ -1876,7 +2128,9 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS compactFunctions(pSup->pCtx, pChild->exprSupp.pCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); compactSessionWindow(pOperator, &parentWin, pStUpdated, NULL, true); saveResult(parentWin, pStUpdated); + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)childWin.pOutputBuf, &pAggSup->stateStore); } else 
{ + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)childWin.pOutputBuf, &pAggSup->stateStore); break; } } @@ -2003,6 +2257,137 @@ void getMaxTsWins(const SArray* pAllWins, SArray* pMaxWins) { } } +int32_t encodeSSessionKey(void** buf, SSessionKey* key) { + int32_t tlen = 0; + tlen += encodeSTimeWindow(buf, &key->win); + tlen += taosEncodeFixedU64(buf, key->groupId); + return tlen; +} + +void* decodeSSessionKey(void* buf, SSessionKey* key) { + buf = decodeSTimeWindow(buf, &key->win); + buf = taosDecodeFixedU64(buf, &key->groupId); + return buf; +} + +int32_t encodeSResultWindowInfo(void** buf, SResultWindowInfo* key, int32_t outLen) { + int32_t tlen = 0; + tlen += taosEncodeFixedBool(buf, key->isOutput); + tlen += encodeSSessionKey(buf, &key->sessionWin); + return tlen; +} + +void* decodeSResultWindowInfo(void* buf, SResultWindowInfo* key, int32_t outLen) { + buf = taosDecodeFixedBool(buf, &key->isOutput); + key->pOutputBuf = NULL; + buf = decodeSSessionKey(buf, &key->sessionWin); + return buf; +} + +int32_t doStreamSessionEncodeOpState(void** buf, int32_t len, SOperatorInfo* pOperator, bool isParent) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + if (!pInfo) { + return 0; + } + + void* pData = (buf == NULL) ? NULL : *buf; + + // 1.streamAggSup.pResultRows + int32_t tlen = 0; + int32_t mapSize = tSimpleHashGetSize(pInfo->streamAggSup.pResultRows); + tlen += taosEncodeFixedI32(buf, mapSize); + void* pIte = NULL; + size_t keyLen = 0; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pInfo->streamAggSup.pResultRows, pIte, &iter)) != NULL) { + void* key = taosHashGetKey(pIte, &keyLen); + tlen += encodeSSessionKey(buf, key); + tlen += encodeSResultWindowInfo(buf, pIte, pInfo->streamAggSup.resultRowSize); + } + + // 2.twAggSup + tlen += encodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + + // 3.pChildren + int32_t size = taosArrayGetSize(pInfo->pChildren); + tlen += taosEncodeFixedI32(buf, size); + for (int32_t i = 0; i < size; i++) { + SOperatorInfo* pChOp = taosArrayGetP(pInfo->pChildren, i); + tlen += doStreamSessionEncodeOpState(buf, 0, pChOp, false); + } + + // 4.dataVersion + tlen += taosEncodeFixedI32(buf, pInfo->dataVersion); + + // 5.checksum + if (isParent) { + if (buf) { + uint32_t cksum = taosCalcChecksum(0, pData, len - sizeof(uint32_t)); + tlen += taosEncodeFixedU32(buf, cksum); + } else { + tlen += sizeof(uint32_t); + } + } + + return tlen; +} + +void* doStreamSessionDecodeOpState(void* buf, int32_t len, SOperatorInfo* pOperator, bool isParent) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + if (!pInfo) { + return buf; + } + + // 5.checksum + if (isParent) { + int32_t dataLen = len - sizeof(uint32_t); + void* pCksum = POINTER_SHIFT(buf, dataLen); + if (taosCheckChecksum(buf, dataLen, *(uint32_t*)pCksum) != TSDB_CODE_SUCCESS) { + ASSERT(0); // debug + qError("stream interval state is invalid"); + return buf; + } + } + + // 1.streamAggSup.pResultRows + int32_t mapSize = 0; + buf = taosDecodeFixedI32(buf, &mapSize); + for (int32_t i = 0; i < mapSize; i++) { + SSessionKey key = {0}; + SResultWindowInfo winfo = {0}; + buf = decodeSSessionKey(buf, &key); + buf = decodeSResultWindowInfo(buf, &winfo, pInfo->streamAggSup.resultRowSize); + tSimpleHashPut(pInfo->streamAggSup.pResultRows, &key, sizeof(SSessionKey), &winfo, sizeof(SResultWindowInfo)); + } + + // 2.twAggSup + buf = decodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + + // 3.pChildren + int32_t size = 0; + buf = taosDecodeFixedI32(buf, &size); + ASSERT(size <= 
taosArrayGetSize(pInfo->pChildren)); + for (int32_t i = 0; i < size; i++) { + SOperatorInfo* pChOp = taosArrayGetP(pInfo->pChildren, i); + buf = doStreamSessionDecodeOpState(buf, 0, pChOp, false); + } + + // 4.dataVersion + buf = taosDecodeFixedI64(buf, &pInfo->dataVersion); + return buf; +} + +void doStreamSessionSaveCheckpoint(SOperatorInfo* pOperator) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + int32_t len = doStreamSessionEncodeOpState(NULL, 0, pOperator, true); + void* buf = taosMemoryCalloc(1, len); + void* pBuf = buf; + len = doStreamSessionEncodeOpState(&pBuf, len, pOperator, true); + pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_CHECKPOINT_NAME, + strlen(STREAM_SESSION_OP_CHECKPOINT_NAME), buf, len); + taosMemoryFree(buf); +} + static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { SExprSupp* pSup = &pOperator->exprSupp; SStreamSessionAggOperatorInfo* pInfo = pOperator->info; @@ -2058,6 +2443,11 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { continue; } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { return pBlock; + } else if (pBlock->info.type == STREAM_CHECKPOINT) { + pAggSup->stateStore.streamStateCommit(pAggSup->pState); + doStreamSessionSaveCheckpoint(pOperator); + copyDataBlock(pInfo->pCheckpointRes, pBlock); + continue; } else { ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); } @@ -2115,13 +2505,11 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { } void streamSessionReleaseState(SOperatorInfo* pOperator) { - if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION) { - SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); - pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_STATE_NAME, - strlen(STREAM_SESSION_OP_STATE_NAME), pInfo->historyWins->pData, - resSize); - } + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); + pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_STATE_NAME, + strlen(STREAM_SESSION_OP_STATE_NAME), pInfo->historyWins->pData, + resSize); SOperatorInfo* downstream = pOperator->pDownstream[0]; if (downstream->fpSet.releaseStreamStateFn) { downstream->fpSet.releaseStreamStateFn(downstream); @@ -2133,6 +2521,33 @@ void resetWinRange(STimeWindow* winRange) { winRange->ekey = INT64_MAX; } +void streamSessionSemiReloadState(SOperatorInfo* pOperator) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + resetWinRange(&pAggSup->winRange); + + SResultWindowInfo winInfo = {0}; + int32_t size = 0; + void* pBuf = NULL; + int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, + strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size); + int32_t num = size / sizeof(SSessionKey); + SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf; + ASSERT(size == num * sizeof(SSessionKey)); + for (int32_t i = 0; i < num; i++) { + SResultWindowInfo winInfo = {0}; + setSessionOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].win.ekey, pSeKeyBuf[i].groupId, &winInfo); + compactSessionSemiWindow(pOperator, &winInfo); + saveSessionOutputBuf(pAggSup, &winInfo); + } + taosMemoryFree(pBuf); + + SOperatorInfo* 
downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.reloadStreamStateFn) { + downstream->fpSet.reloadStreamStateFn(downstream); + } +} + void streamSessionReloadState(SOperatorInfo* pOperator) { SStreamSessionAggOperatorInfo* pInfo = pOperator->info; SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; @@ -2249,7 +2664,19 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh if (pHandle) { pInfo->isHistoryOp = pHandle->fillHistory; } + + pInfo->pCheckpointRes = createSpecialDataBlock(STREAM_CHECKPOINT); pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION; + // for stream + void* buff = NULL; + int32_t len = 0; + int32_t res = + pInfo->streamAggSup.stateStore.streamStateGetInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_CHECKPOINT_NAME, + strlen(STREAM_SESSION_OP_CHECKPOINT_NAME), &buff, &len); + if (res == TSDB_CODE_SUCCESS) { + doStreamSessionDecodeOpState(buff, len, pOperator, true); + taosMemoryFree(buff); + } setOperatorInfo(pOperator, getStreamOpName(pOperator->operatorType), QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION, true, OP_NOT_OPENED, pInfo, pTaskInfo); pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionAgg, NULL, destroyStreamSessionAggOperatorInfo, @@ -2316,7 +2743,6 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { while (1) { SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); if (pBlock == NULL) { - clearSpecialDataBlock(pInfo->pUpdateRes); pOperator->status = OP_RES_TO_RETURN; break; } @@ -2336,6 +2762,10 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { continue; } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { return pBlock; + } else if (pBlock->info.type == STREAM_CHECKPOINT) { + pAggSup->stateStore.streamStateCommit(pAggSup->pState); + doStreamSessionSaveCheckpoint(pOperator); + continue; } else { ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); } @@ -2357,6 +2787,11 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated); tSimpleHashCleanup(pInfo->pStUpdated); pInfo->pStUpdated = NULL; + + if(pInfo->isHistoryOp) { + getMaxTsWins(pInfo->pUpdated, pInfo->historyWins); + } + initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); pInfo->pUpdated = NULL; blockDataEnsureCapacity(pBInfo->pRes, pOperator->resultInfo.capacity); @@ -2387,12 +2822,12 @@ SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream pOperator->operatorType = pPhyNode->type; if (pPhyNode->type != QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) { - pInfo->pUpdateRes = createSpecialDataBlock(STREAM_CLEAR); - blockDataEnsureCapacity(pInfo->pUpdateRes, 128); pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL, destroyStreamSessionAggOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); + setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionSemiReloadState); } - setOperatorInfo(pOperator, getStreamOpName(pOperator->operatorType), pPhyNode->type, false, OP_NOT_OPENED, pInfo, pTaskInfo); + setOperatorInfo(pOperator, getStreamOpName(pOperator->operatorType), pPhyNode->type, false, OP_NOT_OPENED, pInfo, + pTaskInfo); if (numOfChild > 0) { pInfo->pChildren = taosArrayInit(numOfChild, sizeof(void*)); @@ -2428,6 +2863,7 @@ void destroyStreamStateOperatorInfo(void* param) { cleanupBasicInfo(&pInfo->binfo); 
destroyStreamAggSupporter(&pInfo->streamAggSup); cleanupGroupResInfo(&pInfo->groupResInfo); + cleanupExprSupp(&pInfo->scalarSupp); if (pInfo->pChildren != NULL) { int32_t size = taosArrayGetSize(pInfo->pChildren); for (int32_t i = 0; i < size; i++) { @@ -2441,6 +2877,9 @@ void destroyStreamStateOperatorInfo(void* param) { taosArrayDestroy(pInfo->historyWins); tSimpleHashCleanup(pInfo->pSeUpdated); tSimpleHashCleanup(pInfo->pSeDeleted); + pInfo->pUpdated = taosArrayDestroy(pInfo->pUpdated); + blockDataDestroy(pInfo->pCheckpointRes); + taosMemoryFreeClear(param); } @@ -2648,6 +3087,109 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl } } +int32_t doStreamStateEncodeOpState(void** buf, int32_t len, SOperatorInfo* pOperator, bool isParent) { + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + if (!pInfo) { + return 0; + } + + void* pData = (buf == NULL) ? NULL : *buf; + + // 1.streamAggSup.pResultRows + int32_t tlen = 0; + int32_t mapSize = tSimpleHashGetSize(pInfo->streamAggSup.pResultRows); + tlen += taosEncodeFixedI32(buf, mapSize); + void* pIte = NULL; + size_t keyLen = 0; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pInfo->streamAggSup.pResultRows, pIte, &iter)) != NULL) { + void* key = taosHashGetKey(pIte, &keyLen); + tlen += encodeSSessionKey(buf, key); + tlen += encodeSResultWindowInfo(buf, pIte, pInfo->streamAggSup.resultRowSize); + } + + // 2.twAggSup + tlen += encodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + + // 3.pChildren + int32_t size = taosArrayGetSize(pInfo->pChildren); + tlen += taosEncodeFixedI32(buf, size); + for (int32_t i = 0; i < size; i++) { + SOperatorInfo* pChOp = taosArrayGetP(pInfo->pChildren, i); + tlen += doStreamStateEncodeOpState(buf, 0, pChOp, false); + } + + // 4.dataVersion + tlen += taosEncodeFixedI32(buf, pInfo->dataVersion); + + // 5.checksum + if (isParent) { + if (buf) { + uint32_t cksum = taosCalcChecksum(0, pData, len - sizeof(uint32_t)); + tlen += taosEncodeFixedU32(buf, cksum); + } else { + tlen += sizeof(uint32_t); + } + } + + return tlen; +} + +void* doStreamStateDecodeOpState(void* buf, int32_t len, SOperatorInfo* pOperator, bool isParent) { + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + if (!pInfo) { + return buf; + } + + // 5.checksum + if (isParent) { + int32_t dataLen = len - sizeof(uint32_t); + void* pCksum = POINTER_SHIFT(buf, dataLen); + if (taosCheckChecksum(buf, dataLen, *(uint32_t*)pCksum) != TSDB_CODE_SUCCESS) { + ASSERT(0); // debug + qError("stream interval state is invalid"); + return buf; + } + } + + // 1.streamAggSup.pResultRows + int32_t mapSize = 0; + buf = taosDecodeFixedI32(buf, &mapSize); + for (int32_t i = 0; i < mapSize; i++) { + SSessionKey key = {0}; + SResultWindowInfo winfo = {0}; + buf = decodeSSessionKey(buf, &key); + buf = decodeSResultWindowInfo(buf, &winfo, pInfo->streamAggSup.resultRowSize); + tSimpleHashPut(pInfo->streamAggSup.pResultRows, &key, sizeof(SSessionKey), &winfo, sizeof(SResultWindowInfo)); + } + + // 2.twAggSup + buf = decodeSTimeWindowAggSupp(buf, &pInfo->twAggSup); + + // 3.pChildren + int32_t size = 0; + buf = taosDecodeFixedI32(buf, &size); + ASSERT(size <= taosArrayGetSize(pInfo->pChildren)); + for (int32_t i = 0; i < size; i++) { + SOperatorInfo* pChOp = taosArrayGetP(pInfo->pChildren, i); + buf = doStreamStateDecodeOpState(buf, 0, pChOp, false); + } + + // 4.dataVersion + buf = taosDecodeFixedI64(buf, &pInfo->dataVersion); + return buf; +} + +void doStreamStateSaveCheckpoint(SOperatorInfo* pOperator) { + 
SStreamStateAggOperatorInfo* pInfo = pOperator->info; + int32_t len = doStreamStateEncodeOpState(NULL, 0, pOperator, true); + void* buf = taosMemoryCalloc(1, len); + void* pBuf = buf; + len = doStreamStateEncodeOpState(&pBuf, len, pOperator, true); + pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_CHECKPOINT_NAME, + strlen(STREAM_STATE_OP_CHECKPOINT_NAME), buf, len); +} + static SSDataBlock* buildStateResult(SOperatorInfo* pOperator) { SStreamStateAggOperatorInfo* pInfo = pOperator->info; SOptrBasicInfo* pBInfo = &pInfo->binfo; @@ -2700,7 +3242,7 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { if (pBlock == NULL) { break; } - printDataBlock(pBlock, "single state recv", GET_TASKID(pTaskInfo)); + printSpecDataBlock(pBlock, getStreamOpName(pOperator->operatorType), "recv", GET_TASKID(pTaskInfo)); if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT || pBlock->info.type == STREAM_CLEAR) { @@ -2715,6 +3257,11 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { continue; } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { return pBlock; + } else if (pBlock->info.type == STREAM_CHECKPOINT) { + pInfo->streamAggSup.stateStore.streamStateCommit(pInfo->streamAggSup.pState); + doStreamSessionSaveCheckpoint(pOperator); + copyDataBlock(pInfo->pCheckpointRes, pBlock); + continue; } else { ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); } @@ -2926,6 +3473,19 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys pInfo->isHistoryOp = pHandle->fillHistory; } + pInfo->pCheckpointRes = createSpecialDataBlock(STREAM_CHECKPOINT); + + // for stream + void* buff = NULL; + int32_t len = 0; + int32_t res = + pInfo->streamAggSup.stateStore.streamStateGetInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_CHECKPOINT_NAME, + strlen(STREAM_STATE_OP_CHECKPOINT_NAME), &buff, &len); + if (res == TSDB_CODE_SUCCESS) { + doStreamStateDecodeOpState(buff, len, pOperator, true); + taosMemoryFree(buff); + } + setOperatorInfo(pOperator, "StreamStateAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE, true, OP_NOT_OPENED, pInfo, pTaskInfo); pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamStateAgg, NULL, destroyStreamStateOperatorInfo, @@ -2984,14 +3544,13 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { resetUnCloseWinInfo(pInfo->aggSup.pResultRowHashTable); } - setOperatorCompleted(pOperator); - if (pInfo->twAggSup.maxTs > 0 && - pInfo->twAggSup.maxTs - pInfo->twAggSup.checkPointInterval > pInfo->twAggSup.checkPointTs) { - pAPI->stateStore.streamStateCommit(pInfo->pState); - pAPI->stateStore.streamStateDeleteCheckPoint(pInfo->pState, pInfo->twAggSup.maxTs - pInfo->twAggSup.deleteMark); - setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId); - pInfo->twAggSup.checkPointTs = pInfo->twAggSup.maxTs; + if (pInfo->reCkBlock) { + pInfo->reCkBlock = false; + // printDataBlock(pInfo->pCheckpointRes, "single interval ck"); + return pInfo->pCheckpointRes; } + + setOperatorCompleted(pOperator); return NULL; } @@ -3030,6 +3589,12 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { printDataBlock(pBlock, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); return pBlock; + } else if (pBlock->info.type == STREAM_CHECKPOINT) { + 
pAPI->stateStore.streamStateCommit(pInfo->pState); + doStreamIntervalSaveCheckpoint(pOperator); + pInfo->reCkBlock = true; + copyDataBlock(pInfo->pCheckpointRes, pBlock); + continue; } else { ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); } @@ -3078,7 +3643,7 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { } SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, - SExecTaskInfo* pTaskInfo) { + SExecTaskInfo* pTaskInfo, SReadHandle* pHandle) { SStreamIntervalOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamIntervalOperatorInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); if (pInfo == NULL || pOperator == NULL) { @@ -3100,16 +3665,11 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys .precision = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->node.resType.precision, }; - pInfo->twAggSup = (STimeWindowAggSupp){ - .waterMark = pIntervalPhyNode->window.watermark, - .calTrigger = pIntervalPhyNode->window.triggerType, - .maxTs = INT64_MIN, - .minTs = INT64_MAX, - .deleteMark = getDeleteMark(pIntervalPhyNode), - .checkPointTs = 0, - .checkPointInterval = - convertTimePrecision(tsCheckpointInterval, TSDB_TIME_PRECISION_MILLI, pInfo->interval.precision), - }; + pInfo->twAggSup = (STimeWindowAggSupp){.waterMark = pIntervalPhyNode->window.watermark, + .calTrigger = pIntervalPhyNode->window.triggerType, + .maxTs = INT64_MIN, + .minTs = INT64_MAX, + .deleteMark = getDeleteMark(pIntervalPhyNode)}; ASSERTS(pInfo->twAggSup.calTrigger != STREAM_TRIGGER_MAX_DELAY, "trigger type should not be max delay"); @@ -3168,7 +3728,7 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys pInfo->pState->pFileState = pTaskInfo->storageAPI.stateStore.streamFileStateInit( tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, compareTs, pInfo->pState, - pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo)); + pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo), pHandle->checkpointId); setOperatorInfo(pOperator, "StreamIntervalOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL, true, OP_NOT_OPENED, pInfo, pTaskInfo); @@ -3176,8 +3736,19 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys destroyStreamFinalIntervalOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState); - pInfo->statestore = pTaskInfo->storageAPI.stateStore; + pInfo->stateStore = pTaskInfo->storageAPI.stateStore; pInfo->recvGetAll = false; + pInfo->pCheckpointRes = createSpecialDataBlock(STREAM_CHECKPOINT); + + // for stream + void* buff = NULL; + int32_t len = 0; + int32_t res = pAPI->stateStore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_CHECKPOINT_NAME, + strlen(STREAM_INTERVAL_OP_CHECKPOINT_NAME), &buff, &len); + if (res == TSDB_CODE_SUCCESS) { + doStreamIntervalDecodeOpState(buff, len, pOperator); + taosMemoryFree(buff); + } initIntervalDownStream(downstream, pPhyNode->type, pInfo); code = appendDownstream(pOperator, &downstream, 1); diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 2405d3edef..db7c5e2570 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -18,6 +18,7 @@ #include "functionMgt.h" #include "operator.h" #include "querytask.h" 
+#include "tchecksum.h" #include "tcommon.h" #include "tcompare.h" #include "tdatablock.h" @@ -55,7 +56,6 @@ typedef enum SResultTsInterpType { RESULT_ROW_END_INTERP = 2, } SResultTsInterpType; - typedef struct SOpenWindowInfo { SResultRowPosition pos; uint64_t groupId; @@ -388,7 +388,7 @@ static bool setTimeWindowInterpolationEndTs(SIntervalAggOperatorInfo* pInfo, SEx bool inCalSlidingWindow(SInterval* pInterval, STimeWindow* pWin, TSKEY calStart, TSKEY calEnd, EStreamType blockType) { if (pInterval->interval != pInterval->sliding && - ((pWin->ekey < calStart || pWin->skey > calEnd) || (blockType == STREAM_PULL_DATA && pWin->skey < calStart) )) { + ((pWin->ekey < calStart || pWin->skey > calEnd) || (blockType == STREAM_PULL_DATA && pWin->skey < calStart))) { return false; } @@ -400,7 +400,7 @@ bool inSlidingWindow(SInterval* pInterval, STimeWindow* pWin, SDataBlockInfo* pB } int32_t getNextQualifiedWindow(SInterval* pInterval, STimeWindow* pNext, SDataBlockInfo* pDataBlockInfo, - TSKEY* primaryKeys, int32_t prevPosition, int32_t order) { + TSKEY* primaryKeys, int32_t prevPosition, int32_t order) { bool ascQuery = (order == TSDB_ORDER_ASC); int32_t precision = pInterval->precision; @@ -632,8 +632,8 @@ static void doInterpUnclosedTimeWindow(SOperatorInfo* pOperatorInfo, int32_t num } static bool tsKeyCompFn(void* l, void* r, void* param) { - TSKEY* lTS = (TSKEY*)l; - TSKEY* rTS = (TSKEY*)r; + TSKEY* lTS = (TSKEY*)l; + TSKEY* rTS = (TSKEY*)r; SIntervalAggOperatorInfo* pInfo = param; return pInfo->binfo.outputTsOrder == ORDER_ASC ? *lTS < *rTS : *lTS > *rTS; } @@ -728,8 +728,8 @@ static bool hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul } TSKEY ekey = ascScan ? win.ekey : win.skey; - int32_t forwardRows = - getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, pInfo->binfo.inputTsOrder); + int32_t forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, + pInfo->binfo.inputTsOrder); // prev time window not interpolation yet. if (pInfo->timeWindowInterpo) { @@ -756,7 +756,8 @@ static bool hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul STimeWindow nextWin = win; while (1) { int32_t prevEndPos = forwardRows - 1 + startPos; - startPos = getNextQualifiedWindow(&pInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos, pInfo->binfo.inputTsOrder); + startPos = getNextQualifiedWindow(&pInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos, + pInfo->binfo.inputTsOrder); if (startPos < 0 || filterWindowWithLimit(pInfo, &nextWin, tableGroupId)) { break; } @@ -768,8 +769,8 @@ static bool hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul } ekey = ascScan ? nextWin.ekey : nextWin.skey; - forwardRows = - getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, pInfo->binfo.inputTsOrder); + forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, + pInfo->binfo.inputTsOrder); // window start(end) key interpolation doWindowBorderInterpolation(pInfo, pBlock, pResult, &nextWin, startPos, forwardRows, pSup); // TODO: add to open window? how to close the open windows after input blocks exhausted? 
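The hunks above only re-wrap long call sites, but they sit on the interval operator's core loop: count how many rows of the sorted timestamp column fall into the current window (getNumOfRowsInTimeWindow, which binary-searches), aggregate them, then step to the next window that still contains data (getNextQualifiedWindow). A minimal standalone sketch of that walk - illustrative only, with hypothetical demo* names, a linear scan in place of the binary search, and assuming ascending, non-negative timestamps with interval == sliding:

#include <stdint.h>
#include <stdio.h>

typedef struct { int64_t skey; int64_t ekey; } DemoWindow;  /* stand-in for STimeWindow */

/* Rows at/after startPos whose timestamp is <= ekey; the real helper binary-searches. */
static int32_t demoRowsInWindow(const int64_t* ts, int32_t nRows, int32_t startPos, int64_t ekey) {
  int32_t n = 0;
  while (startPos + n < nRows && ts[startPos + n] <= ekey) n++;
  return n;
}

static void demoWalkWindows(const int64_t* ts, int32_t nRows, int64_t interval) {
  int64_t    skey = (ts[0] / interval) * interval;        /* align the first window */
  DemoWindow win  = {.skey = skey, .ekey = skey + interval - 1};
  int32_t    startPos = 0;
  while (startPos < nRows) {
    int32_t forwardRows = demoRowsInWindow(ts, nRows, startPos, win.ekey);
    if (forwardRows > 0) {  /* aggregate rows [startPos, startPos + forwardRows) into this window */
      printf("[%lld, %lld]: %d rows\n", (long long)win.skey, (long long)win.ekey, forwardRows);
    }
    startPos += forwardRows;
    win.skey += interval;   /* tumbling step; a sliding window would advance by 'sliding' instead */
    win.ekey += interval;
  }
}

For example, demoWalkWindows((int64_t[]){1, 2, 5, 9}, 4, 3) reports 2 rows in [0,2], 1 in [3,5] and 1 in [9,11]; empty windows such as [6,8] are probed one by one here, whereas the real getNextQualifiedWindow jumps directly to the next window that holds data.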
@@ -1116,7 +1117,6 @@ static void doClearWindowImpl(SResultRowPosition* p1, SDiskbasedBuf* pResultBuf, releaseBufPage(pResultBuf, bufPage); } - static void destroyStateWindowOperatorInfo(void* param) { SStateWindowOperatorInfo* pInfo = (SStateWindowOperatorInfo*)param; cleanupBasicInfo(&pInfo->binfo); @@ -1153,7 +1153,6 @@ void destroyIntervalOperatorInfo(void* param) { taosMemoryFreeClear(param); } - static bool timeWindowinterpNeeded(SqlFunctionCtx* pCtx, int32_t numOfCols, SIntervalAggOperatorInfo* pInfo) { // the primary timestamp column bool needed = false; @@ -1208,13 +1207,6 @@ static bool timeWindowinterpNeeded(SqlFunctionCtx* pCtx, int32_t numOfCols, SInt return needed; } - -void initStreamFunciton(SqlFunctionCtx* pCtx, int32_t numOfExpr) { - for (int32_t i = 0; i < numOfExpr; i++) { - // pCtx[i].isStream = true; - } -} - SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SIntervalPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo) { SIntervalAggOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SIntervalAggOperatorInfo)); @@ -1235,8 +1227,8 @@ SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SIntervalPh int32_t num = 0; SExprInfo* pExprInfo = createExprInfo(pPhyNode->window.pFuncs, NULL, &num); - int32_t code = - initAggSup(pSup, &pInfo->aggSup, pExprInfo, num, keyBufSize, pTaskInfo->id.str, pTaskInfo->streamInfo.pState, &pTaskInfo->storageAPI.functionStore); + int32_t code = initAggSup(pSup, &pInfo->aggSup, pExprInfo, num, keyBufSize, pTaskInfo->id.str, + pTaskInfo->streamInfo.pState, &pTaskInfo->storageAPI.functionStore); if (code != TSDB_CODE_SUCCESS) { goto _error; } @@ -1476,7 +1468,8 @@ SOperatorInfo* createStatewindowOperatorInfo(SOperatorInfo* downstream, SStateWi if (pStateNode->window.pExprs != NULL) { int32_t numOfScalarExpr = 0; SExprInfo* pScalarExprInfo = createExprInfo(pStateNode->window.pExprs, NULL, &numOfScalarExpr); - int32_t code = initExprSupp(&pInfo->scalarSup, pScalarExprInfo, numOfScalarExpr, &pTaskInfo->storageAPI.functionStore); + int32_t code = + initExprSupp(&pInfo->scalarSup, pScalarExprInfo, numOfScalarExpr, &pTaskInfo->storageAPI.functionStore); if (code != TSDB_CODE_SUCCESS) { goto _error; } @@ -1615,7 +1608,6 @@ _error: return NULL; } - void destroyMAIOperatorInfo(void* param) { SMergeAlignedIntervalAggOperatorInfo* miaInfo = (SMergeAlignedIntervalAggOperatorInfo*)param; destroyIntervalOperatorInfo(miaInfo->intervalAggOperatorInfo); @@ -1979,8 +1971,8 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo* } TSKEY ekey = ascScan ? win.ekey : win.skey; - int32_t forwardRows = - getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, iaInfo->binfo.inputTsOrder); + int32_t forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, + iaInfo->binfo.inputTsOrder); ASSERT(forwardRows > 0); // prev time window not interpolation yet. 
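The tchecksum.h include added above and the doStream*EncodeOpState / doStream*SaveCheckpoint routines earlier in this patch follow one pattern: serialize operator state with a two-pass encoder (a NULL destination buffer only accumulates the length, a real buffer receives the bytes), append a CRC over the payload, and have the decoder verify that CRC before restoring anything. A compact sketch of the idea - the demo* helpers and the additive checksum are made up for illustration; the real code uses taosEncodeFixed*, taosCalcChecksum and taosCheckChecksum:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for taosEncodeFixedI32-style helpers. */
static int32_t demoEncodeI32(void** buf, int32_t v) {
  if (buf != NULL) {
    memcpy(*buf, &v, sizeof(v));
    *buf = (char*)*buf + sizeof(v);
  }
  return (int32_t)sizeof(v);
}

static uint32_t demoChecksum(const void* p, int32_t len) {  /* toy additive checksum */
  uint32_t s = 0;
  for (int32_t i = 0; i < len; ++i) s += ((const uint8_t*)p)[i];
  return s;
}

/* Pass 1 (buf == NULL): only the total length is summed.  Pass 2: bytes are written. */
static int32_t demoEncodeState(void** buf, int32_t len, const int32_t* vals, int32_t nVals) {
  void*   start = (buf == NULL) ? NULL : *buf;
  int32_t tlen  = 0;
  tlen += demoEncodeI32(buf, nVals);
  for (int32_t i = 0; i < nVals; ++i) tlen += demoEncodeI32(buf, vals[i]);
  if (buf != NULL) {  /* trailing CRC over everything written before it */
    uint32_t cksum = demoChecksum(start, len - (int32_t)sizeof(uint32_t));
    memcpy(*buf, &cksum, sizeof(cksum));
  }
  return tlen + (int32_t)sizeof(uint32_t);
}

static void demoSaveCheckpoint(const int32_t* vals, int32_t nVals) {
  int32_t len = demoEncodeState(NULL, 0, vals, nVals);  /* pass 1: size the buffer */
  void*   buf = calloc(1, len);
  void*   p   = buf;
  demoEncodeState(&p, len, vals, nVals);                /* pass 2: fill it */
  /* hand (buf, len) to the state store here; the patch uses streamStateSaveInfo() */
  free(buf);
}

The NULL-buffer first pass is what lets doStreamSessionSaveCheckpoint and its interval/state counterparts allocate exactly once, and the decoder mirrors it: it checks the trailing checksum first and only then repopulates the saved result rows, logging an error and bailing out rather than restoring partial state on a mismatch.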
@@ -2010,8 +2002,8 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo* STimeWindow nextWin = win; while (1) { int32_t prevEndPos = forwardRows - 1 + startPos; - startPos = - getNextQualifiedWindow(&iaInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos, iaInfo->binfo.inputTsOrder); + startPos = getNextQualifiedWindow(&iaInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos, + iaInfo->binfo.inputTsOrder); if (startPos < 0) { break; } @@ -2025,8 +2017,8 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo* } ekey = ascScan ? nextWin.ekey : nextWin.skey; - forwardRows = - getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, iaInfo->binfo.inputTsOrder); + forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, + iaInfo->binfo.inputTsOrder); // window start(end) key interpolation doWindowBorderInterpolation(iaInfo, pBlock, pResult, &nextWin, startPos, forwardRows, pExprSup); diff --git a/source/libs/function/src/functionMgt.c b/source/libs/function/src/functionMgt.c index 345020cee2..00e0e68b96 100644 --- a/source/libs/function/src/functionMgt.c +++ b/source/libs/function/src/functionMgt.c @@ -336,6 +336,16 @@ bool fmIsSameInOutType(int32_t funcId) { return res; } +bool fmIsConstantResFunc(SFunctionNode* pFunc) { + SNode* pNode; + FOREACH(pNode, pFunc->pParameterList) { + if (nodeType(pNode) != QUERY_NODE_VALUE) { + return false; + } + } + return true; +} + void getLastCacheDataType(SDataType* pType) { pType->bytes = getFirstLastInfoSize(pType->bytes) + VARSTR_HEADER_SIZE; pType->type = TSDB_DATA_TYPE_BINARY; diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 1c27e2d2d8..9656bce892 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -3727,11 +3727,30 @@ static int32_t translateInterp(STranslateContext* pCxt, SSelectStmt* pSelect) { return code; } +static int32_t removeConstantValueFromList(SNodeList** pList) { + SNode* pNode = NULL; + WHERE_EACH(pNode, *pList) { + if (nodeType(pNode) == QUERY_NODE_VALUE || + (nodeType(pNode) == QUERY_NODE_FUNCTION && fmIsConstantResFunc((SFunctionNode*)pNode) && fmIsScalarFunc(((SFunctionNode*)pNode)->funcId))) { + ERASE_NODE(*pList); + continue; + } + WHERE_NEXT; + } + + if (*pList && (*pList)->length <= 0) { + nodesDestroyList(*pList); + *pList = NULL; + } + + return TSDB_CODE_SUCCESS; +} + static int32_t translatePartitionBy(STranslateContext* pCxt, SSelectStmt* pSelect) { pCxt->currClause = SQL_CLAUSE_PARTITION_BY; int32_t code = TSDB_CODE_SUCCESS; - - if (pSelect->pPartitionByList) { + + if (TSDB_CODE_SUCCESS == code && pSelect->pPartitionByList) { int8_t typeType = getTableTypeFromTableNode(pSelect->pFromTable); SNode* pPar = nodesListGetNode(pSelect->pPartitionByList, 0); if (!((TSDB_NORMAL_TABLE == typeType || TSDB_CHILD_TABLE == typeType) && 1 == pSelect->pPartitionByList->length && @@ -3941,6 +3960,11 @@ static int32_t translateSelectFrom(STranslateContext* pCxt, SSelectStmt* pSelect if (TSDB_CODE_SUCCESS == code) { code = replaceTbName(pCxt, pSelect); } + if (TSDB_CODE_SUCCESS == code) { + if (pSelect->pPartitionByList) { + code = removeConstantValueFromList(&pSelect->pPartitionByList); + } + } return code; } diff --git a/source/libs/parser/src/parUtil.c b/source/libs/parser/src/parUtil.c index 39cf30cc65..792d00833d 100644 --- a/source/libs/parser/src/parUtil.c +++ 
b/source/libs/parser/src/parUtil.c @@ -138,7 +138,7 @@ static char* getSyntaxErrFormat(int32_t errCode) { case TSDB_CODE_PAR_CANNOT_DROP_PRIMARY_KEY: return "Primary timestamp column cannot be dropped"; case TSDB_CODE_PAR_INVALID_MODIFY_COL: - return "Only binary/nchar/geometry column length could be modified, and the length can only be increased, not decreased"; + return "Only varbinary/binary/nchar/geometry column length could be modified, and the length can only be increased, not decreased"; case TSDB_CODE_PAR_INVALID_TBNAME: return "Invalid tbname pseudo column"; case TSDB_CODE_PAR_INVALID_FUNCTION_NAME: diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 51d5c96c86..f3de3cb2f8 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -972,6 +972,9 @@ static int32_t pushDownCondOptDealJoin(SOptimizeContext* pCxt, SJoinLogicNode* p if (TSDB_CODE_SUCCESS == code) { code = pushDownCondOptJoinExtractEqualOnCond(pCxt, pJoin); } + if (TSDB_CODE_SUCCESS == code) { + code = pushDownCondOptAppendFilterCol(pCxt, pJoin); + } if (TSDB_CODE_SUCCESS == code) { OPTIMIZE_FLAG_SET_MASK(pJoin->node.optimizedFlag, OPTIMIZE_FLAG_PUSH_DOWN_CONDE); pCxt->optimized = true; diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index b6bc9c888b..39854d1824 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -44,9 +44,11 @@ typedef struct { int64_t defaultCfInit; } SBackendWrapper; -void* streamBackendInit(const char* path); +void* streamBackendInit(const char* path, int64_t chkpId); void streamBackendCleanup(void* arg); void streamBackendHandleCleanup(void* arg); +int32_t streamBackendLoadCheckpointInfo(void* pMeta); +int32_t streamBackendDoCheckpoint(void* pMeta, uint64_t checkpointId); SListNode* streamBackendAddCompare(void* backend, void* arg); void streamBackendDelCompare(void* backend, void* arg); @@ -135,5 +137,10 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb void* val, int32_t vlen, int64_t ttl, void* tmpBuf); int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch); +int32_t streamBackendTriggerChkp(void* pMeta, char* dst); + +int32_t streamBackendAddInUseChkp(void* arg, int64_t chkpId); +int32_t streamBackendDelInUseChkp(void* arg, int64_t chkpId); + // int32_t streamDefaultIter_rocksdb(SStreamState* pState, const void* start, const void* end, SArray* result); #endif \ No newline at end of file diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index ffd0eedea1..bb81582a2d 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -19,7 +19,7 @@ #include "executor.h" #include "query.h" #include "tstream.h" - +#include "streamBackendRocksdb.h" #include "trpc.h" #ifdef __cplusplus @@ -41,11 +41,15 @@ typedef struct { } SStreamContinueExecInfo; extern SStreamGlobalEnv streamEnv; +extern int32_t streamBackendId; +extern int32_t streamBackendCfWrapperId; -void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration); -int32_t streamDispatchStreamBlock(SStreamTask* pTask); +const char* streamGetBlockTypeStr(int32_t type); +void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration); +int32_t streamDispatchStreamBlock(SStreamTask* pTask); -SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t 
srcVg); +int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); +SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg); SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamTask* pTask, int64_t resultSize, SArray* pRes); void destroyStreamDataBlock(SStreamDataBlock* pBlock); @@ -55,21 +59,24 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq); +int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId); +int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); -int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId, - SEpSet* pEpSet); +int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32_t index, int64_t checkpointId); +int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask); +int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask); +int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask); +int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks); SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem); +int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, int32_t* pLen); int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, SStreamScanHistoryFinishReq* pReq); int32_t streamNotifyUpstreamContinue(SStreamTask* pTask); int32_t streamTaskFillHistoryFinished(SStreamTask* pTask); int32_t streamTransferStateToStreamTask(SStreamTask* pTask); -extern int32_t streamBackendId; -extern int32_t streamBackendCfWrapperId; - #ifdef __cplusplus } #endif diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 5b6238330d..1f93498557 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -16,10 +16,6 @@ #include "streamInt.h" #include "ttimer.h" -#define STREAM_TASK_INPUT_QUEUE_CAPACITY 20480 -#define STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE (30) - -#define QUEUE_MEM_SIZE_IN_MB(_q) (taosQueueMemorySize(_q) / ONE_MB_F) SStreamGlobalEnv streamEnv; int32_t streamInit() { @@ -30,7 +26,7 @@ int32_t streamInit() { } if (old == 0) { - streamEnv.timer = taosTmrInit(10000, 100, 10000, "STREAM"); + streamEnv.timer = taosTmrInit(1000, 100, 10000, "STREAM"); if (streamEnv.timer == NULL) { atomic_store_8(&streamEnv.inited, 0); return -1; @@ -63,11 +59,10 @@ char* createStreamTaskIdStr(int64_t streamId, int32_t taskId) { static void streamSchedByTimer(void* param, void* tmrId) { SStreamTask* pTask = (void*)param; - int8_t status = atomic_load_8(&pTask->triggerStatus); - qDebug("s-task:%s in scheduler, trigger status:%d, next:%dms", pTask->id.idStr, status, (int32_t)pTask->triggerParam); + int8_t status = atomic_load_8(&pTask->schedInfo.status); + qDebug("s-task:%s in scheduler, trigger status:%d, next:%dms", pTask->id.idStr, status, (int32_t)pTask->info.triggerParam); if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) { - streamMetaReleaseTask(NULL, pTask); qDebug("s-task:%s jump out of schedTimer", pTask->id.idStr); return; } @@ -85,29 +80,29 @@ static void streamSchedByTimer(void* param, void* tmrId) { 
return; } - atomic_store_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE); + atomic_store_8(&pTask->schedInfo.status, TASK_TRIGGER_STATUS__INACTIVE); pTrigger->pBlock->info.type = STREAM_GET_ALL; - if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pTrigger) < 0) { + if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pTrigger) < 0) { taosFreeQitem(pTrigger); - taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->schedTimer); + taosTmrReset(streamSchedByTimer, (int32_t)pTask->info.triggerParam, pTask, streamEnv.timer, &pTask->schedInfo.pTimer); return; } streamSchedExec(pTask); } - taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->schedTimer); + taosTmrReset(streamSchedByTimer, (int32_t)pTask->info.triggerParam, pTask, streamEnv.timer, &pTask->schedInfo.pTimer); } int32_t streamSetupScheduleTrigger(SStreamTask* pTask) { - if (pTask->triggerParam != 0 && pTask->info.fillHistory == 0) { + if (pTask->info.triggerParam != 0 && pTask->info.fillHistory == 0) { int32_t ref = atomic_add_fetch_32(&pTask->refCnt, 1); - ASSERT(ref == 2 && pTask->schedTimer == NULL); + ASSERT(ref == 2 && pTask->schedInfo.pTimer == NULL); - qDebug("s-task:%s setup scheduler trigger, delay:%"PRId64" ms", pTask->id.idStr, pTask->triggerParam); + qDebug("s-task:%s setup scheduler trigger, delay:%" PRId64 " ms", pTask->id.idStr, pTask->info.triggerParam); - pTask->schedTimer = taosTmrStart(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer); - pTask->triggerStatus = TASK_TRIGGER_STATUS__INACTIVE; + pTask->schedInfo.pTimer = taosTmrStart(streamSchedByTimer, (int32_t)pTask->info.triggerParam, pTask, streamEnv.timer); + pTask->schedInfo.status = TASK_TRIGGER_STATUS__INACTIVE; } return 0; @@ -141,19 +136,60 @@ int32_t streamSchedExec(SStreamTask* pTask) { return 0; } +static int32_t buildDispatchRsp(const SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t status, void** pBuf) { + *pBuf = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp)); + if (*pBuf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + ((SMsgHead*)(*pBuf))->vgId = htonl(pReq->upstreamNodeId); + SStreamDispatchRsp* pDispatchRsp = POINTER_SHIFT((*pBuf), sizeof(SMsgHead)); + + pDispatchRsp->inputStatus = status; + pDispatchRsp->streamId = htobe64(pReq->streamId); + pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId); + pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId); + pDispatchRsp->downstreamNodeId = htonl(pTask->info.nodeId); + pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId); + + return TSDB_CODE_SUCCESS; +} + +static int32_t streamTaskAppendInputBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq) { + int8_t status = 0; + + SStreamDataBlock* pBlock = createStreamBlockFromDispatchMsg(pReq, pReq->type, pReq->srcVgId); + if (pBlock == NULL) { + streamTaskInputFail(pTask); + status = TASK_INPUT_STATUS__FAILED; + qError("vgId:%d, s-task:%s failed to receive dispatch msg, reason: out of memory", pTask->pMeta->vgId, + pTask->id.idStr); + } else { + if (pBlock->type == STREAM_INPUT__TRANS_STATE) { + pTask->status.appendTranstateBlock = true; + } + + int32_t code = streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pBlock); + // input queue is full, upstream is blocked now + status = (code == TSDB_CODE_SUCCESS) ? 
TASK_INPUT_STATUS__NORMAL : TASK_INPUT_STATUS__BLOCKED; + } + + return status; +} + int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pRsp) { SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); int8_t status = TASK_INPUT_STATUS__NORMAL; // enqueue if (pData != NULL) { - qDebug("s-task:%s (child %d) recv retrieve req from task:0x%x(vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, pTask->info.selfChildId, - pReq->srcTaskId, pReq->srcNodeId, pReq->reqId); + qDebug("s-task:%s (child %d) recv retrieve req from task:0x%x(vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, + pTask->info.selfChildId, pReq->srcTaskId, pReq->srcNodeId, pReq->reqId); pData->type = STREAM_INPUT__DATA_RETRIEVE; pData->srcVgId = 0; streamRetrieveReqToData(pReq, pData); - if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pData) == 0) { + if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pData) == 0) { status = TASK_INPUT_STATUS__NORMAL; } else { status = TASK_INPUT_STATUS__FAILED; @@ -181,14 +217,14 @@ int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock int32_t code = 0; int32_t type = pTask->outputInfo.type; if (type == TASK_OUTPUT__TABLE) { - pTask->tbSink.tbSinkFunc(pTask, pTask->tbSink.vnode, 0, pBlock->blocks); + pTask->tbSink.tbSinkFunc(pTask, pTask->tbSink.vnode, pBlock->blocks); destroyStreamDataBlock(pBlock); } else if (type == TASK_OUTPUT__SMA) { pTask->smaSink.smaSink(pTask->smaSink.vnode, pTask->smaSink.smaId, pBlock->blocks); destroyStreamDataBlock(pBlock); } else { ASSERT(type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH); - code = taosWriteQitem(pTask->outputInfo.queue->queue, pBlock); + code = taosWriteQitem(pTask->outputInfo.queue->pQueue, pBlock); if (code != 0) { qError("s-task:%s failed to put res into outputQ", pTask->id.idStr); } @@ -200,76 +236,35 @@ int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock return 0; } - - -static int32_t streamTaskAppendInputBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq) { - int8_t status = 0; - - SStreamDataBlock* pBlock = createStreamDataFromDispatchMsg(pReq, pReq->type, pReq->srcVgId); - if (pBlock == NULL) { - streamTaskInputFail(pTask); - status = TASK_INPUT_STATUS__FAILED; - qError("vgId:%d, s-task:%s failed to receive dispatch msg, reason: out of memory", pTask->pMeta->vgId, - pTask->id.idStr); - } else { - if (pBlock->type == STREAM_INPUT__TRANS_STATE) { - pTask->status.appendTranstateBlock = true; - } - - int32_t code = tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pBlock); - // input queue is full, upstream is blocked now - status = (code == TSDB_CODE_SUCCESS) ? 
TASK_INPUT_STATUS__NORMAL : TASK_INPUT_STATUS__BLOCKED; - } - - return status; -} - -static int32_t buildDispatchRsp(const SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t status, void** pBuf) { - *pBuf = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp)); - if (*pBuf == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - ((SMsgHead*)(*pBuf))->vgId = htonl(pReq->upstreamNodeId); - SStreamDispatchRsp* pDispatchRsp = POINTER_SHIFT((*pBuf), sizeof(SMsgHead)); - - pDispatchRsp->inputStatus = status; - pDispatchRsp->streamId = htobe64(pReq->streamId); - pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId); - pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId); - pDispatchRsp->downstreamNodeId = htonl(pTask->info.nodeId); - pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId); - - return TSDB_CODE_SUCCESS; -} - -void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) { - SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId); - if (pInfo != NULL) { - pInfo->dataAllowed = false; - } -} - int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) { qDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64, pTask->id.idStr, pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen); - int32_t status = 0; SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId); ASSERT(pInfo != NULL); - if (!pInfo->dataAllowed) { - qWarn("s-task:%s data from task:0x%x is denied, since inputQ is closed for it", pTask->id.idStr, pReq->upstreamTaskId); + // upstream task has restarted/leader-follower switch/transferred to other dnodes + if (pReq->stage > pInfo->stage) { + qError("s-task:%s upstream task:0x%x (vgId:%d) has restart/leader-switch/vnode-transfer, prev stage:%" PRId64 + ", current:%" PRId64 " dispatch msg rejected", + pTask->id.idStr, pReq->upstreamTaskId, pReq->upstreamNodeId, pInfo->stage, pReq->stage); status = TASK_INPUT_STATUS__BLOCKED; } else { - // Current task has received the checkpoint req from the upstream task, from which the message should all be blocked - if (pReq->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { - streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); - qDebug("s-task:%s close inputQ for upstream:0x%x", pTask->id.idStr, pReq->upstreamTaskId); - } + if (!pInfo->dataAllowed) { + qWarn("s-task:%s data from task:0x%x is denied, since inputQ is closed for it", pTask->id.idStr, + pReq->upstreamTaskId); + status = TASK_INPUT_STATUS__BLOCKED; + } else { + // Current task has received the checkpoint req from the upstream task, from which the message should all be + // blocked + if (pReq->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); + qDebug("s-task:%s close inputQ for upstream:0x%x", pTask->id.idStr, pReq->upstreamTaskId); + } - status = streamTaskAppendInputBlocks(pTask, pReq); + status = streamTaskAppendInputBlocks(pTask, pReq); + } } { @@ -290,30 +285,10 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S return 0; } -//int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) { -// qDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64, pTask->id.idStr, -// pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen); -// -// // todo add the input queue buffer limitation -// streamTaskEnqueueBlocks(pTask, pReq, pRsp); -// 
tDeleteStreamDispatchReq(pReq); -// -// if (exec) { -// if (streamTryExec(pTask) < 0) { -// return -1; -// } -// } else { -// streamSchedExec(pTask); -// } -// -// return 0; -//} - int32_t streamProcessRunReq(SStreamTask* pTask) { if (streamTryExec(pTask) < 0) { return -1; } - return 0; } @@ -324,118 +299,36 @@ int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, S return 0; } -bool tInputQueueIsFull(const SStreamTask* pTask) { - bool isFull = taosQueueItemSize((pTask->inputQueue->queue)) >= STREAM_TASK_INPUT_QUEUE_CAPACITY; - double size = QUEUE_MEM_SIZE_IN_MB(pTask->inputQueue->queue); - return (isFull || size >= STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE); -} +void streamTaskInputFail(SStreamTask* pTask) { atomic_store_8(&pTask->inputInfo.status, TASK_INPUT_STATUS__FAILED); } -int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { - int8_t type = pItem->type; - int32_t total = taosQueueItemSize(pTask->inputQueue->queue) + 1; - double size = QUEUE_MEM_SIZE_IN_MB(pTask->inputQueue->queue); - - if (type == STREAM_INPUT__DATA_SUBMIT) { - SStreamDataSubmit* px = (SStreamDataSubmit*)pItem; - if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && tInputQueueIsFull(pTask)) { - qError("s-task:%s input queue is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data", - pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, - size); - streamDataSubmitDestroy(px); - taosFreeQitem(pItem); - return -1; - } - - int32_t msgLen = px->submit.msgLen; - int64_t ver = px->submit.ver; - - int32_t code = taosWriteQitem(pTask->inputQueue->queue, pItem); - if (code != TSDB_CODE_SUCCESS) { - streamDataSubmitDestroy(px); - taosFreeQitem(pItem); - return code; - } - - // use the local variable to avoid the pItem be freed by other threads, since it has been put into queue already. - qDebug("s-task:%s submit enqueue msgLen:%d ver:%" PRId64 ", total in queue:%d, size:%.2fMiB", pTask->id.idStr, - msgLen, ver, total, size + SIZE_IN_MB(msgLen)); - } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || - type == STREAM_INPUT__REF_DATA_BLOCK) { - if (/*(pTask->info.taskLevel == TASK_LEVEL__SOURCE) && */(tInputQueueIsFull(pTask))) { - qError("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort", - pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, - size); - destroyStreamDataBlock((SStreamDataBlock*) pItem); - return -1; - } - - qDebug("s-task:%s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size); - int32_t code = taosWriteQitem(pTask->inputQueue->queue, pItem); - if (code != TSDB_CODE_SUCCESS) { - destroyStreamDataBlock((SStreamDataBlock*) pItem); - return code; - } - } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__TRANS_STATE) { - taosWriteQitem(pTask->inputQueue->queue, pItem); - qDebug("s-task:%s checkpoint/trans-state blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size); - } else if (type == STREAM_INPUT__GET_RES) { - // use the default memory limit, refactor later. 
- taosWriteQitem(pTask->inputQueue->queue, pItem); - qDebug("s-task:%s data res enqueue, current(blocks:%d, size:%.2fMiB)", pTask->id.idStr, total, size); - } else { - ASSERT(0); +void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) { + int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList); + if (num == 0) { + return; } - if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && pTask->triggerParam != 0) { - atomic_val_compare_exchange_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE, TASK_TRIGGER_STATUS__ACTIVE); - qDebug("s-task:%s new data arrived, active the trigger, trigerStatus:%d", pTask->id.idStr, pTask->triggerStatus); - } - - return 0; -} - -static void* streamQueueCurItem(SStreamQueue* queue) { return queue->qItem; } - -void* streamQueueNextItem(SStreamQueue* pQueue) { - int8_t flag = atomic_exchange_8(&pQueue->status, STREAM_QUEUE__PROCESSING); - - if (flag == STREAM_QUEUE__FAILED) { - ASSERT(pQueue->qItem != NULL); - return streamQueueCurItem(pQueue); - } else { - pQueue->qItem = NULL; - taosGetQitem(pQueue->qall, &pQueue->qItem); - if (pQueue->qItem == NULL) { - taosReadAllQitems(pQueue->queue, pQueue->qall); - taosGetQitem(pQueue->qall, &pQueue->qItem); - } - - return streamQueueCurItem(pQueue); + for (int32_t i = 0; i < num; ++i) { + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); + pInfo->dataAllowed = true; } } -void streamTaskInputFail(SStreamTask* pTask) { atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__FAILED); } +void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) { + SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId); + if (pInfo != NULL) { + pInfo->dataAllowed = false; + } +} -SStreamChildEpInfo * streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId) { - int32_t num = taosArrayGetSize(pTask->pUpstreamEpInfoList); - for(int32_t i = 0; i < num; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i); +SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId) { + int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList); + for (int32_t i = 0; i < num; ++i) { + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); if (pInfo->taskId == taskId) { return pInfo; } } + qError("s-task:%s failed to find upstream task:0x%x", pTask->id.idStr, taskId); return NULL; -} - -void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) { - int32_t num = taosArrayGetSize(pTask->pUpstreamEpInfoList); - if (num == 0) { - return; - } - - for(int32_t i = 0; i < num; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i); - pInfo->dataAllowed = true; - } -} +} \ No newline at end of file diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 571aca9935..82fa21ea40 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -20,6 +20,27 @@ #include "tcommon.h" #include "tref.h" +typedef struct { + int8_t init; + char* pCurrent; + char* pManifest; + SArray* pSST; + int64_t preCkptId; + int64_t curChkpId; + char* path; + + char* buf; + int32_t len; + + // ping-pong buf + SHashObj* pSstTbl[2]; + int8_t idx; + + SArray* pAdd; + SArray* pDel; + int8_t update; +} SBackendManager; + typedef struct SCompactFilteFactory { void* status; } SCompactFilteFactory; @@ -41,7 +62,8 @@ typedef struct { } RocksdbCfInst; uint32_t nextPow2(uint32_t x); -int32_t streamStateOpenBackendCf(void* 
backend, char* name, char** cfs, int32_t nCf); + +int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf); void destroyRocksdbCfInst(RocksdbCfInst* inst); @@ -126,6 +148,218 @@ void destroyFunc(void* arg); int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest); int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest); +SBackendManager* bkdMgtCreate(char* path) { + SBackendManager* p = taosMemoryCalloc(1, sizeof(SBackendManager)); + p->curChkpId = 0; + p->preCkptId = 0; + p->pSST = taosArrayInit(64, sizeof(void*)); + p->path = taosStrdup(path); + p->len = strlen(path) + 128; + p->buf = taosMemoryCalloc(1, p->len); + + p->idx = 0; + p->pSstTbl[0] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + p->pSstTbl[1] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + + p->pAdd = taosArrayInit(64, sizeof(void*)); + p->pDel = taosArrayInit(64, sizeof(void*)); + p->update = 0; + return p; +} +void bkdMgtDestroy(SBackendManager* bm) { + if (bm == NULL) return; + + taosMemoryFree(bm->buf); + taosMemoryFree(bm->path); + + taosArrayDestroyP(bm->pSST, taosMemoryFree); + taosArrayDestroyP(bm->pAdd, taosMemoryFree); + taosArrayDestroyP(bm->pDel, taosMemoryFree); + + taosHashCleanup(bm->pSstTbl[0]); + taosHashCleanup(bm->pSstTbl[1]); + taosMemoryFree(bm); +} + +int32_t compareHashTableImpl(SHashObj* p1, SHashObj* p2, SArray* diff) { + int32_t code = 0; + size_t len = 0; + void* pIter = taosHashIterate(p2, NULL); + while (pIter) { + char* name = taosHashGetKey(pIter, &len); + if (!taosHashGet(p1, name, len)) { + char* p = taosStrdup(name); + taosArrayPush(diff, &p); + } + pIter = taosHashIterate(p2, pIter); + } + return code; +} +int32_t compareHashTable(SHashObj* p1, SHashObj* p2, SArray* add, SArray* del) { + int32_t code = 0; + + code = compareHashTableImpl(p1, p2, add); + code = compareHashTableImpl(p2, p1, del); + + return code; +} +int32_t bkdMgtGetDelta(SBackendManager* bm, int64_t chkpId, SArray* list) { + const char* pCurrent = "CURRENT"; + int32_t currLen = strlen(pCurrent); + + const char* pManifest = "MANIFEST-"; + int32_t maniLen = strlen(pManifest); + + const char* pSST = ".sst"; + int32_t sstLen = strlen(pSST); + + memset(bm->buf, 0, bm->len); + sprintf(bm->buf, "%s%scheckpoint%" PRId64 "", bm->path, TD_DIRSEP, chkpId); + + taosArrayClearP(bm->pAdd, taosMemoryFree); + taosArrayClearP(bm->pDel, taosMemoryFree); + + TdDirPtr pDir = taosOpenDir(bm->buf); + TdDirEntryPtr de = NULL; + int8_t dummy = 0; + while ((de = taosReadDir(pDir)) != NULL) { + char* name = taosGetDirEntryName(de); + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; + if (strlen(name) == currLen && strcmp(name, pCurrent) == 0) { + taosMemoryFreeClear(bm->pCurrent); + bm->pCurrent = taosStrdup(name); + taosHashPut(bm->pSstTbl[1 - bm->idx], name, strlen(name), &dummy, sizeof(dummy)); + continue; + } + + if (strlen(name) >= maniLen && strncmp(name, pManifest, maniLen) == 0) { + taosMemoryFreeClear(bm->pManifest); + bm->pManifest = taosStrdup(name); + taosHashPut(bm->pSstTbl[1 - bm->idx], name, strlen(name), &dummy, sizeof(dummy)); + continue; + } + if (strlen(name) >= sstLen && strncmp(name + strlen(name) - 4, pSST, sstLen) == 0) { + char* p = taosStrdup(name); + taosHashPut(bm->pSstTbl[1 - bm->idx], name, strlen(name), &dummy, sizeof(dummy)); + continue; + } + } + if (bm->init == 0) { + bm->preCkptId = -1; + bm->curChkpId = chkpId; + bm->init = 1; + + 
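Editor's note: bkdMgtGetDelta and compareHashTable above work out which checkpoint files appeared or disappeared between two snapshots by diffing two name sets (the ping-pong pSstTbl tables). A minimal sketch of that set difference, using plain string arrays instead of SHashObj purely for illustration; all names here are hypothetical.

#include <string.h>

/* Linear scan stands in for the hash lookup. */
static int sketchContains(const char** set, int n, const char* s) {
  for (int i = 0; i < n; i++) {
    if (strcmp(set[i], s) == 0) return 1;
  }
  return 0;
}

/* Names in `next` but not in `prev` are newly added files; names in `prev`
 * but not in `next` were dropped (e.g. by compaction) and can be deleted. */
static void sketchDiffFileSets(const char** prev, int nPrev, const char** next, int nNext,
                               const char** add, int* nAdd, const char** del, int* nDel) {
  *nAdd = 0;
  *nDel = 0;
  for (int i = 0; i < nNext; i++) {
    if (!sketchContains(prev, nPrev, next[i])) add[(*nAdd)++] = next[i];
  }
  for (int i = 0; i < nPrev; i++) {
    if (!sketchContains(next, nNext, prev[i])) del[(*nDel)++] = prev[i];
  }
}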
void* pIter = taosHashIterate(bm->pSstTbl[1 - bm->idx], NULL); + while (pIter) { + size_t len; + char* name = taosHashGetKey(pIter, &len); + if (name != NULL && len != 0) { + taosArrayPush(bm->pAdd, &name); + } + pIter = taosHashIterate(bm->pSstTbl[1 - bm->idx], pIter); + } + if (taosArrayGetSize(bm->pAdd) > 0) bm->update = 1; + } else { + int32_t code = compareHashTable(bm->pSstTbl[bm->idx], bm->pSstTbl[1 - bm->idx], bm->pAdd, bm->pDel); + if (code != 0) { + // dead code + taosArrayClearP(bm->pAdd, taosMemoryFree); + taosArrayClearP(bm->pDel, taosMemoryFree); + taosHashClear(bm->pSstTbl[1 - bm->idx]); + bm->update = 0; + + return code; + } + + bm->preCkptId = bm->curChkpId; + bm->curChkpId = chkpId; + if (taosArrayGetSize(bm->pAdd) == 0 && taosArrayGetSize(bm->pDel) == 0) { + bm->update = 0; + } + } + taosHashClear(bm->pSstTbl[bm->idx]); + bm->idx = 1 - bm->idx; + + return 0; +} + +int32_t bkdMgtDumpTo(SBackendManager* bm, char* dname) { + int32_t code = 0; + int32_t len = bm->len + 128; + + char* dstBuf = taosMemoryCalloc(1, len); + char* srcBuf = taosMemoryCalloc(1, len); + + char* srcDir = taosMemoryCalloc(1, len); + char* dstDir = taosMemoryCalloc(1, len); + + sprintf(srcDir, "%s%s%s%" PRId64 "", bm->path, TD_DIRSEP, "checkpoint", bm->curChkpId); + sprintf(dstDir, "%s%s%s", bm->path, TD_DIRSEP, dname); + + if (!taosDirExist(srcDir)) { + return 0; + } + + code = taosMkDir(dstDir); + if (code != 0) { + return code; + } + + // clear current file + memset(dstBuf, 0, len); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pCurrent); + taosRemoveFile(dstBuf); + + memset(dstBuf, 0, len); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pManifest); + taosRemoveFile(dstBuf); + + // add file to $name dir + for (int i = 0; i < taosArrayGetSize(bm->pAdd); i++) { + memset(dstBuf, 0, len); + memset(srcBuf, 0, len); + + char* filename = taosArrayGetP(bm->pAdd, i); + sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, filename); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, filename); + + taosCopyFile(srcBuf, dstBuf); + } + // del file in $name + for (int i = 0; i < taosArrayGetSize(bm->pDel); i++) { + memset(dstBuf, 0, len); + memset(srcBuf, 0, len); + + char* filename = taosArrayGetP(bm->pDel, i); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, filename); + taosRemoveFile(dstBuf); + } + + // copy current file to dst dir + memset(srcBuf, 0, len); + memset(dstBuf, 0, len); + sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, bm->pCurrent); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pCurrent); + taosCopyFile(srcBuf, dstBuf); + + // copy manifest file to dst dir + memset(srcBuf, 0, len); + memset(dstBuf, 0, len); + sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, bm->pManifest); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pManifest); + taosCopyFile(srcBuf, dstBuf); + + // clear delta data buf + taosArrayClearP(bm->pAdd, taosMemoryFree); + taosArrayClearP(bm->pDel, taosMemoryFree); + + taosMemoryFree(srcBuf); + taosMemoryFree(dstBuf); + taosMemoryFree(srcDir); + taosMemoryFree(dstDir); + return code; +} + SCfInit ginitDict[] = { {"default", 7, 0, defaultKeyComp, defaultKeyEncode, defaultKeyDecode, defaultKeyToString, compareDefaultName, destroyFunc, encodeValueFunc, decodeValueFunc}, @@ -143,10 +377,90 @@ SCfInit ginitDict[] = { encodeValueFunc, decodeValueFunc}, }; -void* streamBackendInit(const char* path) { - uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20; +bool isValidCheckpoint(const char* dir) { return true; } - qDebug("start to init stream backend at %s", path); +int32_t 
copyFiles(const char* src, const char* dst) { + int32_t code = 0; + // opt later, just hard link + int32_t sLen = strlen(src); + int32_t dLen = strlen(dst); + char* srcName = taosMemoryCalloc(1, sLen + 64); + char* dstName = taosMemoryCalloc(1, dLen + 64); + + TdDirPtr pDir = taosOpenDir(src); + if (pDir == NULL) return 0; + + TdDirEntryPtr de = NULL; + while ((de = taosReadDir(pDir)) != NULL) { + char* name = taosGetDirEntryName(de); + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; + + sprintf(srcName, "%s%s%s", src, TD_DIRSEP, name); + sprintf(dstName, "%s%s%s", dst, TD_DIRSEP, name); + if (!taosDirEntryIsDir(de)) { + code = taosCopyFile(srcName, dstName); + if (code == -1) { + goto _err; + } + } + + memset(srcName, 0, sLen + 64); + memset(dstName, 0, dLen + 64); + } + +_err: + taosMemoryFreeClear(srcName); + taosMemoryFreeClear(dstName); + taosCloseDir(&pDir); + return code >= 0 ? 0 : -1; +} +int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { + // impl later + int32_t code = 0; + + /*param@1: checkpointId dir + param@2: state + copy pChkpIdDir's file to state dir + opt to set hard link to previous file + */ + char* state = taosMemoryCalloc(1, strlen(path) + 32); + sprintf(state, "%s%s%s", path, TD_DIRSEP, "state"); + if (chkpId != 0) { + char* chkp = taosMemoryCalloc(1, strlen(path) + 64); + sprintf(chkp, "%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); + if (taosIsDir(chkp) && isValidCheckpoint(chkp)) { + if (taosIsDir(state)) { + // remove dir if exists + // taosRenameFile(const char *oldName, const char *newName) + taosRemoveDir(state); + } + taosMkDir(state); + code = copyFiles(chkp, state); + if (code != 0) { + qError("failed to restart stream backend from %s, reason: %s", chkp, tstrerror(TAOS_SYSTEM_ERROR(errno))); + } else { + qInfo("start to restart stream backend at checkpoint path: %s", chkp); + } + + } else { + qError("failed to start stream backend at %s, reason: %s, restart from default state dir:%s", chkp, + tstrerror(TAOS_SYSTEM_ERROR(errno)), state); + taosMkDir(state); + } + taosMemoryFree(chkp); + } + *dst = state; + + return 0; +} + +void* streamBackendInit(const char* streamPath, int64_t chkpId) { + char* backendPath = NULL; + int32_t code = rebuildDirFromCheckpoint(streamPath, chkpId, &backendPath); + + qDebug("start to init stream backend at %s, checkpointid: %" PRId64 "", backendPath, chkpId); + + uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20; SBackendWrapper* pHandle = taosMemoryCalloc(1, sizeof(SBackendWrapper)); pHandle->list = tdListNew(sizeof(SCfComparator)); taosThreadMutexInit(&pHandle->mutex, NULL); @@ -168,9 +482,10 @@ void* streamBackendInit(const char* path) { rocksdb_options_set_max_total_wal_size(opts, dbMemLimit); rocksdb_options_set_recycle_log_file_num(opts, 6); rocksdb_options_set_max_write_buffer_number(opts, 3); - rocksdb_options_set_info_log_level(opts, 0); + rocksdb_options_set_info_log_level(opts, 1); rocksdb_options_set_db_write_buffer_size(opts, dbMemLimit); rocksdb_options_set_write_buffer_size(opts, dbMemLimit / 2); + rocksdb_options_set_atomic_flush(opts, 1); pHandle->env = env; pHandle->dbOpt = opts; @@ -182,12 +497,12 @@ void* streamBackendInit(const char* path) { char* err = NULL; size_t nCf = 0; - char** cfs = rocksdb_list_column_families(opts, path, &nCf, &err); + char** cfs = rocksdb_list_column_families(opts, backendPath, &nCf, &err); if (nCf == 0 || nCf == 1 || err != NULL) { taosMemoryFreeClear(err); - pHandle->db = 
rocksdb_open(opts, path, &err); + pHandle->db = rocksdb_open(opts, backendPath, &err); if (err != NULL) { - qError("failed to open rocksdb, path:%s, reason:%s", path, err); + qError("failed to open rocksdb, path:%s, reason:%s", backendPath, err); taosMemoryFreeClear(err); goto _EXIT; } @@ -195,12 +510,17 @@ void* streamBackendInit(const char* path) { /* list all cf and get prefix */ - streamStateOpenBackendCf(pHandle, (char*)path, cfs, nCf); + code = streamStateOpenBackendCf(pHandle, (char*)backendPath, cfs, nCf); + if (code != 0) { + rocksdb_list_column_families_destroy(cfs, nCf); + goto _EXIT; + } } if (cfs != NULL) { rocksdb_list_column_families_destroy(cfs, nCf); } - qDebug("succ to init stream backend at %s, backend:%p", path, pHandle); + qDebug("succ to init stream backend at %s, backend:%p", backendPath, pHandle); + taosMemoryFreeClear(backendPath); return (void*)pHandle; _EXIT: @@ -210,31 +530,25 @@ _EXIT: taosThreadMutexDestroy(&pHandle->mutex); taosThreadMutexDestroy(&pHandle->cfMutex); taosHashCleanup(pHandle->cfInst); - rocksdb_compactionfilterfactory_destroy(pHandle->filterFactory); tdListFree(pHandle->list); taosMemoryFree(pHandle); - qDebug("failed to init stream backend at %s", path); + qDebug("failed to init stream backend at %s", backendPath); + taosMemoryFree(backendPath); return NULL; } void streamBackendCleanup(void* arg) { SBackendWrapper* pHandle = (SBackendWrapper*)arg; - void* pIter = taosHashIterate(pHandle->cfInst, NULL); + + void* pIter = taosHashIterate(pHandle->cfInst, NULL); while (pIter != NULL) { RocksdbCfInst* inst = *(RocksdbCfInst**)pIter; destroyRocksdbCfInst(inst); pIter = taosHashIterate(pHandle->cfInst, pIter); } + taosHashCleanup(pHandle->cfInst); if (pHandle->db) { - char* err = NULL; - rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create(); - rocksdb_flush(pHandle->db, flushOpt, &err); - if (err != NULL) { - qError("failed to flush db before streamBackend clean up, reason:%s", err); - taosMemoryFree(err); - } - rocksdb_flushoptions_destroy(flushOpt); rocksdb_close(pHandle->db); } rocksdb_options_destroy(pHandle->dbOpt); @@ -252,16 +566,18 @@ void streamBackendCleanup(void* arg) { taosThreadMutexDestroy(&pHandle->mutex); taosThreadMutexDestroy(&pHandle->cfMutex); - - qDebug("destroy stream backend backend:%p", pHandle); + qDebug("destroy stream backend :%p", pHandle); taosMemoryFree(pHandle); return; } void streamBackendHandleCleanup(void* arg) { SBackendCfWrapper* wrapper = arg; bool remove = wrapper->remove; + taosThreadRwlockWrlock(&wrapper->rwLock); + qDebug("start to do-close backendwrapper %p, %s", wrapper, wrapper->idstr); if (wrapper->rocksdb == NULL) { + taosThreadRwlockUnlock(&wrapper->rwLock); return; } @@ -270,19 +586,20 @@ void streamBackendHandleCleanup(void* arg) { char* err = NULL; if (remove) { for (int i = 0; i < cfLen; i++) { - if (wrapper->pHandle[i] != NULL) - rocksdb_drop_column_family(wrapper->rocksdb, ((rocksdb_column_family_handle_t**)wrapper->pHandle)[i], &err); + if (wrapper->pHandle[i] != NULL) rocksdb_drop_column_family(wrapper->rocksdb, wrapper->pHandle[i], &err); if (err != NULL) { - // qError("failed to create cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err); + qError("failed to drop cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err); taosMemoryFreeClear(err); } } } else { rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create(); + rocksdb_flushoptions_set_wait(flushOpt, 1); + for (int i = 0; i < cfLen; i++) { if (wrapper->pHandle[i] != NULL) rocksdb_flush_cf(wrapper->rocksdb, 
flushOpt, wrapper->pHandle[i], &err); if (err != NULL) { - qError("failed to create cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err); + qError("failed to flush cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err); taosMemoryFreeClear(err); } } @@ -295,6 +612,7 @@ void streamBackendHandleCleanup(void* arg) { } } taosMemoryFreeClear(wrapper->pHandle); + for (int i = 0; i < cfLen; i++) { rocksdb_options_destroy(wrapper->cfOpts[i]); rocksdb_block_based_options_destroy(((RocksdbCfParam*)wrapper->param)[i].tableOpt); @@ -310,6 +628,7 @@ void streamBackendHandleCleanup(void* arg) { wrapper->readOpts = NULL; taosMemoryFreeClear(wrapper->cfOpts); taosMemoryFreeClear(wrapper->param); + taosThreadRwlockUnlock(&wrapper->rwLock); taosThreadRwlockDestroy(&wrapper->rwLock); wrapper->rocksdb = NULL; @@ -319,6 +638,363 @@ void streamBackendHandleCleanup(void* arg) { taosMemoryFree(wrapper); return; } + +int32_t getLatestCheckpoint(void* arg, int64_t* checkpoint) { + SStreamMeta* pMeta = arg; + taosWLockLatch(&pMeta->chkpDirLock); + int64_t tc = 0; + int32_t sz = taosArrayGetSize(pMeta->chkpSaved); + if (sz <= 0) { + taosWUnLockLatch(&pMeta->chkpDirLock); + return -1; + } else { + tc = *(int64_t*)taosArrayGetLast(pMeta->chkpSaved); + } + + taosArrayPush(pMeta->chkpInUse, &tc); + + *checkpoint = tc; + taosWUnLockLatch(&pMeta->chkpDirLock); + return 0; +} +/* + * checkpointSave |--cp1--|--cp2--|--cp3--|--cp4--|--cp5--| + * chkpInUse: |--cp2--|--cp4--| + * chkpInUse is doing translation, cannot del until + * replication is finished + */ +int32_t delObsoleteCheckpoint(void* arg, const char* path) { + SStreamMeta* pMeta = arg; + + taosWLockLatch(&pMeta->chkpDirLock); + + SArray* chkpDel = taosArrayInit(10, sizeof(int64_t)); + SArray* chkpDup = taosArrayInit(10, sizeof(int64_t)); + + int64_t firsId = 0; + if (taosArrayGetSize(pMeta->chkpInUse) >= 1) { + firsId = *(int64_t*)taosArrayGet(pMeta->chkpInUse, 0); + + for (int i = 0; i < taosArrayGetSize(pMeta->chkpSaved); i++) { + int64_t id = *(int64_t*)taosArrayGet(pMeta->chkpSaved, i); + if (id >= firsId) { + taosArrayPush(chkpDup, &id); + } else { + taosArrayPush(chkpDel, &id); + } + } + } else { + int32_t sz = taosArrayGetSize(pMeta->chkpSaved); + int32_t dsz = sz - pMeta->chkpCap; // del size + + for (int i = 0; i < dsz; i++) { + int64_t id = *(int64_t*)taosArrayGet(pMeta->chkpSaved, i); + taosArrayPush(chkpDel, &id); + } + for (int i = dsz < 0 ? 0 : dsz; i < sz; i++) { + int64_t id = *(int64_t*)taosArrayGet(pMeta->chkpSaved, i); + taosArrayPush(chkpDup, &id); + } + } + taosArrayDestroy(pMeta->chkpSaved); + pMeta->chkpSaved = chkpDup; + + taosWUnLockLatch(&pMeta->chkpDirLock); + + for (int i = 0; i < taosArrayGetSize(chkpDel); i++) { + int64_t id = *(int64_t*)taosArrayGet(chkpDel, i); + char tbuf[256] = {0}; + sprintf(tbuf, "%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, id); + if (taosIsDir(tbuf)) { + taosRemoveDir(tbuf); + } + } + taosArrayDestroy(chkpDel); + return 0; +} + +static int32_t compareCheckpoint(const void* a, const void* b) { + int64_t x = *(int64_t*)a; + int64_t y = *(int64_t*)b; + return x < y ? 
-1 : 1; +} + +int32_t streamBackendLoadCheckpointInfo(void* arg) { + SStreamMeta* pMeta = arg; + int32_t code = 0; + SArray* suffix = NULL; + + int32_t len = strlen(pMeta->path) + 30; + char* chkpPath = taosMemoryCalloc(1, len); + sprintf(chkpPath, "%s%s%s", pMeta->path, TD_DIRSEP, "checkpoints"); + + if (!taosDirExist(chkpPath)) { + // no checkpoint, nothing to load + taosMemoryFree(chkpPath); + return 0; + } + + TdDirPtr pDir = taosOpenDir(chkpPath); + if (pDir == NULL) { + taosMemoryFree(chkpPath); + return 0; + } + + TdDirEntryPtr de = NULL; + suffix = taosArrayInit(4, sizeof(int64_t)); + + while ((de = taosReadDir(pDir)) != NULL) { + if (strcmp(taosGetDirEntryName(de), ".") == 0 || strcmp(taosGetDirEntryName(de), "..") == 0) continue; + + if (taosDirEntryIsDir(de)) { + char checkpointPrefix[32] = {0}; + int64_t checkpointId = 0; + + int ret = sscanf(taosGetDirEntryName(de), "checkpoint%" PRId64 "", &checkpointId); + if (ret == 1) { + taosArrayPush(suffix, &checkpointId); + } + } else { + continue; + } + } + taosArraySort(suffix, compareCheckpoint); + // free previous chkpSaved + taosArrayClear(pMeta->chkpSaved); + for (int i = 0; i < taosArrayGetSize(suffix); i++) { + int64_t id = *(int64_t*)taosArrayGet(suffix, i); + taosArrayPush(pMeta->chkpSaved, &id); + } + + taosArrayDestroy(suffix); + taosCloseDir(&pDir); + taosMemoryFree(chkpPath); + return 0; +} + +int32_t chkpGetAllDbCfHandle(SStreamMeta* pMeta, rocksdb_column_family_handle_t*** ppHandle, SArray* refs) { + SArray* pHandle = taosArrayInit(16, POINTER_BYTES); + void* pIter = taosHashIterate(pMeta->pTaskBackendUnique, NULL); + while (pIter) { + int64_t id = *(int64_t*)pIter; + + SBackendCfWrapper* wrapper = taosAcquireRef(streamBackendCfWrapperId, id); + if (wrapper == NULL) continue; + + taosThreadRwlockRdlock(&wrapper->rwLock); + for (int i = 0; i < sizeof(ginitDict) / sizeof(ginitDict[0]); i++) { + if (wrapper->pHandle[i]) { + rocksdb_column_family_handle_t* p = wrapper->pHandle[i]; + taosArrayPush(pHandle, &p); + } + } + taosThreadRwlockUnlock(&wrapper->rwLock); + + taosArrayPush(refs, &id); + pIter = taosHashIterate(pMeta->pTaskBackendUnique, pIter); + } + + int32_t nCf = taosArrayGetSize(pHandle); + + rocksdb_column_family_handle_t** ppCf = taosMemoryCalloc(nCf, sizeof(rocksdb_column_family_handle_t*)); + for (int i = 0; i < nCf; i++) { + ppCf[i] = taosArrayGetP(pHandle, i); + } + taosArrayDestroy(pHandle); + + *ppHandle = ppCf; + return nCf; +} +int32_t chkpDoDbCheckpoint(rocksdb_t* db, char* path) { + int32_t code = -1; + char* err = NULL; + rocksdb_checkpoint_t* cp = rocksdb_checkpoint_object_create(db, &err); + if (cp == NULL || err != NULL) { + qError("failed to do checkpoint at:%s, reason:%s", path, err); + taosMemoryFreeClear(err); + goto _ERROR; + } + + rocksdb_checkpoint_create(cp, path, 64 << 20, &err); + if (err != NULL) { + qError("failed to do checkpoint at:%s, reason:%s", path, err); + taosMemoryFreeClear(err); + } else { + code = 0; + } +_ERROR: + rocksdb_checkpoint_object_destroy(cp); + return code; +} +int32_t chkpPreFlushDb(rocksdb_t* db, rocksdb_column_family_handle_t** cf, int32_t nCf) { + int code = 0; + char* err = NULL; + + rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create(); + rocksdb_flushoptions_set_wait(flushOpt, 1); + + rocksdb_flush_cfs(db, flushOpt, cf, nCf, &err); + if (err != NULL) { + qError("failed to flush db before streamBackend clean up, reason:%s", err); + taosMemoryFree(err); + code = -1; + } + rocksdb_flushoptions_destroy(flushOpt); + return code; +} +int32_t 
chkpPreCheckDir(char* path, int64_t chkpId, char** chkpDir, char** chkpIdDir) { + int32_t code = 0; + char* pChkpDir = taosMemoryCalloc(1, 256); + char* pChkpIdDir = taosMemoryCalloc(1, 256); + + sprintf(pChkpDir, "%s%s%s", path, TD_DIRSEP, "checkpoints"); + code = taosMulModeMkDir(pChkpDir, 0755, true); + if (code != 0) { + qError("failed to prepare checkpoint dir, path:%s, reason:%s", path, tstrerror(code)); + taosMemoryFree(pChkpDir); + taosMemoryFree(pChkpIdDir); + code = -1; + return code; + } + + sprintf(pChkpIdDir, "%s%scheckpoint%" PRId64, pChkpDir, TD_DIRSEP, chkpId); + if (taosIsDir(pChkpIdDir)) { + qInfo("stream rm exist checkpoint%s", pChkpIdDir); + taosRemoveFile(pChkpIdDir); + } + *chkpDir = pChkpDir; + *chkpIdDir = pChkpIdDir; + + return 0; +} + +int32_t streamBackendTriggerChkp(void* arg, char* dst) { + SStreamMeta* pMeta = arg; + int64_t backendRid = pMeta->streamBackendRid; + int32_t code = -1; + + SArray* refs = taosArrayInit(16, sizeof(int64_t)); + rocksdb_column_family_handle_t** ppCf = NULL; + + int64_t st = taosGetTimestampMs(); + SBackendWrapper* pHandle = taosAcquireRef(streamBackendId, backendRid); + + if (pHandle == NULL || pHandle->db == NULL) { + goto _ERROR; + } + int32_t nCf = chkpGetAllDbCfHandle(pMeta, &ppCf, refs); + qDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pHandle, dst, nCf); + + code = chkpPreFlushDb(pHandle->db, ppCf, nCf); + if (code == 0) { + code = chkpDoDbCheckpoint(pHandle->db, dst); + if (code != 0) { + qError("stream backend:%p failed to do checkpoint at:%s", pHandle, dst); + } else { + qDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pHandle, dst, + taosGetTimestampMs() - st); + } + } else { + qError("stream backend:%p failed to flush db at:%s", pHandle, dst); + } + + // release all ref to cfWrapper; + for (int i = 0; i < taosArrayGetSize(refs); i++) { + int64_t id = *(int64_t*)taosArrayGet(refs, i); + taosReleaseRef(streamBackendCfWrapperId, id); + } + +_ERROR: + taosReleaseRef(streamBackendId, backendRid); + taosArrayDestroy(refs); + return code; +} +int32_t streamBackendAddInUseChkp(void* arg, int64_t chkpId) { + if (arg == NULL) return 0; + + SStreamMeta* pMeta = arg; + taosWLockLatch(&pMeta->chkpDirLock); + taosArrayPush(pMeta->chkpInUse, &chkpId); + taosWUnLockLatch(&pMeta->chkpDirLock); + return 0; +} +int32_t streamBackendDelInUseChkp(void* arg, int64_t chkpId) { + if (arg == NULL) return 0; + + SStreamMeta* pMeta = arg; + taosWLockLatch(&pMeta->chkpDirLock); + if (taosArrayGetSize(pMeta->chkpInUse) > 0) { + int64_t id = *(int64_t*)taosArrayGet(pMeta->chkpInUse, 0); + if (id == chkpId) { + taosArrayPopFrontBatch(pMeta->chkpInUse, 1); + } + } + taosWUnLockLatch(&pMeta->chkpDirLock); + return 0; +} + +int32_t streamBackendDoCheckpoint(void* arg, uint64_t checkpointId) { + SStreamMeta* pMeta = arg; + int64_t backendRid = pMeta->streamBackendRid; + int64_t st = taosGetTimestampMs(); + int32_t code = -1; + + SArray* refs = taosArrayInit(16, sizeof(int64_t)); + + rocksdb_column_family_handle_t** ppCf = NULL; + + char* pChkpDir = NULL; + char* pChkpIdDir = NULL; + if (chkpPreCheckDir(pMeta->path, checkpointId, &pChkpDir, &pChkpIdDir) != 0) { + taosArrayDestroy(refs); + return code; + } + + SBackendWrapper* pHandle = taosAcquireRef(streamBackendId, backendRid); + if (pHandle == NULL || pHandle->db == NULL) { + goto _ERROR; + } + + // Get all cf and acquire cfWrappter + int32_t nCf = chkpGetAllDbCfHandle(pMeta, &ppCf, refs); + qDebug("stream backend:%p start to do checkpoint at:%s, cf 
num: %d ", pHandle, pChkpIdDir, nCf); + + code = chkpPreFlushDb(pHandle->db, ppCf, nCf); + if (code == 0) { + code = chkpDoDbCheckpoint(pHandle->db, pChkpIdDir); + if (code != 0) { + qError("stream backend:%p failed to do checkpoint at:%s", pHandle, pChkpIdDir); + } else { + qDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pHandle, pChkpIdDir, + taosGetTimestampMs() - st); + } + } else { + qError("stream backend:%p failed to flush db at:%s", pHandle, pChkpIdDir); + } + // release all ref to cfWrapper; + for (int i = 0; i < taosArrayGetSize(refs); i++) { + int64_t id = *(int64_t*)taosArrayGet(refs, i); + taosReleaseRef(streamBackendCfWrapperId, id); + } + if (code == 0) { + taosWLockLatch(&pMeta->chkpDirLock); + taosArrayPush(pMeta->chkpSaved, &checkpointId); + taosWUnLockLatch(&pMeta->chkpDirLock); + + // delete obsolte checkpoint + delObsoleteCheckpoint(arg, pChkpDir); + pMeta->chkpId = checkpointId; + } + +_ERROR: + taosReleaseRef(streamBackendId, backendRid); + taosArrayDestroy(refs); + taosMemoryFree(ppCf); + taosMemoryFree(pChkpDir); + taosMemoryFree(pChkpIdDir); + return code; +} + SListNode* streamBackendAddCompare(void* backend, void* arg) { SBackendWrapper* pHandle = (SBackendWrapper*)backend; SListNode* node = NULL; @@ -348,7 +1024,8 @@ static rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const cha rocksdb_snapshot_t** snapshot, rocksdb_readoptions_t** readOpt); int defaultKeyComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen) { - int ret = memcmp(aBuf, bBuf, aLen); + int len = aLen < bLen ? aLen : bLen; + int ret = memcmp(aBuf, bBuf, len); if (ret == 0) { if (aLen < bLen) return -1; @@ -360,9 +1037,9 @@ int defaultKeyComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, return ret; } } -int streamStateValueIsStale(char* vv) { +int streamStateValueIsStale(char* v) { int64_t ts = 0; - taosDecodeFixedI64(vv, &ts); + taosDecodeFixedI64(v, &ts); return (ts != 0 && ts < taosGetTimestampMs()) ? 
1 : 0; } int iterValueIsStale(rocksdb_iterator_t* iter) { @@ -432,7 +1109,7 @@ int stateKeyDecode(void* k, char* buf) { int stateKeyToString(void* k, char* buf) { SStateKey* key = k; int n = 0; - n += sprintf(buf + n, "[groupId:%" PRId64 ",", key->key.groupId); + n += sprintf(buf + n, "[groupId:%" PRIu64 ",", key->key.groupId); n += sprintf(buf + n, "ts:%" PRIi64 ",", key->key.ts); n += sprintf(buf + n, "opNum:%" PRIi64 "]", key->opNum); return n; @@ -468,8 +1145,8 @@ int stateSessionKeyDBComp(void* state, const char* aBuf, size_t aLen, const char return stateSessionKeyCmpr(&w1, sizeof(w1), &w2, sizeof(w2)); } -int stateSessionKeyEncode(void* ses, char* buf) { - SStateSessionKey* sess = ses; +int stateSessionKeyEncode(void* k, char* buf) { + SStateSessionKey* sess = k; int len = 0; len += taosEncodeFixedI64((void**)&buf, sess->key.win.skey); len += taosEncodeFixedI64((void**)&buf, sess->key.win.ekey); @@ -477,8 +1154,8 @@ int stateSessionKeyEncode(void* ses, char* buf) { len += taosEncodeFixedI64((void**)&buf, sess->opNum); return len; } -int stateSessionKeyDecode(void* ses, char* buf) { - SStateSessionKey* sess = ses; +int stateSessionKeyDecode(void* k, char* buf) { + SStateSessionKey* sess = k; int len = 0; char* p = buf; @@ -693,33 +1370,23 @@ int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest) { SStreamValue key = {0}; char* p = value; if (streamStateValueIsStale(p)) { - *dest = NULL; - return -1; + goto _EXCEPT; } p = taosDecodeFixedI64(p, &key.unixTimestamp); p = taosDecodeFixedI32(p, &key.len); if (vlen != (sizeof(int64_t) + sizeof(int32_t) + key.len)) { - if (dest != NULL) *dest = NULL; qError("vlen: %d, read len: %d", vlen, key.len); - return -1; + goto _EXCEPT; } + if (key.len != 0 && dest != NULL) p = taosDecodeBinary(p, (void**)dest, key.len); - if (key.len == 0) { - key.data = NULL; - } else { - p = taosDecodeBinary(p, (void**)&(key.data), key.len); - } - - if (ttl != NULL) { - int64_t now = taosGetTimestampMs(); - *ttl = key.unixTimestamp == 0 ? 0 : key.unixTimestamp - now; - } - if (dest != NULL) { - *dest = key.data; - } else { - taosMemoryFree(key.data); - } + if (ttl != NULL) *ttl = key.unixTimestamp == 0 ? 
0 : key.unixTimestamp - taosGetTimestampMs(); return key.len; + +_EXCEPT: + if (dest != NULL) *dest = NULL; + if (ttl != NULL) *ttl = 0; + return -1; } const char* compareDefaultName(void* arg) { @@ -808,6 +1475,7 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t if (3 == sscanf(cf, "0x%" PRIx64 "-%d_%s", &streamId, &taskId, funcname)) { rocksdb_block_based_table_options_t* tableOpt = rocksdb_block_based_options_create(); rocksdb_block_based_options_set_block_cache(tableOpt, handle->cache); + rocksdb_block_based_options_set_partition_filters(tableOpt, 1); rocksdb_filterpolicy_t* filter = rocksdb_filterpolicy_create_bloom(15); rocksdb_block_based_options_set_filter_policy(tableOpt, filter); @@ -829,6 +1497,12 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t if (err != NULL) { qError("failed to open rocksdb cf, reason:%s", err); taosMemoryFree(err); + taosMemoryFree(cfHandle); + taosMemoryFree(pCompare); + taosMemoryFree(params); + taosMemoryFree(cfOpts); + // fix other leak + return -1; } else { qDebug("succ to open rocksdb cf"); } @@ -838,12 +1512,14 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t cfHandle[0] = NULL; } rocksdb_options_destroy(cfOpts[0]); + handle->db = db; static int32_t cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]); for (int i = 0; i < nCf; i++) { char* cf = cfs[i]; - if (i == 0) continue; + if (i == 0) continue; // skip default column family, not set opt + char funcname[64] = {0}; if (3 == sscanf(cf, "0x%" PRIx64 "-%d_%s", &streamId, &taskId, funcname)) { char idstr[128] = {0}; @@ -876,15 +1552,16 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t inst->pHandle[idx] = cfHandle[i]; } } - void** pIter = taosHashIterate(handle->cfInst, NULL); + void* pIter = taosHashIterate(handle->cfInst, NULL); while (pIter) { - RocksdbCfInst* inst = *pIter; + RocksdbCfInst* inst = *(RocksdbCfInst**)pIter; for (int i = 0; i < cfLen; i++) { if (inst->cfOpt[i] == NULL) { rocksdb_options_t* opt = rocksdb_options_create_copy(handle->dbOpt); rocksdb_block_based_table_options_t* tableOpt = rocksdb_block_based_options_create(); rocksdb_block_based_options_set_block_cache(tableOpt, handle->cache); + rocksdb_block_based_options_set_partition_filters(tableOpt, 1); rocksdb_filterpolicy_t* filter = rocksdb_filterpolicy_create_bloom(15); rocksdb_block_based_options_set_filter_policy(tableOpt, filter); @@ -914,12 +1591,12 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t return 0; } int streamStateOpenBackend(void* backend, SStreamState* pState) { - qInfo("start to open state %p on backend %p 0x%" PRIx64 "-%d", pState, backend, pState->streamId, pState->taskId); + // qInfo("start to open state %p on backend %p 0x%" PRIx64 "-%d", pState, backend, pState->streamId, pState->taskId); taosAcquireRef(streamBackendId, pState->streamBackendRid); SBackendWrapper* handle = backend; SBackendCfWrapper* pBackendCfWrapper = taosMemoryCalloc(1, sizeof(SBackendCfWrapper)); - taosThreadMutexLock(&handle->cfMutex); + taosThreadMutexLock(&handle->cfMutex); RocksdbCfInst** ppInst = taosHashGet(handle->cfInst, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1); if (ppInst != NULL && *ppInst != NULL) { RocksdbCfInst* inst = *ppInst; @@ -954,6 +1631,7 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) { // refactor later rocksdb_block_based_table_options_t* tableOpt = rocksdb_block_based_options_create(); 
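Editor's note: the patch builds the same per-column-family table options in several places (shared LRU block cache, 15-bits-per-key bloom filter, partitioned filters). The condensed sketch below shows that configuration against the public RocksDB C API; error handling and the other options the patch sets are omitted, and the function name is illustrative.

#include <rocksdb/c.h>

/* Mirror of the cf options set up in streamStateOpenBackendCf and friends:
 * shared block cache, bloom filter (15 bits/key), partitioned filters. */
static rocksdb_options_t* sketchMakeCfOptions(rocksdb_options_t* dbOpt, rocksdb_cache_t* sharedCache) {
  rocksdb_block_based_table_options_t* tableOpt = rocksdb_block_based_options_create();
  rocksdb_block_based_options_set_block_cache(tableOpt, sharedCache);
  rocksdb_block_based_options_set_partition_filters(tableOpt, 1);

  rocksdb_filterpolicy_t* filter = rocksdb_filterpolicy_create_bloom(15);
  rocksdb_block_based_options_set_filter_policy(tableOpt, filter);

  rocksdb_options_t* opt = rocksdb_options_create_copy(dbOpt);  /* inherit db-wide settings */
  rocksdb_options_set_block_based_table_factory(opt, tableOpt);
  return opt;
}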
rocksdb_block_based_options_set_block_cache(tableOpt, handle->cache); + rocksdb_block_based_options_set_partition_filters(tableOpt, 1); rocksdb_filterpolicy_t* filter = rocksdb_filterpolicy_create_bloom(15); rocksdb_block_based_options_set_filter_policy(tableOpt, filter); @@ -997,6 +1675,9 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) { void streamStateCloseBackend(SStreamState* pState, bool remove) { SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; SBackendWrapper* pHandle = wrapper->pBackend; + + qInfo("start to close state on backend: %p", pHandle); + taosThreadMutexLock(&pHandle->cfMutex); RocksdbCfInst** ppInst = taosHashGet(pHandle->cfInst, wrapper->idstr, strlen(pState->pTdbState->idstr) + 1); if (ppInst != NULL && *ppInst != NULL) { @@ -1007,7 +1688,7 @@ void streamStateCloseBackend(SStreamState* pState, bool remove) { taosThreadMutexUnlock(&pHandle->cfMutex); char* status[] = {"close", "drop"}; - qInfo("start to close %s state %p on backendWrapper %p %s", status[remove == false ? 0 : 1], pState, wrapper, + qInfo("start to %s state %p on backendWrapper %p %s", status[remove == false ? 0 : 1], pState, wrapper, wrapper->idstr); wrapper->remove |= remove; // update by other pState taosReleaseRef(streamBackendCfWrapperId, pState->pTdbState->backendCfWrapperId); @@ -1065,21 +1746,21 @@ bool streamStateIterSeekAndValid(rocksdb_iterator_t* iter, char* buf, size_t len } return true; } -rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfName, rocksdb_snapshot_t** snapshot, +rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKeyName, rocksdb_snapshot_t** snapshot, rocksdb_readoptions_t** readOpt) { - int idx = streamStateGetCfIdx(pState, cfName); + int idx = streamStateGetCfIdx(pState, cfKeyName); - rocksdb_readoptions_t* rOpt = rocksdb_readoptions_create(); - *readOpt = rOpt; + *readOpt = rocksdb_readoptions_create(); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; if (snapshot != NULL) { *snapshot = (rocksdb_snapshot_t*)rocksdb_create_snapshot(wrapper->rocksdb); - rocksdb_readoptions_set_snapshot(rOpt, *snapshot); - rocksdb_readoptions_set_fill_cache(rOpt, 0); + rocksdb_readoptions_set_snapshot(*readOpt, *snapshot); + rocksdb_readoptions_set_fill_cache(*readOpt, 0); } - return rocksdb_create_iterator_cf(wrapper->rocksdb, rOpt, ((rocksdb_column_family_handle_t**)wrapper->pHandle)[idx]); + return rocksdb_create_iterator_cf(wrapper->rocksdb, *readOpt, + ((rocksdb_column_family_handle_t**)wrapper->pHandle)[idx]); } #define STREAM_STATE_PUT_ROCKSDB(pState, funcname, key, value, vLen) \ @@ -1154,7 +1835,6 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa taosMemoryFree(val); \ if (vLen != NULL) *vLen = tlen; \ } \ - if (code == 0) qDebug("streamState str: %s succ to read from %s_%s", toString, wrapper->idstr, funcname); \ } while (0); #define STREAM_STATE_DEL_ROCKSDB(pState, funcname, key) \ @@ -1209,10 +1889,11 @@ int32_t streamStateClear_rocksdb(SStreamState* pState) { qDebug("streamStateClear_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - char sKeyStr[128] = {0}; - char eKeyStr[128] = {0}; - SStateKey sKey = {.key = {.ts = 0, .groupId = 0}, .opNum = pState->number}; - SStateKey eKey = {.key = {.ts = INT64_MAX, .groupId = UINT64_MAX}, .opNum = pState->number}; + + char sKeyStr[128] = {0}; + char eKeyStr[128] = {0}; + SStateKey sKey = {.key = {.ts = 0, .groupId = 0}, .opNum = pState->number}; + SStateKey eKey = {.key = 
{.ts = INT64_MAX, .groupId = UINT64_MAX}, .opNum = pState->number}; int sLen = stateKeyEncode(&sKey, sKeyStr); int eLen = stateKeyEncode(&eKey, eKeyStr); @@ -1247,6 +1928,7 @@ int32_t streamStateGetFirst_rocksdb(SStreamState* pState, SWinKey* key) { qDebug("streamStateGetFirst_rocksdb"); SWinKey tmp = {.ts = 0, .groupId = 0}; streamStatePut_rocksdb(pState, &tmp, NULL, 0); + SStreamStateCur* pCur = streamStateSeekKeyNext_rocksdb(pState, &tmp); int32_t code = streamStateGetKVByCur_rocksdb(pCur, key, NULL, 0); streamStateFreeCur(pCur); @@ -1301,9 +1983,13 @@ int32_t streamStateGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, cons if (pKtmp->opNum != pCur->number) { return -1; } - size_t vlen = 0; - if (pVal != NULL) *pVal = (char*)rocksdb_iter_value(pCur->iter, &vlen); - if (pVLen != NULL) *pVLen = vlen; + + if (pVLen != NULL) { + size_t vlen = 0; + const char* valStr = rocksdb_iter_value(pCur->iter, &vlen); + *pVLen = decodeValueFunc((void*)valStr, vlen, NULL, (char**)pVal); + } + *pKey = pKtmp->key; return 0; } @@ -1361,20 +2047,32 @@ SStreamStateCur* streamStateSeekKeyNext_rocksdb(SStreamState* pState, const SWin SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinKey* key) { qDebug("streamStateGetCur_rocksdb"); - int32_t code = 0; - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + int32_t code = 0; const SStateKey maxStateKey = {.key = {.groupId = UINT64_MAX, .ts = INT64_MAX}, .opNum = INT64_MAX}; STREAM_STATE_PUT_ROCKSDB(pState, "state", &maxStateKey, "", 0); - char buf[128] = {0}; - int32_t klen = stateKeyEncode((void*)&maxStateKey, buf); + if (code != 0) { + return NULL; + } + + char buf[128] = {0}; + int32_t klen = stateKeyEncode((void*)&maxStateKey, buf); + + { + char tbuf[256] = {0}; + stateKeyToString((void*)&maxStateKey, tbuf); + qDebug("seek to last:%s", tbuf); + } + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) return NULL; - pCur->db = wrapper->rocksdb; + + pCur->number = pState->number; + pCur->db = ((SBackendCfWrapper*)pState->pTdbState->pBackendCfWrapper)->rocksdb; pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); - rocksdb_iter_seek(pCur->iter, buf, (size_t)klen); + rocksdb_iter_seek(pCur->iter, buf, (size_t)klen); rocksdb_iter_prev(pCur->iter); while (rocksdb_iter_valid(pCur->iter) && iterValueIsStale(pCur->iter)) { rocksdb_iter_prev(pCur->iter); @@ -1384,6 +2082,7 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinK streamStateFreeCur(pCur); pCur = NULL; } + STREAM_STATE_DEL_ROCKSDB(pState, "state", &maxStateKey); return pCur; } @@ -1391,12 +2090,14 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinK SStreamStateCur* streamStateGetCur_rocksdb(SStreamState* pState, const SWinKey* key) { qDebug("streamStateGetCur_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) return NULL; + pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); + pCur->number = pState->number; SStateKey sKey = {.key = *key, .opNum = pState->number}; char buf[128] = {0}; @@ -1405,18 +2106,14 @@ SStreamStateCur* streamStateGetCur_rocksdb(SStreamState* pState, const SWinKey* 
rocksdb_iter_seek(pCur->iter, buf, len); if (rocksdb_iter_valid(pCur->iter) && !iterValueIsStale(pCur->iter)) { - size_t vlen; - char* val = (char*)rocksdb_iter_value(pCur->iter, &vlen); - if (!streamStateValueIsStale(val)) { - SStateKey curKey; - size_t kLen = 0; - char* keyStr = (char*)rocksdb_iter_key(pCur->iter, &kLen); - stateKeyDecode((void*)&curKey, keyStr); + SStateKey curKey; + size_t kLen = 0; + char* keyStr = (char*)rocksdb_iter_key(pCur->iter, &kLen); + stateKeyDecode((void*)&curKey, keyStr); - if (stateKeyCmpr(&sKey, sizeof(sKey), &curKey, sizeof(curKey)) == 0) { - pCur->number = pState->number; - return pCur; - } + if (stateKeyCmpr(&sKey, sizeof(sKey), &curKey, sizeof(curKey)) == 0) { + pCur->number = pState->number; + return pCur; } } streamStateFreeCur(pCur); @@ -1454,23 +2151,22 @@ int32_t streamStateSessionGet_rocksdb(SStreamState* pState, SSessionKey* key, vo SSessionKey resKey = *key; void* tmp = NULL; int32_t vLen = 0; - code = streamStateSessionGetKVByCur_rocksdb(pCur, &resKey, &tmp, &vLen); - if (code == 0) { - if (pVLen != NULL) *pVLen = vLen; - if (key->win.skey != resKey.win.skey) { - code = -1; - } else { - *key = resKey; - if (pVal != NULL && pVLen != NULL) { - *pVal = taosMemoryCalloc(1, *pVLen); - memcpy(*pVal, tmp, *pVLen); - } - } + code = streamStateSessionGetKVByCur_rocksdb(pCur, &resKey, &tmp, &vLen); + if (code == 0 && key->win.skey == resKey.win.skey) { + *key = resKey; + + if (pVal) { + *pVal = tmp; + tmp = NULL; + }; + if (pVLen) *pVLen = vLen; + } else { + code = -1; } + taosMemoryFree(tmp); streamStateFreeCur(pCur); - // impl later return code; } @@ -1516,8 +2212,6 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pSta rocksdb_iter_prev(pCur->iter); if (!rocksdb_iter_valid(pCur->iter)) { - // qWarn("streamState failed to seek key prev - // %s", toString); streamStateFreeCur(pCur); return NULL; } @@ -1535,10 +2229,10 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pSta (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; - char buf[128] = {0}; - + char buf[128] = {0}; SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; int len = stateSessionKeyEncode(&sKey, buf); + if (!streamStateIterSeekAndValid(pCur->iter, buf, len)) { streamStateFreeCur(pCur); return NULL; @@ -1586,6 +2280,7 @@ SStreamStateCur* streamStateSessionSeekKeyNext_rocksdb(SStreamState* pState, con streamStateFreeCur(pCur); return NULL; } + size_t klen; const char* iKey = rocksdb_iter_key(pCur->iter, &klen); SStateSessionKey curKey = {0}; @@ -1672,6 +2367,7 @@ SStreamStateCur* streamStateFillGetCur_rocksdb(SStreamState* pState, const SWinK pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); + pCur->number = pState->number; char buf[128] = {0}; int len = winKeyEncode((void*)key, buf); @@ -1711,8 +2407,7 @@ int32_t streamStateFillGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, winKeyDecode(&winKey, keyStr); const char* valStr = rocksdb_iter_value(pCur->iter, &vlen); - // char* dst = NULL; - int32_t len = decodeValueFunc((void*)valStr, vlen, NULL, (char**)pVal); + int32_t len = decodeValueFunc((void*)valStr, vlen, NULL, (char**)pVal); if (len < 0) { return -1; } @@ -1733,6 +2428,7 @@ SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, 
(rocksdb_readoptions_t**)&pCur->readOpt); + pCur->number = pState->number; char buf[128] = {0}; int len = winKeyEncode((void*)key, buf); @@ -1770,6 +2466,7 @@ SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); + pCur->number = pState->number; char buf[128] = {0}; int len = winKeyEncode((void*)key, buf); @@ -1803,10 +2500,10 @@ int32_t streamStateSessionGetKeyByRange_rocksdb(SStreamState* pState, const SSes if (pCur == NULL) { return -1; } - pCur->number = pState->number; pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); + pCur->number = pState->number; SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; int32_t c = 0; @@ -2075,13 +2772,12 @@ void* streamDefaultIterCreate_rocksdb(SStreamState* pState) { pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "default", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); + pCur->number = pState->number; return pCur; } int32_t streamDefaultIterValid_rocksdb(void* iter) { SStreamStateCur* pCur = iter; - bool val = rocksdb_iter_valid(pCur->iter); - - return val ? 1 : 0; + return rocksdb_iter_valid(pCur->iter) ? 1 : 0; } void streamDefaultIterSeek_rocksdb(void* iter, const char* key) { SStreamStateCur* pCur = iter; @@ -2097,13 +2793,16 @@ char* streamDefaultIterKey_rocksdb(void* iter, int32_t* len) { } char* streamDefaultIterVal_rocksdb(void* iter, int32_t* len) { SStreamStateCur* pCur = iter; - int32_t vlen = 0; - char* dst = NULL; - const char* vval = rocksdb_iter_value(pCur->iter, (size_t*)&vlen); - if (decodeValueFunc((void*)vval, vlen, NULL, &dst) < 0) { + char* ret = NULL; + + int32_t vlen = 0; + const char* val = rocksdb_iter_value(pCur->iter, (size_t*)&vlen); + *len = decodeValueFunc((void*)val, vlen, NULL, &ret); + if (*len < 0) { return NULL; } - return dst; + + return ret; } // batch func void* streamStateCreateBatch() { @@ -2117,25 +2816,34 @@ int32_t streamStateGetBatchSize(void* pBatch) { void streamStateClearBatch(void* pBatch) { rocksdb_writebatch_clear((rocksdb_writebatch_t*)pBatch); } void streamStateDestroyBatch(void* pBatch) { rocksdb_writebatch_destroy((rocksdb_writebatch_t*)pBatch); } -int32_t streamStatePutBatch(SStreamState* pState, const char* cfName, rocksdb_writebatch_t* pBatch, void* key, +int32_t streamStatePutBatch(SStreamState* pState, const char* cfKeyName, rocksdb_writebatch_t* pBatch, void* key, void* val, int32_t vlen, int64_t ttl) { SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - int i = streamStateGetCfIdx(pState, cfName); + int i = streamStateGetCfIdx(pState, cfKeyName); if (i < 0) { - qError("streamState failed to put to cf name:%s", cfName); + qError("streamState failed to put to cf name:%s", cfKeyName); return -1; } + char buf[128] = {0}; int32_t klen = ginitDict[i].enFunc((void*)key, buf); - char* ttlV = NULL; - int32_t ttlVLen = ginitDict[i].enValueFunc(val, vlen, ttl, &ttlV); + char* ttlV = NULL; + int32_t ttlVLen = ginitDict[i].enValueFunc(val, vlen, ttl, &ttlV); + rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[i].idx]; rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen); taosMemoryFree(ttlV); + + { + char tbuf[256] = {0}; + ginitDict[i].toStrFunc((void*)key, 
tbuf); + qDebug("streamState str: %s succ to write to %s_%s, len: %d", tbuf, wrapper->idstr, ginitDict[i].key, vlen); + } return 0; } + int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb_writebatch_t* pBatch, void* key, void* val, int32_t vlen, int64_t ttl, void* tmpBuf) { char buf[128] = {0}; @@ -2143,14 +2851,19 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb char* ttlV = tmpBuf; int32_t ttlVLen = ginitDict[cfIdx].enValueFunc(val, vlen, ttl, &ttlV); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[cfIdx].idx]; rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen); if (tmpBuf == NULL) { taosMemoryFree(ttlV); } + + { + char tbuf[256] = {0}; + ginitDict[cfIdx].toStrFunc((void*)key, tbuf); + qDebug("streamState str: %s succ to write to %s_%s", tbuf, wrapper->idstr, ginitDict[cfIdx].key); + } return 0; } int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) { @@ -2161,11 +2874,13 @@ int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) { qError("streamState failed to write batch, err:%s", err); taosMemoryFree(err); return -1; + } else { + qDebug("write batch to backend:%p", wrapper->pBackend); } return 0; } - uint32_t nextPow2(uint32_t x) { + if (x <= 1) return 2; x = x - 1; x = x | (x >> 1); x = x | (x >> 2); @@ -2173,4 +2888,4 @@ uint32_t nextPow2(uint32_t x) { x = x | (x >> 8); x = x | (x >> 16); return x + 1; -} \ No newline at end of file +} diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 722c557b8f..baf319d014 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -13,99 +13,72 @@ * along with this program. If not, see . 
*/ -#if 0 -#include "streamInc.h" +#include "streamInt.h" -int32_t tEncodeSStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq) { +int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->checkpointId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->nodeId) < 0) return -1; + if (tEncodeSEpSet(pEncoder, &pReq->mgmtEps) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->mnodeId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->expireTime) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; } -int32_t tDecodeSStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq) { +int32_t tDecodeStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->checkpointId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->nodeId) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &pReq->mgmtEps) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->mnodeId) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->expireTime) < 0) return -1; tEndDecode(pDecoder); return 0; } -int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp) { +int32_t tEncodeStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->taskId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->nodeId) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->expireTime) < 0) return -1; + if (tEncodeI8(pEncoder, pRsp->success) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; } -int32_t tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp) { +int32_t tDecodeStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->nodeId) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->expireTime) < 0) return -1; + if (tDecodeI8(pDecoder, &pRsp->success) < 0) return -1; tEndDecode(pDecoder); return 0; } -int32_t tEncodeSStreamCheckpointReq(SEncoder* pEncoder, const SStreamCheckpointReq* pReq) { +int32_t tEncodeStreamCheckpointReadyMsg(SEncoder* pEncoder, const SStreamCheckpointReadyMsg* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->checkpointId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->downstreamTaskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->downstreamNodeId) < 0) return -1; - if (tEncodeI64(pEncoder, pReq->upstreamTaskId) < 0) return -1; - if (tEncodeI64(pEncoder, pReq->upstreamNodeId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->upstreamTaskId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->upstreamNodeId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->childId) < 0) return -1; - if (tEncodeI64(pEncoder, 
pReq->expireTime) < 0) return -1; - if (tEncodeI8(pEncoder, pReq->taskLevel) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; } -int32_t tDecodeSStreamCheckpointReq(SDecoder* pDecoder, SStreamCheckpointReq* pReq) { - if (tStartDecode(pDecoder) < 0) return -1; - if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; - if (tDecodeI64(pDecoder, &pReq->checkpointId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->downstreamTaskId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->downstreamNodeId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->upstreamTaskId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->upstreamNodeId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->childId) < 0) return -1; - if (tDecodeI64(pDecoder, &pReq->expireTime) < 0) return -1; - if (tDecodeI8(pDecoder, &pReq->taskLevel) < 0) return -1; - tEndDecode(pDecoder); - return 0; -} - -int32_t tEncodeSStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRsp* pRsp) { - if (tStartEncode(pEncoder) < 0) return -1; - if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1; - if (tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1; - if (tEncodeI32(pEncoder, pRsp->downstreamTaskId) < 0) return -1; - if (tEncodeI32(pEncoder, pRsp->downstreamNodeId) < 0) return -1; - if (tEncodeI64(pEncoder, pRsp->upstreamTaskId) < 0) return -1; - if (tEncodeI64(pEncoder, pRsp->upstreamNodeId) < 0) return -1; - if (tEncodeI32(pEncoder, pRsp->childId) < 0) return -1; - if (tEncodeI64(pEncoder, pRsp->expireTime) < 0) return -1; - if (tEncodeI8(pEncoder, pRsp->taskLevel) < 0) return -1; - tEndEncode(pEncoder); - return pEncoder->pos; -} - -int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRsp) { +int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointReadyMsg* pRsp) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1; @@ -114,83 +87,256 @@ int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pR if (tDecodeI32(pDecoder, &pRsp->upstreamTaskId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->upstreamNodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->childId) < 0) return -1; - if (tDecodeI64(pDecoder, &pRsp->expireTime) < 0) return -1; - if (tDecodeI8(pDecoder, &pRsp->taskLevel) < 0) return -1; tEndDecode(pDecoder); return 0; } -static int32_t streamAlignCheckpoint(SStreamTask* pTask, int64_t checkpointId, int32_t childId) { - if (pTask->checkpointingId == 0) { - pTask->checkpointingId = checkpointId; - pTask->checkpointAlignCnt = taosArrayGetSize(pTask->pUpstreamEpInfoList); +static int32_t streamAlignCheckpoint(SStreamTask* pTask) { + int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList); + int64_t old = atomic_val_compare_exchange_32(&pTask->checkpointAlignCnt, 0, num); + if (old == 0) { + qDebug("s-task:%s set initial align upstream num:%d", pTask->id.idStr, num); } - ASSERT(pTask->checkpointingId == checkpointId); - return atomic_sub_fetch_32(&pTask->checkpointAlignCnt, 1); } -static int32_t streamDoCheckpoint(SStreamMeta* pMeta, SStreamTask* pTask, int64_t checkpointId) { - // commit tdb state - streamStateCommit(pTask->pState); - // commit non-tdb state - // copy and save new state - // report to mnode - // send checkpoint req to downstream - return 0; -} - -static int32_t streamDoSourceCheckpoint(SStreamMeta* pMeta, SStreamTask* pTask, int64_t checkpointId) { - // ref wal - // set status checkpointing - // do checkpoint 
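Editor's note: the new streamAlignCheckpoint above arms checkpointAlignCnt exactly once (CAS from 0 to the upstream count) and lets every arriving checkpoint-trigger block decrement it; the task may proceed only when the counter reaches zero. A self-contained sketch of that pattern with C11 atomics; the types and names are illustrative, not the TDengine atomics.

#include <stdatomic.h>

typedef struct {
  atomic_int alignCnt;  /* upstream triggers still outstanding; must start at 0 */
} SketchAlignState;

/* Called once per incoming checkpoint-trigger block. The first caller arms the
 * counter with the upstream count; every caller then decrements it. Returns
 * how many upstream tasks have not delivered their trigger yet. */
static int sketchOnCheckpointTrigger(SketchAlignState* st, int numOfUpstream) {
  int expected = 0;
  atomic_compare_exchange_strong(&st->alignCnt, &expected, numOfUpstream);
  return atomic_fetch_sub(&st->alignCnt, 1) - 1;
}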
- return 0; -} -int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq) { - int32_t code; - int64_t checkpointId = pReq->checkpointId; - - code = streamDoSourceCheckpoint(pMeta, pTask, checkpointId); - if (code < 0) { - // rsp error - return -1; +static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) { + SStreamDataBlock* pChkpoint = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock)); + if (pChkpoint == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; } - return 0; + pChkpoint->type = checkpointType; + + SSDataBlock* pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); + if (pBlock == NULL) { + taosFreeQitem(pChkpoint); + return TSDB_CODE_OUT_OF_MEMORY; + } + + pBlock->info.type = STREAM_CHECKPOINT; + pBlock->info.version = pTask->checkpointingId; + pBlock->info.rows = 1; + pBlock->info.childId = pTask->info.selfChildId; + + pChkpoint->blocks = taosArrayInit(4, sizeof(SSDataBlock));//pBlock; + taosArrayPush(pChkpoint->blocks, pBlock); + + taosMemoryFree(pBlock); + if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pChkpoint) < 0) { + taosFreeQitem(pChkpoint); + return TSDB_CODE_OUT_OF_MEMORY; + } + + streamSchedExec(pTask); + return TSDB_CODE_SUCCESS; } -int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq) { - int32_t code; - int64_t checkpointId = pReq->checkpointId; - int32_t childId = pReq->childId; +int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq) { + ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); - if (taosArrayGetSize(pTask->pUpstreamEpInfoList) > 0) { - code = streamAlignCheckpoint(pTask, checkpointId, childId); - if (code > 0) { - return 0; + // 1. set task status to be prepared for check point, no data are allowed to put into inputQ. + taosThreadMutexLock(&pTask->lock); + + pTask->status.taskStatus = TASK_STATUS__CK; + pTask->checkpointingId = pReq->checkpointId; + pTask->checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); + + // 2. let's dispatch checkpoint msg to downstream task directly and do nothing else. put the checkpoint block into + // inputQ, to make sure all blocks with less version have been handled by this task already. + int32_t code = appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER); + taosThreadMutexUnlock(&pTask->lock); + + return code; +} + +static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStreamTask* pTask) { + pBlock->srcTaskId = pTask->id.taskId; + pBlock->srcVgId = pTask->pMeta->vgId; + + int32_t code = taosWriteQitem(pTask->outputInfo.queue->pQueue, pBlock); + if (code == 0) { + streamDispatchStreamBlock(pTask); + } else { + streamFreeQitem((SStreamQueueItem*)pBlock); + } + + return code; +} + +int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { + SSDataBlock* pDataBlock = taosArrayGet(pBlock->blocks, 0); + int64_t checkpointId = pDataBlock->info.version; + + const char* id = pTask->id.idStr; + int32_t code = TSDB_CODE_SUCCESS; + + // set the task status + pTask->checkpointingId = checkpointId; + + // set task status + pTask->status.taskStatus = TASK_STATUS__CK; + + { // todo: remove this when the pipeline checkpoint generating is used. 
+ SStreamMeta* pMeta = pTask->pMeta; + taosWLockLatch(&pMeta->lock); + + if (pMeta->chkptNotReadyTasks == 0) { + pMeta->chkptNotReadyTasks = streamMetaGetNumOfStreamTasks(pMeta); + pMeta->totalTasks = pMeta->chkptNotReadyTasks; } - if (code < 0) { - ASSERT(0); - return -1; + + taosWUnLockLatch(&pMeta->lock); + } + + //todo fix race condition: set the status and append checkpoint block + int32_t taskLevel = pTask->info.taskLevel; + if (taskLevel == TASK_LEVEL__SOURCE) { + if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { + qDebug("s-task:%s set childIdx:%d, and add checkpoint block into outputQ", id, pTask->info.selfChildId); + continueDispatchCheckpointBlock(pBlock, pTask); + } else { // only one task exists, no need to dispatch downstream info + streamProcessCheckpointReadyMsg(pTask); + streamFreeQitem((SStreamQueueItem*)pBlock); + } + } else if (taskLevel == TASK_LEVEL__SINK || taskLevel == TASK_LEVEL__AGG) { + ASSERT(taosArrayGetSize(pTask->pUpstreamInfoList) > 0); + + // update the child Id for downstream tasks + streamAddCheckpointReadyMsg(pTask, pBlock->srcTaskId, pTask->info.selfChildId, checkpointId); + + // there are still some upstream tasks not send checkpoint request, do nothing and wait for then + int32_t notReady = streamAlignCheckpoint(pTask); + int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList); + if (notReady > 0) { + qDebug("s-task:%s received checkpoint block, idx:%d, %d upstream tasks not send checkpoint info yet, total:%d", + id, pTask->info.selfChildId, notReady, num); + streamFreeQitem((SStreamQueueItem*)pBlock); + return code; + } + + if (taskLevel == TASK_LEVEL__SINK) { + qDebug("s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, send ready msg to upstream", + id, num); + streamFreeQitem((SStreamQueueItem*)pBlock); + streamTaskBuildCheckpoint(pTask); + } else { + qDebug( + "s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, dispatch checkpoint msg " + "downstream", id, num); + + // set the needed checked downstream tasks, only when all downstream tasks do checkpoint complete, this task + // can start local checkpoint procedure + pTask->checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); + + // Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task + // already. And then, dispatch check point msg to all downstream tasks + code = continueDispatchCheckpointBlock(pBlock, pTask); } } - code = streamDoCheckpoint(pMeta, pTask, checkpointId); - if (code < 0) { - // rsp error - return -1; + return code; +} + +/** + * All down stream tasks have successfully completed the check point task. + * Current stream task is allowed to start to do checkpoint things in ASYNC model. 
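+ * Readiness is tracked by pTask->checkpointNotReadyTasks: each checkpoint-ready msg from a
+ * downstream task decrements the counter, and when it reaches zero a STREAM_INPUT__CHECKPOINT
+ * block is appended to the inputQ so the actual checkpoint is performed by the exec thread.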
+ */ +int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) { + ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE || pTask->info.taskLevel == TASK_LEVEL__AGG); + + // only when all downstream tasks are send checkpoint rsp, we can start the checkpoint procedure for the agg task + int32_t notReady = atomic_sub_fetch_32(&pTask->checkpointNotReadyTasks, 1); + ASSERT(notReady >= 0); + + if (notReady == 0) { + qDebug("s-task:%s all downstream tasks have completed the checkpoint, start to do checkpoint for current task", + pTask->id.idStr); + appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT); + } else { + int32_t total = streamTaskGetNumOfDownstream(pTask); + qDebug("s-task:%s %d/%d downstream tasks are not ready, wait", pTask->id.idStr, notReady, total); } - // send rsp to all children - return 0; } -int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp) { - // recover step2, scan from wal - // unref wal - // set status normal - return 0; +int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) { + taosWLockLatch(&pMeta->lock); + + int64_t keys[2]; + for (int32_t i = 0; i < taosArrayGetSize(pMeta->pTaskList); ++i) { + SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i); + keys[0] = pId->streamId; + keys[1] = pId->taskId; + + SStreamTask* p = *(SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + if (p->info.fillHistory == 1) { + continue; + } + + int8_t prev = p->status.taskStatus; + ASSERT(p->chkInfo.checkpointId < p->checkpointingId && p->checkpointingId == checkpointId); + + p->chkInfo.checkpointId = p->checkpointingId; + streamSetStatusNormal(p); + + // save the task + streamMetaSaveTask(pMeta, p); + streamTaskOpenAllUpstreamInput(p); // open inputQ for all upstream tasks + qDebug("vgId:%d s-task:%s level:%d commit task status after checkpoint completed, checkpointId:%" PRId64 + ", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status to be normal, prev:%s", + pMeta->vgId, p->id.idStr, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, p->chkInfo.currentVer, + streamGetTaskStatusStr(prev)); + } + + if (streamMetaCommit(pMeta) < 0) { + taosWUnLockLatch(&pMeta->lock); + qError("vgId:%d failed to commit stream meta after do checkpoint, checkpointId:%" PRId64 ", since %s", pMeta->vgId, + checkpointId, terrstr()); + return -1; + } else { + taosWUnLockLatch(&pMeta->lock); + qInfo("vgId:%d commit stream meta after do checkpoint, checkpointId:%" PRId64 " DONE", pMeta->vgId, checkpointId); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { + int32_t code = 0; + + // check for all tasks, and do generate the vnode-wide checkpoint data. 
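+  // Each finishing task decrements pMeta->chkptNotReadyTasks; only the last one (remain == 0)
+  // triggers the backend checkpoint and saves/commits all task statuses. Afterwards every task
+  // reports back: a source task responds to the mnode, agg/sink tasks notify their upstreams.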
+ SStreamMeta* pMeta = pTask->pMeta; + int32_t remain = atomic_sub_fetch_32(&pMeta->chkptNotReadyTasks, 1); + ASSERT(remain >= 0); + + if (remain == 0) { // all tasks are ready + qDebug("s-task:%s is ready for checkpoint", pTask->id.idStr); + pMeta->totalTasks = 0; + + streamBackendDoCheckpoint(pMeta, pTask->checkpointingId); + streamSaveAllTaskStatus(pMeta, pTask->checkpointingId); + qDebug("vgId:%d vnode wide checkpoint completed, save all tasks status, checkpointId:%" PRId64, pMeta->vgId, + pTask->checkpointingId); + } else { + qDebug("vgId:%d vnode wide tasks not reach checkpoint ready status, ready s-task:%s, not ready:%d/%d", pMeta->vgId, + pTask->id.idStr, remain, pMeta->totalTasks); + } + + // send check point response to upstream task + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + code = streamTaskSendCheckpointSourceRsp(pTask); + } else { + code = streamTaskSendCheckpointReadyMsg(pTask); + } + + if (code != TSDB_CODE_SUCCESS) { + // todo: let's retry send rsp to upstream/mnode + qError("s-task:%s failed to send checkpoint rsp to upstream, checkpointId:%" PRId64 ", code:%s", pTask->id.idStr, + pTask->checkpointingId, tstrerror(code)); + } + + return code; } -#endif diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index fc1b788b77..ea3e4f5985 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -15,7 +15,7 @@ #include "streamInt.h" -SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg) { +SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg) { SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, pReq->totalLen); if (pData == NULL) { return NULL; @@ -23,6 +23,7 @@ SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq pData->type = blockType; pData->srcVgId = srcVg; + pData->srcTaskId = pReq->upstreamTaskId; int32_t blockNum = pReq->blockNum; SArray* pArray = taosArrayInit_s(sizeof(SSDataBlock), blockNum); @@ -60,16 +61,15 @@ SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamT return NULL; } + pStreamBlocks->srcTaskId = pTask->id.taskId; pStreamBlocks->type = STREAM_INPUT__DATA_BLOCK; pStreamBlocks->blocks = pRes; if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { SStreamDataSubmit* pSubmit = (SStreamDataSubmit*)pItem; - pStreamBlocks->childId = pTask->info.selfChildId; pStreamBlocks->sourceVer = pSubmit->ver; } else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) { SStreamMergedSubmit* pMerged = (SStreamMergedSubmit*)pItem; - pStreamBlocks->childId = pTask->info.selfChildId; pStreamBlocks->sourceVer = pMerged->ver; } @@ -121,6 +121,7 @@ SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type) { return NULL; } + pDataSubmit->ver = pData->ver; pDataSubmit->submit = *pData; *pDataSubmit->dataRef = 1; // initialize the reference count to be 1 pDataSubmit->type = type; @@ -165,6 +166,7 @@ int32_t streamMergeSubmit(SStreamMergedSubmit* pMerged, SStreamDataSubmit* pSubm return 0; } +// todo handle memory error SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem) { terrno = 0; @@ -194,11 +196,16 @@ SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* taosFreeQitem(pElem); return (SStreamQueueItem*)pMerged; } else { - qDebug("block type:%d not merged with existed blocks list, type:%d", pElem->type, dst->type); + qDebug("block 
type:%s not merged with existed blocks list, type:%d", streamGetBlockTypeStr(pElem->type), dst->type); return NULL; } } +static void freeItems(void* param) { + SSDataBlock* pBlock = param; + taosArrayDestroy(pBlock->pDataBlock); +} + void streamFreeQitem(SStreamQueueItem* data) { int8_t type = data->type; if (type == STREAM_INPUT__GET_RES) { @@ -232,5 +239,22 @@ void streamFreeQitem(SStreamQueueItem* data) { SStreamRefDataBlock* pRefBlock = (SStreamRefDataBlock*)data; blockDataDestroy(pRefBlock->pBlock); taosFreeQitem(pRefBlock); + } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + SStreamDataBlock* pBlock = (SStreamDataBlock*) data; + taosArrayDestroyEx(pBlock->blocks, freeItems); + taosFreeQitem(pBlock); } } + +const char* streamGetBlockTypeStr(int32_t type) { + switch (type) { + case STREAM_INPUT__CHECKPOINT: + return "checkpoint"; + case STREAM_INPUT__CHECKPOINT_TRIGGER: + return "checkpoint-trigger"; + case STREAM_INPUT__TRANS_STATE: + return "trans-state"; + default: + return ""; + } +} \ No newline at end of file diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 0864eb3c28..cf04bcc1b8 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -14,7 +14,9 @@ */ #include "streamInt.h" +#include "trpc.h" #include "ttimer.h" +#include "tmisce.h" #define MAX_BLOCK_NAME_NUM 1024 #define DISPATCH_RETRY_INTERVAL_MS 300 @@ -25,22 +27,38 @@ typedef struct SBlockName { char parTbName[TSDB_TABLE_NAME_LEN]; } SBlockName; +typedef struct { + int32_t upStreamTaskId; + SEpSet upstreamNodeEpset; + SRpcMsg msg; +} SStreamChkptReadyInfo; + +static void doRetryDispatchData(void* param, void* tmrId); +static int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet); +static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq); +static int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, SSDataBlock* pDataBlock, + int32_t vgSz, int64_t groupId); +static int32_t doDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId, + SEpSet* pEpSet); + static int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, int32_t numOfBlocks, int64_t dstTaskId, int32_t type); -static void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen) { +void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen) { pMsg->msgType = msgType; pMsg->pCont = pCont; pMsg->contLen = contLen; } -static int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) { +int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->stage) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->srcVgId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->type) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->type) < 0) return -1; if (tEncodeI32(pEncoder, pReq->upstreamTaskId) < 0) return -1; - if (tEncodeI32(pEncoder, pReq->srcVgId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->upstreamChildId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->upstreamNodeId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->blockNum) < 0) return -1; @@ -57,44 +75,15 @@ static 
int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatc return pEncoder->pos; } -static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq) { - int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock); - void* buf = taosMemoryCalloc(1, dataStrLen); - if (buf == NULL) return -1; - - SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)buf; - pRetrieve->useconds = 0; - pRetrieve->precision = TSDB_DEFAULT_PRECISION; - pRetrieve->compressed = 0; - pRetrieve->completed = 1; - pRetrieve->streamBlockType = pBlock->info.type; - pRetrieve->numOfRows = htobe64((int64_t)pBlock->info.rows); - pRetrieve->skey = htobe64(pBlock->info.window.skey); - pRetrieve->ekey = htobe64(pBlock->info.window.ekey); - pRetrieve->version = htobe64(pBlock->info.version); - pRetrieve->watermark = htobe64(pBlock->info.watermark); - memcpy(pRetrieve->parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN); - - int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock); - pRetrieve->numOfCols = htonl(numOfCols); - - int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols); - actualLen += sizeof(SRetrieveTableRsp); - ASSERT(actualLen <= dataStrLen); - taosArrayPush(pReq->dataLen, &actualLen); - taosArrayPush(pReq->data, &buf); - - pReq->totalLen += dataStrLen; - return 0; -} - int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) { if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->stage) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->srcVgId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->type) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->type) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->upstreamTaskId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->srcVgId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->upstreamChildId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->upstreamNodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->blockNum) < 0) return -1; @@ -113,14 +102,16 @@ int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) { taosArrayPush(pReq->dataLen, &len1); taosArrayPush(pReq->data, &data); } + tEndDecode(pDecoder); return 0; } -int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, int32_t numOfBlocks, - int64_t dstTaskId, int32_t type) { +static int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, + int32_t numOfBlocks, int64_t dstTaskId, int32_t type) { pReq->streamId = pTask->id.streamId; pReq->srcVgId = vgId; + pReq->stage = pTask->pMeta->stage; pReq->upstreamTaskId = pTask->id.taskId; pReq->upstreamChildId = pTask->info.selfChildId; pReq->upstreamNodeId = pTask->info.nodeId; @@ -205,11 +196,11 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) .retrieveLen = dataStrLen, }; - int32_t sz = taosArrayGetSize(pTask->pUpstreamEpInfoList); + int32_t sz = taosArrayGetSize(pTask->pUpstreamInfoList); ASSERT(sz > 0); for (int32_t i = 0; i < sz; i++) { req.reqId = tGenIdPI64(); - SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i); + SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); req.dstNodeId = pEpInfo->nodeId; req.dstTaskId = pEpInfo->taskId; int32_t len; @@ -231,7 +222,9 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) 
tEncodeStreamRetrieveReq(&encoder, &req); tEncoderClear(&encoder); - SRpcMsg rpcMsg = {.code = 0, .msgType = TDMT_STREAM_RETRIEVE, .pCont = buf, .contLen = sizeof(SMsgHead) + len}; + SRpcMsg rpcMsg = {0}; + initRpcMsg(&rpcMsg, TDMT_STREAM_RETRIEVE, buf, len + sizeof(SMsgHead)); + if (tmsgSendReq(&pEpInfo->epSet, &rpcMsg) < 0) { ASSERT(0); goto CLEAR; @@ -274,175 +267,16 @@ int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pR rpcFreeCont(buf); return code; } - tEncoderClear(&encoder); - msg.contLen = tlen + sizeof(SMsgHead); - msg.pCont = buf; - msg.msgType = TDMT_STREAM_TASK_CHECK; - - qDebug("s-task:%s (level:%d) dispatch check msg to s-task:%" PRIx64 ":0x%x (vgId:%d)", pTask->id.idStr, + initRpcMsg(&msg, TDMT_VND_STREAM_TASK_CHECK, buf, tlen + sizeof(SMsgHead)); + qDebug("s-task:%s (level:%d) send check msg to s-task:0x%" PRIx64 ":0x%x (vgId:%d)", pTask->id.idStr, pTask->info.taskLevel, pReq->streamId, pReq->downstreamTaskId, nodeId); tmsgSendReq(pEpSet, &msg); return 0; } -int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId, - SEpSet* pEpSet) { - void* buf = NULL; - int32_t code = -1; - SRpcMsg msg = {0}; - - int32_t tlen; - tEncodeSize(tEncodeStreamScanHistoryFinishReq, pReq, tlen, code); - if (code < 0) { - return -1; - } - - buf = rpcMallocCont(sizeof(SMsgHead) + tlen); - if (buf == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - ((SMsgHead*)buf)->vgId = htonl(vgId); - void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); - - SEncoder encoder; - tEncoderInit(&encoder, abuf, tlen); - if ((code = tEncodeStreamScanHistoryFinishReq(&encoder, pReq)) < 0) { - if (buf) { - rpcFreeCont(buf); - } - return code; - } - - tEncoderClear(&encoder); - - msg.contLen = tlen + sizeof(SMsgHead); - msg.pCont = buf; - msg.msgType = TDMT_STREAM_SCAN_HISTORY_FINISH; - - tmsgSendReq(pEpSet, &msg); - - const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); - qDebug("s-task:%s status:%s dispatch scan-history finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pStatus, - pReq->downstreamTaskId, vgId); - return 0; -} - -static int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet) { - void* buf = NULL; - int32_t code = -1; - SRpcMsg msg = {0}; - - // serialize - int32_t tlen; - tEncodeSize(tEncodeStreamDispatchReq, pReq, tlen, code); - if (code < 0) { - goto FAIL; - } - - code = -1; - buf = rpcMallocCont(sizeof(SMsgHead) + tlen); - if (buf == NULL) { - goto FAIL; - } - - ((SMsgHead*)buf)->vgId = htonl(vgId); - void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); - - SEncoder encoder; - tEncoderInit(&encoder, abuf, tlen); - if ((code = tEncodeStreamDispatchReq(&encoder, pReq)) < 0) { - goto FAIL; - } - tEncoderClear(&encoder); - - msg.contLen = tlen + sizeof(SMsgHead); - msg.pCont = buf; - msg.msgType = pTask->msgInfo.msgType; - - qDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg, len:%d", pTask->id.idStr, pReq->taskId, vgId, - msg.contLen); - return tmsgSendReq(pEpSet, &msg); - -FAIL: - if (buf) { - rpcFreeCont(buf); - } - - return code; -} - -int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, SSDataBlock* pDataBlock, int32_t vgSz, - int64_t groupId) { - uint32_t hashValue = 0; - SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; - if (pTask->pNameMap == NULL) { - pTask->pNameMap = tSimpleHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); - } - - void* pVal = 
tSimpleHashGet(pTask->pNameMap, &groupId, sizeof(int64_t)); - if (pVal) { - SBlockName* pBln = (SBlockName*)pVal; - hashValue = pBln->hashValue; - if (!pDataBlock->info.parTbName[0]) { - memset(pDataBlock->info.parTbName, 0, TSDB_TABLE_NAME_LEN); - memcpy(pDataBlock->info.parTbName, pBln->parTbName, strlen(pBln->parTbName)); - } - } else { - char* ctbName = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN); - if (ctbName == NULL) { - return -1; - } - - if (pDataBlock->info.parTbName[0]) { - snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); - } else { - buildCtbNameByGroupIdImpl(pTask->shuffleDispatcher.stbFullName, groupId, pDataBlock->info.parTbName); - snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); - } - - /*uint32_t hashValue = MurmurHash3_32(ctbName, strlen(ctbName));*/ - SUseDbRsp* pDbInfo = &pTask->shuffleDispatcher.dbInfo; - hashValue = - taosGetTbHashVal(ctbName, strlen(ctbName), pDbInfo->hashMethod, pDbInfo->hashPrefix, pDbInfo->hashSuffix); - taosMemoryFree(ctbName); - SBlockName bln = {0}; - bln.hashValue = hashValue; - memcpy(bln.parTbName, pDataBlock->info.parTbName, strlen(pDataBlock->info.parTbName)); - if (tSimpleHashGetSize(pTask->pNameMap) < MAX_BLOCK_NAME_NUM) { - tSimpleHashPut(pTask->pNameMap, &groupId, sizeof(int64_t), &bln, sizeof(SBlockName)); - } - } - - bool found = false; - // TODO: optimize search - int32_t j; - for (j = 0; j < vgSz; j++) { - SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, j); - ASSERT(pVgInfo->vgId > 0); - - if (hashValue >= pVgInfo->hashBegin && hashValue <= pVgInfo->hashEnd) { - if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) { - return -1; - } - - if (pReqs[j].blockNum == 0) { - atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); - } - - pReqs[j].blockNum++; - found = true; - break; - } - } - ASSERT(found); - return 0; -} - static int32_t doDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData) { int32_t code = 0; int32_t numOfBlocks = taosArrayGetSize(pData->blocks); @@ -569,7 +403,11 @@ static void doRetryDispatchData(void* param, void* tmrId) { if (!streamTaskShouldStop(&pTask->status)) { qDebug("s-task:%s reset the waitRspCnt to be 0 before launch retry dispatch", pTask->id.idStr); atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0); - streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS); + if (streamTaskShouldPause(&pTask->status)) { + streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS * 10); + } else { + streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS); + } } else { atomic_sub_fetch_8(&pTask->status.timerActive, 1); qDebug("s-task:%s should stop, abort from timer", pTask->id.idStr); @@ -580,34 +418,101 @@ static void doRetryDispatchData(void* param, void* tmrId) { } void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration) { - qError("s-task:%s dispatch data in %"PRId64"ms", pTask->id.idStr, waitDuration); + qError("s-task:%s dispatch data in %" PRId64 "ms", pTask->id.idStr, waitDuration); taosTmrReset(doRetryDispatchData, waitDuration, pTask, streamEnv.timer, &pTask->launchTaskTimer); } -int32_t streamDispatchStreamBlock(SStreamTask* pTask) { - STaskOutputInfo* pInfo = &pTask->outputInfo; - ASSERT((pInfo->type == TASK_OUTPUT__FIXED_DISPATCH || pInfo->type == TASK_OUTPUT__SHUFFLE_DISPATCH)); +int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, SSDataBlock* pDataBlock, int32_t vgSz, + 
int64_t groupId) { + uint32_t hashValue = 0; + SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + if (pTask->pNameMap == NULL) { + pTask->pNameMap = tSimpleHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); + } - int32_t numOfElems = taosQueueItemSize(pInfo->queue->queue); + void* pVal = tSimpleHashGet(pTask->pNameMap, &groupId, sizeof(int64_t)); + if (pVal) { + SBlockName* pBln = (SBlockName*)pVal; + hashValue = pBln->hashValue; + if (!pDataBlock->info.parTbName[0]) { + memset(pDataBlock->info.parTbName, 0, TSDB_TABLE_NAME_LEN); + memcpy(pDataBlock->info.parTbName, pBln->parTbName, strlen(pBln->parTbName)); + } + } else { + char* ctbName = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN); + if (ctbName == NULL) { + return -1; + } + + if (pDataBlock->info.parTbName[0]) { + snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); + } else { + buildCtbNameByGroupIdImpl(pTask->shuffleDispatcher.stbFullName, groupId, pDataBlock->info.parTbName); + snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); + } + + /*uint32_t hashValue = MurmurHash3_32(ctbName, strlen(ctbName));*/ + SUseDbRsp* pDbInfo = &pTask->shuffleDispatcher.dbInfo; + hashValue = + taosGetTbHashVal(ctbName, strlen(ctbName), pDbInfo->hashMethod, pDbInfo->hashPrefix, pDbInfo->hashSuffix); + taosMemoryFree(ctbName); + SBlockName bln = {0}; + bln.hashValue = hashValue; + memcpy(bln.parTbName, pDataBlock->info.parTbName, strlen(pDataBlock->info.parTbName)); + if (tSimpleHashGetSize(pTask->pNameMap) < MAX_BLOCK_NAME_NUM) { + tSimpleHashPut(pTask->pNameMap, &groupId, sizeof(int64_t), &bln, sizeof(SBlockName)); + } + } + + bool found = false; + // TODO: optimize search + int32_t j; + for (j = 0; j < vgSz; j++) { + SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, j); + ASSERT(pVgInfo->vgId > 0); + + if (hashValue >= pVgInfo->hashBegin && hashValue <= pVgInfo->hashEnd) { + if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) { + return -1; + } + + if (pReqs[j].blockNum == 0) { + atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); + } + + pReqs[j].blockNum++; + found = true; + break; + } + } + ASSERT(found); + return 0; +} + +int32_t streamDispatchStreamBlock(SStreamTask* pTask) { + ASSERT((pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH)); + + const char* id = pTask->id.idStr; + int32_t numOfElems = taosQueueItemSize(pTask->outputInfo.queue->pQueue); if (numOfElems > 0) { - qDebug("s-task:%s try to dispatch intermediate result block to downstream, elem in outputQ:%d", pTask->id.idStr, - numOfElems); + qDebug("s-task:%s try to dispatch intermediate block to downstream, elem in outputQ:%d", id, numOfElems); } // to make sure only one dispatch is running - int8_t old = atomic_val_compare_exchange_8(&pInfo->status, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT); + int8_t old = + atomic_val_compare_exchange_8(&pTask->outputInfo.status, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT); if (old != TASK_OUTPUT_STATUS__NORMAL) { - qDebug("s-task:%s wait for dispatch rsp, not dispatch now, output status:%d", pTask->id.idStr, old); + qDebug("s-task:%s wait for dispatch rsp, not dispatch now, output status:%d", id, old); return 0; } ASSERT(pTask->msgInfo.pData == NULL); - qDebug("s-task:%s start to dispatch msg, set output status:%d", pTask->id.idStr, pInfo->status); + qDebug("s-task:%s start to dispatch msg, set output 
status:%d", id, pTask->outputInfo.status); - SStreamDataBlock* pBlock = streamQueueNextItem(pInfo->queue); + SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputInfo.queue); if (pBlock == NULL) { - atomic_store_8(&pInfo->status, TASK_OUTPUT_STATUS__NORMAL); - qDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", pTask->id.idStr, pInfo->status); + atomic_store_8(&pTask->outputInfo.status, TASK_OUTPUT_STATUS__NORMAL); + qDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", id, pTask->outputInfo.status); return 0; } @@ -623,8 +528,8 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { break; } - qDebug("s-task:%s failed to dispatch msg to downstream, code:%s, output status:%d, retry cnt:%d", pTask->id.idStr, - tstrerror(terrno), pInfo->status, retryCount); + qDebug("s-task:%s failed to dispatch msg to downstream, code:%s, output status:%d, retry cnt:%d", id, + tstrerror(terrno), pTask->outputInfo.status, retryCount); // todo deal with only partially success dispatch case atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0); @@ -646,6 +551,294 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { return TSDB_CODE_SUCCESS; } +int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) { + SStreamScanHistoryFinishReq req = { + .streamId = pTask->id.streamId, + .childId = pTask->info.selfChildId, + .upstreamTaskId = pTask->id.taskId, + .upstreamNodeId = pTask->pMeta->vgId, + }; + + // serialize + if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { + req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; + pTask->notReadyTasks = 1; + doDispatchScanHistoryFinishMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); + } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { + SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + int32_t numOfVgs = taosArrayGetSize(vgInfo); + pTask->notReadyTasks = numOfVgs; + + qDebug("s-task:%s send scan-history data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr, + numOfVgs, streamGetTaskStatusStr(pTask->status.taskStatus)); + for (int32_t i = 0; i < numOfVgs; i++) { + SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); + req.downstreamTaskId = pVgInfo->taskId; + doDispatchScanHistoryFinishMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); + } + } else { + qDebug("s-task:%s no downstream tasks, invoke scan-history finish rsp directly", pTask->id.idStr); + streamProcessScanHistoryFinishRsp(pTask); + } + + return 0; +} + +// this function is usually invoked by sink/agg task +int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { + int32_t num = taosArrayGetSize(pTask->pReadyMsgList); + ASSERT(taosArrayGetSize(pTask->pUpstreamInfoList) == num); + + for (int32_t i = 0; i < num; ++i) { + SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, i); + tmsgSendReq(&pInfo->upstreamNodeEpset, &pInfo->msg); + + qDebug("s-task:%s level:%d checkpoint ready msg sent to upstream:0x%x", pTask->id.idStr, pTask->info.taskLevel, + pInfo->upStreamTaskId); + } + + taosArrayClear(pTask->pReadyMsgList); + qDebug("s-task:%s level:%d checkpoint ready msg sent to all %d upstreams", pTask->id.idStr, pTask->info.taskLevel, num); + + return TSDB_CODE_SUCCESS; +} + +// this function is only invoked by source task, and send rsp to mnode +int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask) { + ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE && taosArrayGetSize(pTask->pReadyMsgList) == 1); + 
SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, 0); + + tmsgSendRsp(&pInfo->msg); + + taosArrayClear(pTask->pReadyMsgList); + qDebug("s-task:%s level:%d source checkpoint completed msg sent to mnode", pTask->id.idStr, pTask->info.taskLevel); + + return TSDB_CODE_SUCCESS; +} + +int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq) { + int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock); + void* buf = taosMemoryCalloc(1, dataStrLen); + if (buf == NULL) return -1; + + SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)buf; + pRetrieve->useconds = 0; + pRetrieve->precision = TSDB_DEFAULT_PRECISION; + pRetrieve->compressed = 0; + pRetrieve->completed = 1; + pRetrieve->streamBlockType = pBlock->info.type; + pRetrieve->numOfRows = htobe64((int64_t)pBlock->info.rows); + pRetrieve->skey = htobe64(pBlock->info.window.skey); + pRetrieve->ekey = htobe64(pBlock->info.window.ekey); + pRetrieve->version = htobe64(pBlock->info.version); + pRetrieve->watermark = htobe64(pBlock->info.watermark); + memcpy(pRetrieve->parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN); + + int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock); + pRetrieve->numOfCols = htonl(numOfCols); + + int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols); + actualLen += sizeof(SRetrieveTableRsp); + ASSERT(actualLen <= dataStrLen); + taosArrayPush(pReq->dataLen, &actualLen); + taosArrayPush(pReq->data, &buf); + + pReq->totalLen += dataStrLen; + return 0; +} + +int32_t doDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId, + SEpSet* pEpSet) { + void* buf = NULL; + int32_t code = -1; + SRpcMsg msg = {0}; + + int32_t tlen; + tEncodeSize(tEncodeStreamScanHistoryFinishReq, pReq, tlen, code); + if (code < 0) { + return -1; + } + + buf = rpcMallocCont(sizeof(SMsgHead) + tlen); + if (buf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + ((SMsgHead*)buf)->vgId = htonl(vgId); + void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + + SEncoder encoder; + tEncoderInit(&encoder, abuf, tlen); + if ((code = tEncodeStreamScanHistoryFinishReq(&encoder, pReq)) < 0) { + if (buf) { + rpcFreeCont(buf); + } + return code; + } + + tEncoderClear(&encoder); + + initRpcMsg(&msg, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, buf, tlen + sizeof(SMsgHead)); + + tmsgSendReq(pEpSet, &msg); + const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); + qDebug("s-task:%s status:%s dispatch scan-history finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pStatus, + pReq->downstreamTaskId, vgId); + return 0; +} + +int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet) { + void* buf = NULL; + int32_t code = -1; + SRpcMsg msg = {0}; + + // serialize + int32_t tlen; + tEncodeSize(tEncodeStreamDispatchReq, pReq, tlen, code); + if (code < 0) { + goto FAIL; + } + + code = -1; + buf = rpcMallocCont(sizeof(SMsgHead) + tlen); + if (buf == NULL) { + goto FAIL; + } + + ((SMsgHead*)buf)->vgId = htonl(vgId); + void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + + SEncoder encoder; + tEncoderInit(&encoder, abuf, tlen); + if ((code = tEncodeStreamDispatchReq(&encoder, pReq)) < 0) { + goto FAIL; + } + tEncoderClear(&encoder); + + initRpcMsg(&msg, pTask->msgInfo.msgType, buf, tlen + sizeof(SMsgHead)); + qDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg", pTask->id.idStr, pReq->taskId, vgId); + + return tmsgSendReq(pEpSet, &msg); + +FAIL: + if 
(buf) { + rpcFreeCont(buf); + } + + return code; +} + +int32_t buildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SRpcMsg* pMsg, + int8_t isSucceed) { + int32_t len = 0; + int32_t code = 0; + SEncoder encoder; + + SStreamCheckpointSourceRsp rsp = { + .checkpointId = pReq->checkpointId, + .taskId = pReq->taskId, + .nodeId = pReq->nodeId, + .streamId = pReq->streamId, + .expireTime = pReq->expireTime, + .mnodeId = pReq->mnodeId, + .success = isSucceed, + }; + + tEncodeSize(tEncodeStreamCheckpointSourceRsp, &rsp, len, code); + if (code < 0) { + return code; + } + + void* pBuf = rpcMallocCont(sizeof(SMsgHead) + len); + if (pBuf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + ((SMsgHead*)pBuf)->vgId = htonl(pReq->mnodeId); + + void* abuf = POINTER_SHIFT(pBuf, sizeof(SMsgHead)); + + tEncoderInit(&encoder, (uint8_t*)abuf, len); + tEncodeStreamCheckpointSourceRsp(&encoder, &rsp); + tEncoderClear(&encoder); + + initRpcMsg(pMsg, 0, pBuf, sizeof(SMsgHead) + len); + pMsg->info = *pRpcInfo; + return 0; +} + +int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, + SStreamTask* pTask, int8_t isSucceed) { + SStreamChkptReadyInfo info = {0}; + buildCheckpointSourceRsp(pReq, pRpcInfo, &info.msg, isSucceed); + + if (pTask->pReadyMsgList == NULL) { + pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); + } + + taosArrayPush(pTask->pReadyMsgList, &info); + qDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, (int32_t)taosArrayGetSize(pTask->pReadyMsgList)); + return TSDB_CODE_SUCCESS; +} + +int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, int32_t index, int64_t checkpointId) { + int32_t code = 0; + int32_t tlen = 0; + void* buf = NULL; + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + return TSDB_CODE_SUCCESS; + } + + SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); + + SStreamCheckpointReadyMsg req = {0}; + req.downstreamNodeId = pTask->pMeta->vgId; + req.downstreamTaskId = pTask->id.taskId; + req.streamId = pTask->id.streamId; + req.checkpointId = checkpointId; + req.childId = pInfo->childId; + req.upstreamNodeId = pInfo->nodeId; + req.upstreamTaskId = pInfo->taskId; + + tEncodeSize(tEncodeStreamCheckpointReadyMsg, &req, tlen, code); + if (code < 0) { + return -1; + } + + buf = rpcMallocCont(sizeof(SMsgHead) + tlen); + if (buf == NULL) { + return -1; + } + + ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId); + void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + + SEncoder encoder; + tEncoderInit(&encoder, abuf, tlen); + if ((code = tEncodeStreamCheckpointReadyMsg(&encoder, &req)) < 0) { + rpcFreeCont(buf); + return code; + } + tEncoderClear(&encoder); + + ASSERT(req.upstreamTaskId != 0); + + SStreamChkptReadyInfo info = {.upStreamTaskId = pInfo->taskId, .upstreamNodeEpset = pInfo->epSet}; + initRpcMsg(&info.msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); + info.msg.info.noResp = 1; // refactor later. 
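+  // Note: the msg is only staged here; it is appended to pReadyMsgList below and flushed later
+  // by streamTaskSendCheckpointReadyMsg, which is invoked from streamTaskBuildCheckpoint.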
+ + qDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 ":0x%x (vgId:%d) idx:%d", + pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.downstreamNodeId, index); + + if (pTask->pReadyMsgList == NULL) { + pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); + } + + taosArrayPush(pTask->pReadyMsgList, &info); + return 0; +} + int32_t tEncodeCompleteHistoryDataMsg(SEncoder* pEncoder, const SStreamCompleteHistoryMsg* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; @@ -668,7 +861,7 @@ int32_t tDecodeCompleteHistoryDataMsg(SDecoder* pDecoder, SStreamCompleteHistory return 0; } -int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, SStreamScanHistoryFinishReq* pReq) { +int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, int32_t* pLen) { int32_t len = 0; int32_t code = 0; SEncoder encoder; @@ -699,6 +892,16 @@ int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, tEncodeCompleteHistoryDataMsg(&encoder, &msg); tEncoderClear(&encoder); + *pBuffer = pBuf; + *pLen = len; + return 0; +} + +int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, SStreamScanHistoryFinishReq* pReq) { + void* pBuf = NULL; + int32_t len = 0; + + streamTaskBuildScanhistoryRspMsg(pTask, pReq, &pBuf, &len); SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId); SStreamContinueExecInfo info = {.taskId = pReq->upstreamTaskId, .epset = pInfo->epSet}; @@ -726,7 +929,7 @@ int32_t streamNotifyUpstreamContinue(SStreamTask* pTask) { SStreamContinueExecInfo* pInfo = taosArrayGet(pTask->pRspMsgList, i); tmsgSendRsp(&pInfo->msg); - qDebug("s-task:%s level:%d notify upstream:0x%x to continue process data from WAL", pTask->id.idStr, pTask->info.taskLevel, + qDebug("s-task:%s level:%d notify upstream:0x%x to continue process data in WAL", pTask->id.idStr, pTask->info.taskLevel, pInfo->taskId); } @@ -792,7 +995,7 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i // the input queue of the (down stream) task that receive the output data is full, // so the TASK_INPUT_STATUS_BLOCKED is rsp if (pRsp->inputStatus == TASK_INPUT_STATUS__BLOCKED) { - pTask->inputStatus = TASK_INPUT_STATUS__BLOCKED; // block the input of current task, to push pressure to upstream + pTask->inputInfo.status = TASK_INPUT_STATUS__BLOCKED; // block the input of current task, to push pressure to upstream pTask->msgInfo.blockingTs = taosGetTimestampMs(); // record the blocking start time qError("s-task:%s inputQ of downstream task:0x%x is full, time:%" PRId64 " wait for %dms and retry dispatch data", id, pRsp->downstreamTaskId, pTask->msgInfo.blockingTs, DISPATCH_RETRY_INTERVAL_MS); @@ -809,7 +1012,7 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i pTask->msgInfo.blockingTs = 0; // put data into inputQ of current task is also allowed - pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; + pTask->inputInfo.status = TASK_INPUT_STATUS__NORMAL; } // now ready for next data output @@ -821,3 +1024,41 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i return 0; } + +int32_t tEncodeStreamTaskUpdateMsg(SEncoder* pEncoder, const SStreamTaskNodeUpdateMsg* pMsg) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pMsg->streamId) < 0) return -1; + if 
(tEncodeI32(pEncoder, pMsg->taskId) < 0) return -1; + + int32_t size = taosArrayGetSize(pMsg->pNodeList); + if (tEncodeI32(pEncoder, size) < 0) return -1; + + for (int32_t i = 0; i < size; ++i) { + SNodeUpdateInfo* pInfo = taosArrayGet(pMsg->pNodeList, i); + if (tEncodeI32(pEncoder, pInfo->nodeId) < 0) return -1; + if (tEncodeSEpSet(pEncoder, &pInfo->prevEp) < 0) return -1; + if (tEncodeSEpSet(pEncoder, &pInfo->newEp) < 0) return -1; + } + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeStreamTaskUpdateMsg(SDecoder* pDecoder, SStreamTaskNodeUpdateMsg* pMsg) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pMsg->streamId) < 0) return -1; + if (tDecodeI32(pDecoder, &pMsg->taskId) < 0) return -1; + + int32_t size = 0; + if (tDecodeI32(pDecoder, &size) < 0) return -1; + pMsg->pNodeList = taosArrayInit(size, sizeof(SNodeUpdateInfo)); + for (int32_t i = 0; i < size; ++i) { + SNodeUpdateInfo info = {0}; + if (tDecodeI32(pDecoder, &info.nodeId) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &info.prevEp) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &info.newEp) < 0) return -1; + taosArrayPush(pMsg->pNodeList, &info); + } + + tEndDecode(pDecoder); + return 0; +} diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 5e5c165c9c..ff667fa778 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -16,11 +16,10 @@ #include "streamInt.h" // maximum allowed processed block batches. One block may include several submit blocks -#define MAX_STREAM_EXEC_BATCH_NUM 32 -#define MIN_STREAM_EXEC_BATCH_NUM 4 -#define STREAM_RESULT_DUMP_THRESHOLD 100 +#define MAX_STREAM_EXEC_BATCH_NUM 32 +#define MIN_STREAM_EXEC_BATCH_NUM 4 +#define STREAM_RESULT_DUMP_THRESHOLD 100 -static int32_t updateCheckPointInfo(SStreamTask* pTask); static int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask); bool streamTaskShouldStop(const SStreamStatus* pStatus) { @@ -30,17 +29,11 @@ bool streamTaskShouldStop(const SStreamStatus* pStatus) { bool streamTaskShouldPause(const SStreamStatus* pStatus) { int32_t status = atomic_load_8((int8_t*)&pStatus->taskStatus); - return (status == TASK_STATUS__PAUSE || status == TASK_STATUS__HALT); + return (status == TASK_STATUS__PAUSE); } static int32_t doDumpResult(SStreamTask* pTask, SStreamQueueItem* pItem, SArray* pRes, int32_t size, int64_t* totalSize, int32_t* totalBlocks) { - int32_t code = updateCheckPointInfo(pTask); - if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - return code; - } - int32_t numOfBlocks = taosArrayGetSize(pRes); if (numOfBlocks > 0) { SStreamDataBlock* pStreamBlocks = createStreamBlockFromResults(pItem, pTask, size, pRes); @@ -53,7 +46,7 @@ static int32_t doDumpResult(SStreamTask* pTask, SStreamQueueItem* pItem, SArray* qDebug("s-task:%s dump stream result data blocks, num:%d, size:%.2fMiB", pTask->id.idStr, numOfBlocks, SIZE_IN_MB(size)); - code = streamTaskOutputResultBlock(pTask, pStreamBlocks); + int32_t code = streamTaskOutputResultBlock(pTask, pStreamBlocks); if (code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY) { // back pressure and record position destroyStreamDataBlock(pStreamBlocks); return -1; @@ -90,7 +83,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i return 0; } - if (pTask->inputStatus == TASK_INPUT_STATUS__BLOCKED) { + if (pTask->inputInfo.status == TASK_INPUT_STATUS__BLOCKED) { qWarn("s-task:%s downstream task inputQ blocked, idle for 1sec and retry", 
pTask->id.idStr); taosMsleep(1000); continue; @@ -103,7 +96,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i resetTaskInfo(pExecutor); } - qError("unexpected stream execution, s-task:%s since %s", pTask->id.idStr, terrstr()); + qError("unexpected stream execution, s-task:%s since %s", pTask->id.idStr, tstrerror(code)); continue; } @@ -119,8 +112,8 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i taosArrayPush(pRes, &block); numOfBlocks += 1; - qDebug("s-task:%s(child %d) retrieve process completed, reqId:0x%" PRIx64" dump results", pTask->id.idStr, pTask->info.selfChildId, - pRetrieveBlock->reqId); + qDebug("s-task:%s(child %d) retrieve process completed, reqId:0x%" PRIx64 " dump results", pTask->id.idStr, + pTask->info.selfChildId, pRetrieveBlock->reqId); } break; @@ -131,6 +124,8 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i // TODO } continue; + } else if (output->info.type == STREAM_CHECKPOINT) { + continue; // checkpoint block not dispatch to downstream tasks } SSDataBlock block = {0}; @@ -197,7 +192,7 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize) { return 0; } - if (pTask->inputStatus == TASK_INPUT_STATUS__BLOCKED) { + if (pTask->inputInfo.status == TASK_INPUT_STATUS__BLOCKED) { qDebug("s-task:%s inputQ is blocked, wait for 10sec and retry", pTask->id.idStr); taosMsleep(10000); continue; @@ -253,49 +248,27 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize) { return 0; } -int32_t updateCheckPointInfo(SStreamTask* pTask) { - int64_t ckId = 0; - int64_t dataVer = 0; - qGetCheckpointVersion(pTask->exec.pExecutor, &dataVer, &ckId); +int32_t streamTaskGetInputQItems(const SStreamTask* pTask) { + int32_t numOfItems1 = taosQueueItemSize(pTask->inputInfo.queue->pQueue); + int32_t numOfItems2 = taosQallItemSize(pTask->inputInfo.queue->qall); - SCheckpointInfo* pCkInfo = &pTask->chkInfo; - if (ckId > pCkInfo->id) { // save it since the checkpoint is updated - qDebug("s-task:%s exec end, start to update check point, ver from %" PRId64 " to %" PRId64 - ", checkPoint id:%" PRId64 " -> %" PRId64, - pTask->id.idStr, pCkInfo->version, dataVer, pCkInfo->id, ckId); - - pTask->chkInfo = (SCheckpointInfo){.version = dataVer, .id = ckId, .currentVer = pCkInfo->currentVer}; - - taosWLockLatch(&pTask->pMeta->lock); - - streamMetaSaveTask(pTask->pMeta, pTask); - if (streamMetaCommit(pTask->pMeta) < 0) { - taosWUnLockLatch(&pTask->pMeta->lock); - qError("s-task:%s failed to commit stream meta, since %s", pTask->id.idStr, terrstr()); - return -1; - } else { - taosWUnLockLatch(&pTask->pMeta->lock); - qDebug("s-task:%s update checkpoint ver succeed", pTask->id.idStr); - } - } - - return TSDB_CODE_SUCCESS; + return numOfItems1 + numOfItems2; } +// wait for the stream task to be idle static void waitForTaskIdle(SStreamTask* pTask, SStreamTask* pStreamTask) { - // wait for the stream task to be idle - int64_t st = taosGetTimestampMs(); + const char* id = pTask->id.idStr; + int64_t st = taosGetTimestampMs(); while (!streamTaskIsIdle(pStreamTask)) { - qDebug("s-task:%s level:%d wait for stream task:%s to be idle, check again in 100ms", pTask->id.idStr, - pTask->info.taskLevel, pStreamTask->id.idStr); + qDebug("s-task:%s level:%d wait for stream task:%s to be idle, check again in 100ms", id, pTask->info.taskLevel, + pStreamTask->id.idStr); taosMsleep(100); } double el = (taosGetTimestampMs() - st) / 1000.0; if (el > 0) { - qDebug("s-task:%s wait for stream task:%s for %.2fs to 
be idle", pTask->id.idStr, - pStreamTask->id.idStr, el); + qDebug("s-task:%s wait for stream task:%s for %.2fs to be idle", id, pStreamTask->id.idStr, el); } } @@ -335,7 +308,7 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) { ASSERT(status == TASK_STATUS__HALT || status == TASK_STATUS__DROPPING); } else { - ASSERT(status == TASK_STATUS__SCAN_HISTORY); + ASSERT(status == TASK_STATUS__NORMAL); pStreamTask->status.taskStatus = TASK_STATUS__HALT; qDebug("s-task:%s halt by related fill-history task:%s", pStreamTask->id.idStr, pTask->id.idStr); } @@ -387,14 +360,15 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { // 7. pause allowed. streamTaskEnablePause(pStreamTask); - if (taosQueueEmpty(pStreamTask->inputQueue->queue)) { - SStreamRefDataBlock* pItem = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0);; + if (taosQueueEmpty(pStreamTask->inputInfo.queue->pQueue)) { + SStreamRefDataBlock* pItem = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); + SSDataBlock* pDelBlock = createSpecialDataBlock(STREAM_DELETE_DATA); pDelBlock->info.rows = 0; pDelBlock->info.version = 0; pItem->type = STREAM_INPUT__REF_DATA_BLOCK; pItem->pBlock = pDelBlock; - int32_t code = tAppendDataToInputQueue(pStreamTask, (SStreamQueueItem*)pItem); + int32_t code = streamTaskPutDataIntoInputQ(pStreamTask, (SStreamQueueItem*)pItem); qDebug("s-task:%s append dummy delete block,res:%d", pStreamTask->id.idStr, code); } @@ -419,89 +393,53 @@ int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { return code; } -static int32_t extractBlocksFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks) { - int32_t retryTimes = 0; - int32_t MAX_RETRY_TIMES = 5; - const char* id = pTask->id.idStr; +// set input +static void doSetStreamInputBlock(SStreamTask* pTask, const void* pInput, int64_t* pVer, const char* id) { + void* pExecutor = pTask->exec.pExecutor; - if (pTask->info.taskLevel == TASK_LEVEL__SINK) { // extract block from inputQ, one-by-one - while (1) { - if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) { - qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); - return TSDB_CODE_SUCCESS; - } + const SStreamQueueItem* pItem = pInput; + if (pItem->type == STREAM_INPUT__GET_RES) { + const SStreamTrigger* pTrigger = (const SStreamTrigger*)pInput; + qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK); - SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); - if (qItem == NULL) { - qDebug("===stream===break batchSize:%d, %s", *numOfBlocks, id); - return TSDB_CODE_SUCCESS; - } + } else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { + ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); + const SStreamDataSubmit* pSubmit = (const SStreamDataSubmit*)pInput; + qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT); + qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, id, pSubmit, + pSubmit->submit.msgStr, pSubmit->submit.msgLen, pSubmit->submit.ver); + ASSERT((*pVer) <= pSubmit->submit.ver); + (*pVer) = pSubmit->submit.ver; - qDebug("s-task:%s sink task handle block one-by-one, type:%d", id, qItem->type); + } else if (pItem->type == STREAM_INPUT__DATA_BLOCK || pItem->type == STREAM_INPUT__DATA_RETRIEVE) { + const SStreamDataBlock* pBlock = (const SStreamDataBlock*)pInput; - *numOfBlocks = 1; - *pInput = qItem; - return 
TSDB_CODE_SUCCESS; - } - } + SArray* pBlockList = pBlock->blocks; + int32_t numOfBlocks = taosArrayGetSize(pBlockList); + qDebug("s-task:%s set sdata blocks as input num:%d, ver:%" PRId64, id, numOfBlocks, pBlock->sourceVer); + qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__DATA_BLOCK); - // non sink task - while (1) { - if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) { - qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); - return TSDB_CODE_SUCCESS; - } + } else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) { + const SStreamMergedSubmit* pMerged = (const SStreamMergedSubmit*)pInput; - SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); - if (qItem == NULL) { - if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && (++retryTimes) < MAX_RETRY_TIMES) { - taosMsleep(10); - qDebug("try again batchSize:%d, retry:%d, %s", *numOfBlocks, retryTimes, id); - continue; - } + SArray* pBlockList = pMerged->submits; + int32_t numOfBlocks = taosArrayGetSize(pBlockList); + qDebug("s-task:%s %p set (merged) submit blocks as a batch, numOfBlocks:%d, ver:%" PRId64, id, pTask, numOfBlocks, + pMerged->ver); + qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__MERGED_SUBMIT); + ASSERT((*pVer) <= pMerged->ver); + (*pVer) = pMerged->ver; - qDebug("break batchSize:%d, %s", *numOfBlocks, id); - return TSDB_CODE_SUCCESS; - } + } else if (pItem->type == STREAM_INPUT__REF_DATA_BLOCK) { + const SStreamRefDataBlock* pRefBlock = (const SStreamRefDataBlock*)pInput; + qSetMultiStreamInput(pExecutor, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK); - // do not merge blocks for sink node and check point data block - if (qItem->type == STREAM_INPUT__CHECKPOINT || qItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER || - qItem->type == STREAM_INPUT__TRANS_STATE) { - if (*pInput == NULL) { - qDebug("s-task:%s checkpoint/transtate msg extracted, start to process immediately", id); - *numOfBlocks = 1; - *pInput = qItem; - return TSDB_CODE_SUCCESS; - } else { - // previous existed blocks needs to be handle, before handle the checkpoint msg block - qDebug("s-task:%s checkpoint/transtate msg extracted, handle previous blocks, numOfBlocks:%d", id, *numOfBlocks); - streamQueueProcessFail(pTask->inputQueue); - return TSDB_CODE_SUCCESS; - } - } else { - if (*pInput == NULL) { - ASSERT((*numOfBlocks) == 0); - *pInput = qItem; - } else { - // todo we need to sort the data block, instead of just appending into the array list. 
- void* newRet = streamMergeQueueItem(*pInput, qItem); - if (newRet == NULL) { - qError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d", id, *numOfBlocks); - streamQueueProcessFail(pTask->inputQueue); - return TSDB_CODE_SUCCESS; - } + } else if (pItem->type == STREAM_INPUT__CHECKPOINT || pItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + const SStreamDataBlock* pCheckpoint = (const SStreamDataBlock*)pInput; + qSetMultiStreamInput(pExecutor, pCheckpoint->blocks, 1, pItem->type); - *pInput = newRet; - } - - *numOfBlocks += 1; - streamQueueProcessSuccess(pTask->inputQueue); - - if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) { - qDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM); - return TSDB_CODE_SUCCESS; - } - } + } else { + ASSERT(0); } } @@ -534,7 +472,7 @@ int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock // agg task should dispatch trans-state msg to sink task, to flush all data to sink task. if (level == TASK_LEVEL__AGG || level == TASK_LEVEL__SOURCE) { pBlock->srcVgId = pTask->pMeta->vgId; - code = taosWriteQitem(pTask->outputInfo.queue->queue, pBlock); + code = taosWriteQitem(pTask->outputInfo.queue->pQueue, pBlock); if (code == 0) { streamDispatchStreamBlock(pTask); } else { @@ -543,7 +481,7 @@ int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock } else { // level == TASK_LEVEL__SINK streamFreeQitem((SStreamQueueItem*)pBlock); } - } else { // non-dispatch task, do task state transfer directly + } else { // non-dispatch task, do task state transfer directly streamFreeQitem((SStreamQueueItem*)pBlock); if (level != TASK_LEVEL__SINK) { qDebug("s-task:%s non-dispatch task, start to transfer state directly", id); @@ -569,20 +507,28 @@ int32_t streamExecForAll(SStreamTask* pTask) { const char* id = pTask->id.idStr; while (1) { - int32_t batchSize = 0; + int32_t numOfBlocks = 0; SStreamQueueItem* pInput = NULL; if (streamTaskShouldStop(&pTask->status)) { - qDebug("s-task:%s stream task stopped, abort", id); + qDebug("s-task:%s stream task is stopped", id); break; } // merge multiple input data if possible in the input queue. 
qDebug("s-task:%s start to extract data block from inputQ", id); - /*int32_t code = */extractBlocksFromInputQ(pTask, &pInput, &batchSize); + /*int32_t code = */ streamTaskGetDataFromInputQ(pTask, &pInput, &numOfBlocks); if (pInput == NULL) { - ASSERT(batchSize == 0); - break; + ASSERT(numOfBlocks == 0); + return 0; + } + + int32_t type = pInput->type; + + // dispatch checkpoint msg to all downstream tasks + if (type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + streamProcessCheckpointBlock(pTask, (SStreamDataBlock*)pInput); + continue; } if (pInput->type == STREAM_INPUT__TRANS_STATE) { @@ -591,60 +537,51 @@ int32_t streamExecForAll(SStreamTask* pTask) { } if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - ASSERT(pInput->type == STREAM_INPUT__DATA_BLOCK); - qDebug("s-task:%s sink task start to sink %d blocks", id, batchSize); - streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pInput); - continue; + ASSERT(type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__CHECKPOINT); + + if (type == STREAM_INPUT__DATA_BLOCK) { + qDebug("s-task:%s sink task start to sink %d blocks", id, numOfBlocks); + streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pInput); + continue; + } } int64_t st = taosGetTimestampMs(); - qDebug("s-task:%s start to process batch of blocks, num:%d", id, batchSize); - { - // set input - void* pExecutor = pTask->exec.pExecutor; + const SStreamQueueItem* pItem = pInput; + qDebug("s-task:%s start to process batch of blocks, num:%d, type:%d", id, numOfBlocks, pItem->type); - const SStreamQueueItem* pItem = pInput; - if (pItem->type == STREAM_INPUT__GET_RES) { - const SStreamTrigger* pTrigger = (const SStreamTrigger*)pInput; - qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK); - } else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { - ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); - const SStreamDataSubmit* pSubmit = (const SStreamDataSubmit*)pInput; - qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT); - qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, id, pSubmit, - pSubmit->submit.msgStr, pSubmit->submit.msgLen, pSubmit->submit.ver); - } else if (pItem->type == STREAM_INPUT__DATA_BLOCK || pItem->type == STREAM_INPUT__DATA_RETRIEVE) { - const SStreamDataBlock* pBlock = (const SStreamDataBlock*)pInput; - - SArray* pBlockList = pBlock->blocks; - int32_t numOfBlocks = taosArrayGetSize(pBlockList); - qDebug("s-task:%s set sdata blocks as input num:%d, ver:%" PRId64, id, numOfBlocks, pBlock->sourceVer); - qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__DATA_BLOCK); - } else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) { - const SStreamMergedSubmit* pMerged = (const SStreamMergedSubmit*)pInput; - - SArray* pBlockList = pMerged->submits; - int32_t numOfBlocks = taosArrayGetSize(pBlockList); - qDebug("s-task:%s %p set (merged) submit blocks as a batch, numOfBlocks:%d", id, pTask, numOfBlocks); - qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__MERGED_SUBMIT); - } else if (pItem->type == STREAM_INPUT__REF_DATA_BLOCK) { - const SStreamRefDataBlock* pRefBlock = (const SStreamRefDataBlock*)pInput; - qSetMultiStreamInput(pExecutor, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK); - } else { - ASSERT(0); - } - } + int64_t ver = pTask->chkInfo.checkpointVer; + doSetStreamInputBlock(pTask, pInput, &ver, id); int64_t resSize = 0; int32_t totalBlocks = 0; streamTaskExecImpl(pTask, pInput, &resSize, &totalBlocks); - double el = 
(taosGetTimestampMs() - st) / 1000.0; - qDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", - id, el, SIZE_IN_MB(resSize), totalBlocks); + double el = (taosGetTimestampMs() - st) / 1000.0; + qDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", id, el, + SIZE_IN_MB(resSize), totalBlocks); + + // update the currentVer if processing the submit blocks. + ASSERT(pTask->chkInfo.checkpointVer <= pTask->chkInfo.currentVer && ver >= pTask->chkInfo.checkpointVer); + + if (ver != pTask->chkInfo.checkpointVer) { + qDebug("s-task:%s update checkpointVer(unsaved) from %" PRId64 " to %" PRId64, pTask->id.idStr, + pTask->chkInfo.checkpointVer, ver); + pTask->chkInfo.checkpointVer = ver; + } streamFreeQitem(pInput); + + // todo other thread may change the status + // do nothing after sync executor state to storage backend, untill the vnode-level checkpoint is completed. + if (type == STREAM_INPUT__CHECKPOINT) { +// ASSERT(pTask->status.taskStatus == TASK_STATUS__CK); + qDebug("s-task:%s checkpoint block received, set the status:%s", pTask->id.idStr, + streamGetTaskStatusStr(pTask->status.taskStatus)); + streamTaskBuildCheckpoint(pTask); + return 0; + } } return 0; @@ -671,12 +608,13 @@ int32_t streamTryExec(SStreamTask* pTask) { return -1; } - // todo the task should be commit here +// streamTaskBuildCheckpoint(pTask); + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus); - if (!(taosQueueEmpty(pTask->inputQueue->queue) || streamTaskShouldStop(&pTask->status) || + if (!(taosQueueEmpty(pTask->inputInfo.queue->pQueue) || streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status))) { streamSchedExec(pTask); } @@ -711,7 +649,7 @@ int32_t streamTaskReloadState(SStreamTask* pTask) { } int32_t streamAlignTransferState(SStreamTask* pTask) { - int32_t numOfUpstream = taosArrayGetSize(pTask->pUpstreamEpInfoList); + int32_t numOfUpstream = taosArrayGetSize(pTask->pUpstreamInfoList); int32_t old = atomic_val_compare_exchange_32(&pTask->transferStateAlignCnt, 0, numOfUpstream); if (old == 0) { qDebug("s-task:%s set the transfer state aligncnt %d", pTask->id.idStr, numOfUpstream); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 45878bb865..ff5e9adaee 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -16,49 +16,113 @@ #include "executor.h" #include "streamBackendRocksdb.h" #include "streamInt.h" +#include "tmisce.h" #include "tref.h" +#include "tstream.h" #include "ttimer.h" +#define META_HB_CHECK_INTERVAL 200 +#define META_HB_SEND_IDLE_COUNTER 25 // send hb every 5 sec +#define STREAM_TASK_KEY_LEN ((sizeof(int64_t)) << 1) + static TdThreadOnce streamMetaModuleInit = PTHREAD_ONCE_INIT; -int32_t streamBackendId = 0; -int32_t streamBackendCfWrapperId = 0; + +int32_t streamBackendId = 0; +int32_t streamBackendCfWrapperId = 0; +int32_t streamMetaId = 0; + +static int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta); +static void metaHbToMnode(void* param, void* tmrId); +static void streamMetaClear(SStreamMeta* pMeta); +static int32_t streamMetaBegin(SStreamMeta* pMeta); +static void streamMetaCloseImpl(void* arg); +static void extractStreamTaskKey(int64_t* pKey, const SStreamTask* pTask); + +typedef struct { + TdThreadMutex mutex; + SHashObj* 
pTable; +} SMetaRefMgt; + +SMetaRefMgt gMetaRefMgt; + +void metaRefMgtInit(); +void metaRefMgtCleanup(); +int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid); static void streamMetaEnvInit() { streamBackendId = taosOpenRef(64, streamBackendCleanup); streamBackendCfWrapperId = taosOpenRef(64, streamBackendHandleCleanup); + + streamMetaId = taosOpenRef(64, streamMetaCloseImpl); + + metaRefMgtInit(); } void streamMetaInit() { taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); } void streamMetaCleanup() { taosCloseRef(streamBackendId); taosCloseRef(streamBackendCfWrapperId); + taosCloseRef(streamMetaId); + + metaRefMgtCleanup(); } -SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId) { +void metaRefMgtInit() { + taosThreadMutexInit(&(gMetaRefMgt.mutex), NULL); + gMetaRefMgt.pTable = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK); +} + +void metaRefMgtCleanup() { + void* pIter = taosHashIterate(gMetaRefMgt.pTable, NULL); + while (pIter) { + SArray* list = *(SArray**)pIter; + for (int i = 0; i < taosArrayGetSize(list); i++) { + void* rid = taosArrayGetP(list, i); + taosMemoryFree(rid); + } + taosArrayDestroy(list); + pIter = taosHashIterate(gMetaRefMgt.pTable, pIter); + } + taosHashCleanup(gMetaRefMgt.pTable); + + taosThreadMutexDestroy(&gMetaRefMgt.mutex); +} + +int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid) { + taosThreadMutexLock(&gMetaRefMgt.mutex); + void* p = taosHashGet(gMetaRefMgt.pTable, &vgId, sizeof(vgId)); + if (p == NULL) { + SArray* list = taosArrayInit(8, sizeof(void*)); + taosArrayPush(list, &rid); + taosHashPut(gMetaRefMgt.pTable, &vgId, sizeof(vgId), &list, sizeof(void*)); + } else { + SArray* list = *(SArray**)p; + taosArrayPush(list, &rid); + } + taosThreadMutexUnlock(&gMetaRefMgt.mutex); + return 0; +} + +SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage) { int32_t code = -1; SStreamMeta* pMeta = taosMemoryCalloc(1, sizeof(SStreamMeta)); if (pMeta == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; + qError("vgId:%d failed to prepare stream meta, alloc size:%" PRIzu ", out of memory", vgId, sizeof(SStreamMeta)); return NULL; } - int32_t len = strlen(path) + 20; - char* streamPath = taosMemoryCalloc(1, len); - sprintf(streamPath, "%s/%s", path, "stream"); - pMeta->path = taosStrdup(streamPath); + int32_t len = strlen(path) + 64; + char* tpath = taosMemoryCalloc(1, len); + + sprintf(tpath, "%s%s%s", path, TD_DIRSEP, "stream"); + pMeta->path = tpath; + if (tdbOpen(pMeta->path, 16 * 1024, 1, &pMeta->db, 0) < 0) { goto _err; } - memset(streamPath, 0, len); - sprintf(streamPath, "%s/%s", pMeta->path, "checkpoints"); - code = taosMulModeMkDir(streamPath, 0755, false); - if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(code); - goto _err; - } - - if (tdbTbOpen("task.db", sizeof(int32_t), -1, NULL, pMeta->db, &pMeta->pTaskDb, 0) < 0) { + if (tdbTbOpen("task.db", STREAM_TASK_KEY_LEN, -1, NULL, pMeta->db, &pMeta->pTaskDb, 0) < 0) { goto _err; } @@ -66,6 +130,10 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF goto _err; } + if (streamMetaBegin(pMeta) < 0) { + goto _err; + } + _hash_fn_t fp = taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR); pMeta->pTasks = taosHashInit(64, fp, true, HASH_NO_LOCK); if (pMeta->pTasks == NULL) { @@ -73,80 +141,187 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF } // task list - pMeta->pTaskList = taosArrayInit(4, sizeof(SStreamId)); + 
pMeta->pTaskList = taosArrayInit(4, sizeof(SStreamTaskId)); if (pMeta->pTaskList == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - if (streamMetaBegin(pMeta) < 0) { - goto _err; - } - pMeta->walScanCounter = 0; pMeta->vgId = vgId; pMeta->ahandle = ahandle; pMeta->expandFunc = expandFunc; + pMeta->stage = stage; - memset(streamPath, 0, len); - sprintf(streamPath, "%s/%s", pMeta->path, "state"); - code = taosMulModeMkDir(streamPath, 0755, false); - if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(code); - goto _err; - } + // send heartbeat every 5sec. + pMeta->rid = taosAddRef(streamMetaId, pMeta); + int64_t* pRid = taosMemoryMalloc(sizeof(int64_t)); + *pRid = pMeta->rid; + + metaRefMgtAdd(pMeta->vgId, pRid); + + pMeta->hbInfo.hbTmr = taosTmrStart(metaHbToMnode, META_HB_CHECK_INTERVAL, pRid, streamEnv.timer); + pMeta->hbInfo.tickCounter = 0; + pMeta->hbInfo.stopFlag = 0; - pMeta->streamBackend = streamBackendInit(streamPath); - if (pMeta->streamBackend == NULL) { - goto _err; - } - pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); pMeta->pTaskBackendUnique = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + pMeta->chkpSaved = taosArrayInit(4, sizeof(int64_t)); + pMeta->chkpInUse = taosArrayInit(4, sizeof(int64_t)); + pMeta->chkpCap = 8; + taosInitRWLatch(&pMeta->chkpDirLock); - taosMemoryFree(streamPath); + pMeta->chkpId = streamGetLatestCheckpointId(pMeta); + pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + while (pMeta->streamBackend == NULL) { + taosMsleep(2 * 1000); + pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + if (pMeta->streamBackend == NULL) { + qError("vgId:%d failed to init stream backend", pMeta->vgId); + qInfo("vgId:%d retry to init stream backend", pMeta->vgId); + } + } + pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); + + code = streamBackendLoadCheckpointInfo(pMeta); taosInitRWLatch(&pMeta->lock); taosThreadMutexInit(&pMeta->backendMutex, NULL); + pMeta->pauseTaskNum = 0; + + qInfo("vgId:%d open stream meta successfully, latest checkpoint:%" PRId64 ", stage:%" PRId64, vgId, pMeta->chkpId, + stage); return pMeta; _err: - taosMemoryFree(streamPath); taosMemoryFree(pMeta->path); if (pMeta->pTasks) taosHashCleanup(pMeta->pTasks); if (pMeta->pTaskList) taosArrayDestroy(pMeta->pTaskList); if (pMeta->pTaskDb) tdbTbClose(pMeta->pTaskDb); if (pMeta->pCheckpointDb) tdbTbClose(pMeta->pCheckpointDb); if (pMeta->db) tdbClose(pMeta->db); - // if (pMeta->streamBackend) streamBackendCleanup(pMeta->streamBackend); + + // taosThreadMutexDestroy(&pMeta->backendMutex); + // taosThreadRwlockDestroy(&pMeta->lock); + taosMemoryFree(pMeta); + qError("failed to open stream meta"); return NULL; } +int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId) { + streamMetaClear(pMeta); + + pMeta->streamBackendRid = -1; + pMeta->streamBackend = NULL; + + char* defaultPath = taosMemoryCalloc(1, strlen(pMeta->path) + 128); + sprintf(defaultPath, "%s%s%s", pMeta->path, TD_DIRSEP, "state"); + taosRemoveDir(defaultPath); + + char* newPath = taosMemoryCalloc(1, strlen(pMeta->path) + 128); + sprintf(newPath, "%s%s%s", pMeta->path, TD_DIRSEP, "received"); + + int32_t code = taosStatFile(newPath, NULL, NULL, NULL); + if (code == 0) { + // directory exists + code = taosRenameFile(newPath, defaultPath); + if (code != 0) { + terrno = TAOS_SYSTEM_ERROR(code); + qError("vgId:%d failed to rename file, from %s to %s, code:%s", pMeta->vgId, newPath, defaultPath, + 
tstrerror(terrno)); + + taosMemoryFree(defaultPath); + taosMemoryFree(newPath); + return -1; + } + } + + pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + while (pMeta->streamBackend == NULL) { + taosMsleep(2 * 1000); + pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + if (pMeta->streamBackend == NULL) { + qError("vgId:%d failed to init stream backend", pMeta->vgId); + qInfo("vgId:%d retry to init stream backend", pMeta->vgId); + // return -1; + } + } + pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); + streamBackendLoadCheckpointInfo(pMeta); + + return 0; +} + +void streamMetaClear(SStreamMeta* pMeta) { + void* pIter = NULL; + while ((pIter = taosHashIterate(pMeta->pTasks, pIter)) != NULL) { + SStreamTask* p = *(SStreamTask**)pIter; + + // release the ref by timer + if (p->info.triggerParam != 0 && p->info.fillHistory == 0) { // one more ref in timer + qDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", p->id.idStr, p->refCnt); + taosTmrStop(p->schedInfo.pTimer); + p->info.triggerParam = 0; + streamMetaReleaseTask(pMeta, p); + } + + streamMetaReleaseTask(pMeta, p); + } + + taosRemoveRef(streamBackendId, pMeta->streamBackendRid); + + taosHashClear(pMeta->pTasks); + taosHashClear(pMeta->pTaskBackendUnique); + + taosArrayClear(pMeta->pTaskList); + taosArrayClear(pMeta->chkpSaved); + taosArrayClear(pMeta->chkpInUse); +} + void streamMetaClose(SStreamMeta* pMeta) { + qDebug("start to close stream meta"); + if (pMeta == NULL) { + return; + } + + // int64_t rid = *(int64_t*)pMeta->pRid; + // if (taosTmrStop(pMeta->hbInfo.hbTmr)) { + // taosMemoryFree(pMeta->pRid); + // } else { + // // do nothing, stop by timer thread + // } + taosRemoveRef(streamMetaId, pMeta->rid); +} + +void streamMetaCloseImpl(void* arg) { + SStreamMeta* pMeta = arg; + qDebug("start to do-close stream meta"); + if (pMeta == NULL) { + return; + } + + streamMetaClear(pMeta); + tdbAbort(pMeta->db, pMeta->txn); tdbTbClose(pMeta->pTaskDb); tdbTbClose(pMeta->pCheckpointDb); tdbClose(pMeta->db); - void* pIter = NULL; - while (1) { - pIter = taosHashIterate(pMeta->pTasks, pIter); - if (pIter == NULL) { - break; - } - tFreeStreamTask(*(SStreamTask**)pIter); - } + taosArrayDestroy(pMeta->pTaskList); + taosArrayDestroy(pMeta->chkpSaved); + taosArrayDestroy(pMeta->chkpInUse); taosHashCleanup(pMeta->pTasks); - taosRemoveRef(streamBackendId, pMeta->streamBackendRid); - pMeta->pTaskList = taosArrayDestroy(pMeta->pTaskList); + taosHashCleanup(pMeta->pTaskBackendUnique); + taosMemoryFree(pMeta->path); taosThreadMutexDestroy(&pMeta->backendMutex); - taosHashCleanup(pMeta->pTaskBackendUnique); + taosMemoryFree(pMeta); + qDebug("end to close stream meta"); } int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { @@ -168,7 +343,10 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { tEncodeStreamTask(&encoder, pTask); tEncoderClear(&encoder); - if (tdbTbUpsert(pMeta->pTaskDb, &pTask->id.taskId, sizeof(int32_t), buf, len, pMeta->txn) < 0) { + int64_t key[2] = {0}; + extractStreamTaskKey(key, pTask); + + if (tdbTbUpsert(pMeta->pTaskDb, key, STREAM_TASK_KEY_LEN, buf, len, pMeta->txn) < 0) { qError("s-task:%s save to disk failed, code:%s", pTask->id.idStr, tstrerror(terrno)); return -1; } @@ -177,12 +355,18 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { return 0; } -int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { - int32_t code = tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(taskId), 
pMeta->txn); +void extractStreamTaskKey(int64_t* pKey, const SStreamTask* pTask) { + pKey[0] = pTask->id.streamId; + pKey[1] = pTask->id.taskId; +} + +int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int64_t* pKey) { + int32_t code = tdbTbDelete(pMeta->pTaskDb, pKey, STREAM_TASK_KEY_LEN, pMeta->txn); if (code != 0) { - qError("vgId:%d failed to remove task:0x%x from metastore, code:%s", pMeta->vgId, taskId, tstrerror(terrno)); + qError("vgId:%d failed to remove task:0x%x from metastore, code:%s", pMeta->vgId, (int32_t)pKey[1], + tstrerror(terrno)); } else { - qDebug("vgId:%d remove task:0x%x from metastore", pMeta->vgId, taskId); + qDebug("vgId:%d remove task:0x%x from metastore", pMeta->vgId, (int32_t)pKey[1]); } return code; @@ -193,7 +377,7 @@ int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTa *pAdded = false; int64_t keys[2] = {pTask->id.streamId, pTask->id.taskId}; - void* p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + void* p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); if (p == NULL) { if (pMeta->expandFunc(pMeta->ahandle, pTask, ver) < 0) { tFreeStreamTask(pTask); @@ -226,10 +410,26 @@ int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta) { return (int32_t)size; } +int32_t streamMetaGetNumOfStreamTasks(SStreamMeta* pMeta) { + int32_t num = 0; + size_t size = taosArrayGetSize(pMeta->pTaskList); + for (int32_t i = 0; i < size; ++i) { + SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i); + int64_t keys[2] = {pId->streamId, pId->taskId}; + + SStreamTask** p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + if ((*p)->info.fillHistory == 0) { + num += 1; + } + } + + return num; +} + SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) { taosRLockLatch(&pMeta->lock); - int64_t keys[2] = {streamId, taskId}; + int64_t keys[2] = {streamId, taskId}; SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); if (ppTask != NULL) { if (!streamTaskShouldStop(&(*ppTask)->status)) { @@ -244,7 +444,7 @@ SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t return NULL; } -void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { +void streamMetaReleaseTask(SStreamMeta* UNUSED_PARAM(pMeta), SStreamTask* pTask) { int32_t ref = atomic_sub_fetch_32(&pTask->refCnt, 1); if (ref > 0) { qTrace("s-task:%s release task, ref:%d", pTask->id.idStr, ref); @@ -257,9 +457,9 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { } } -static void doRemoveIdFromList(SStreamMeta* pMeta, int32_t num, SStreamId* id) { +static void doRemoveIdFromList(SStreamMeta* pMeta, int32_t num, SStreamTaskId* id) { for (int32_t i = 0; i < num; ++i) { - SStreamId* pTaskId = taosArrayGet(pMeta->pTaskList, i); + SStreamTaskId* pTaskId = taosArrayGet(pMeta->pTaskList, i); if (pTaskId->streamId == id->streamId && pTaskId->taskId == id->taskId) { taosArrayRemove(pMeta->pTaskList, i); break; @@ -273,10 +473,14 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t // pre-delete operation taosWLockLatch(&pMeta->lock); - int64_t keys[2] = {streamId, taskId}; + int64_t keys[2] = {streamId, taskId}; SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); if (ppTask) { pTask = *ppTask; + if (streamTaskShouldPause(&pTask->status)) { + int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1); + qInfo("vgId:%d s-task:%s drop stream task. 
pause task num:%d", pMeta->vgId, pTask->id.idStr, num); + } atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING); } else { qDebug("vgId:%d failed to find the task:0x%x, it may be dropped already", pMeta->vgId, taskId); @@ -317,7 +521,7 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t ASSERT(pTask->status.timerActive == 0); doRemoveIdFromList(pMeta, (int32_t)taosArrayGetSize(pMeta->pTaskList), &pTask->id); - streamMetaRemoveTask(pMeta, taskId); + streamMetaRemoveTask(pMeta, keys); streamMetaReleaseTask(pMeta, pTask); } else { qDebug("vgId:%d failed to find the task:0x%x, it may have been dropped already", pMeta->vgId, taskId); @@ -338,38 +542,73 @@ int32_t streamMetaBegin(SStreamMeta* pMeta) { // todo add error log int32_t streamMetaCommit(SStreamMeta* pMeta) { if (tdbCommit(pMeta->db, pMeta->txn) < 0) { - qError("failed to commit stream meta"); + qError("vgId:%d failed to commit stream meta", pMeta->vgId); return -1; } if (tdbPostCommit(pMeta->db, pMeta->txn) < 0) { - qError("failed to commit stream meta"); + qError("vgId:%d failed to do post-commit stream meta", pMeta->vgId); return -1; } if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { + qError("vgId:%d failed to begin trans", pMeta->vgId); return -1; } return 0; } -int32_t streamMetaAbort(SStreamMeta* pMeta) { - if (tdbAbort(pMeta->db, pMeta->txn) < 0) { - return -1; +int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta) { + int64_t chkpId = 0; + + TBC* pCur = NULL; + if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { + return chkpId; } - if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, - TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { - return -1; + void* pKey = NULL; + int32_t kLen = 0; + void* pVal = NULL; + int32_t vLen = 0; + SDecoder decoder; + + tdbTbcMoveToFirst(pCur); + while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { + if (pVal == NULL || vLen == 0) { + break; + } + SCheckpointInfo info; + tDecoderInit(&decoder, (uint8_t*)pVal, vLen); + if (tDecodeStreamTaskChkInfo(&decoder, &info) < 0) { + continue; + } + tDecoderClear(&decoder); + + chkpId = TMAX(chkpId, info.checkpointId); } - return 0; + + qDebug("get max chkp id: %" PRId64 "", chkpId); + + tdbFree(pKey); + tdbFree(pVal); + tdbTbcClose(pCur); + + return chkpId; } -int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { +static void doClear(void* pKey, void* pVal, TBC* pCur, SArray* pRecycleList) { + tdbFree(pKey); + tdbFree(pVal); + tdbTbcClose(pCur); + taosArrayDestroy(pRecycleList); +} + +int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { TBC* pCur = NULL; + qInfo("vgId:%d load stream tasks from meta files", pMeta->vgId); if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { qError("vgId:%d failed to open stream meta, code:%s", pMeta->vgId, tstrerror(terrno)); return -1; @@ -380,26 +619,20 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { void* pVal = NULL; int32_t vLen = 0; SDecoder decoder; - SArray* pRecycleList = taosArrayInit(4, sizeof(int32_t)); + SArray* pRecycleList = taosArrayInit(4, STREAM_TASK_KEY_LEN); tdbTbcMoveToFirst(pCur); - while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask)); if (pTask == NULL) { - tdbFree(pKey); - tdbFree(pVal); - tdbTbcClose(pCur); - taosArrayDestroy(pRecycleList); + doClear(pKey, pVal, pCur, pRecycleList); return -1; } + tDecoderInit(&decoder, (uint8_t*)pVal, vLen); if 
(tDecodeStreamTask(&decoder, pTask) < 0) { tDecoderClear(&decoder); - tdbFree(pKey); - tdbFree(pVal); - tdbTbcClose(pCur); - taosArrayDestroy(pRecycleList); + doClear(pKey, pVal, pCur, pRecycleList); tFreeStreamTask(pTask); qError( "stream read incompatible data, rm %s/vnode/vnode*/tq/stream if taosd cannot start, and rebuild stream " @@ -413,8 +646,10 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { int32_t taskId = pTask->id.taskId; tFreeStreamTask(pTask); - taosArrayPush(pRecycleList, &taskId); + int64_t key[2] = {0}; + extractStreamTaskKey(key, pTask); + taosArrayPush(pRecycleList, key); int32_t total = taosArrayGetSize(pRecycleList); qDebug("s-task:0x%x is already dropped, add into recycle list, total:%d", taskId, total); continue; @@ -422,14 +657,11 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { // do duplicate task check. int64_t keys[2] = {pTask->id.streamId, pTask->id.taskId}; - void* p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + void* p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); if (p == NULL) { - if (pMeta->expandFunc(pMeta->ahandle, pTask, pTask->chkInfo.version) < 0) { - tdbFree(pKey); - tdbFree(pVal); - tdbTbcClose(pCur); + if (pMeta->expandFunc(pMeta->ahandle, pTask, pTask->chkInfo.checkpointVer) < 0) { + doClear(pKey, pVal, pCur, pRecycleList); tFreeStreamTask(pTask); - taosArrayDestroy(pRecycleList); return -1; } @@ -437,22 +669,24 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { } else { tdbFree(pKey); tdbFree(pVal); - tdbTbcClose(pCur); taosMemoryFree(pTask); continue; } + streamTaskResetUpstreamStageInfo(pTask); if (taosHashPut(pMeta->pTasks, keys, sizeof(keys), &pTask, sizeof(void*)) < 0) { - tdbFree(pKey); - tdbFree(pVal); - tdbTbcClose(pCur); + doClear(pKey, pVal, pCur, pRecycleList); tFreeStreamTask(pTask); - taosArrayDestroy(pRecycleList); return -1; } + if (streamTaskShouldPause(&pTask->status)) { + atomic_add_fetch_32(&pMeta->pauseTaskNum, 1); + } + ASSERT(pTask->status.downstreamReady == 0); } + qInfo("vgId:%d pause task num:%d", pMeta->vgId, pMeta->pauseTaskNum); tdbFree(pKey); tdbFree(pVal); @@ -463,12 +697,212 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { if (taosArrayGetSize(pRecycleList) > 0) { for (int32_t i = 0; i < taosArrayGetSize(pRecycleList); ++i) { - int32_t taskId = *(int32_t*)taosArrayGet(pRecycleList, i); - streamMetaRemoveTask(pMeta, taskId); + int64_t* pId = taosArrayGet(pRecycleList, i); + streamMetaRemoveTask(pMeta, pId); } } - qDebug("vgId:%d load %d task from disk", pMeta->vgId, (int32_t)taosArrayGetSize(pMeta->pTaskList)); + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + qDebug("vgId:%d load %d tasks into meta from disk completed", pMeta->vgId, numOfTasks); taosArrayDestroy(pRecycleList); return 0; } + +int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->vgId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->numOfTasks) < 0) return -1; + + for (int32_t i = 0; i < pReq->numOfTasks; ++i) { + STaskStatusEntry* ps = taosArrayGet(pReq->pTaskStatus, i); + if (tEncodeI64(pEncoder, ps->streamId) < 0) return -1; + if (tEncodeI32(pEncoder, ps->taskId) < 0) return -1; + if (tEncodeI32(pEncoder, ps->status) < 0) return -1; + } + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->vgId) < 0) return -1; + if 
(tDecodeI32(pDecoder, &pReq->numOfTasks) < 0) return -1; + + pReq->pTaskStatus = taosArrayInit(pReq->numOfTasks, sizeof(STaskStatusEntry)); + for (int32_t i = 0; i < pReq->numOfTasks; ++i) { + STaskStatusEntry hb = {0}; + if (tDecodeI64(pDecoder, &hb.streamId) < 0) return -1; + if (tDecodeI32(pDecoder, &hb.taskId) < 0) return -1; + if (tDecodeI32(pDecoder, &hb.status) < 0) return -1; + + taosArrayPush(pReq->pTaskStatus, &hb); + } + + tEndDecode(pDecoder); + return 0; +} + +static bool readyToSendHb(SMetaHbInfo* pInfo) { + if ((++pInfo->tickCounter) >= META_HB_SEND_IDLE_COUNTER) { + // reset the counter + pInfo->tickCounter = 0; + return true; + } + return false; +} + +void metaHbToMnode(void* param, void* tmrId) { + int64_t rid = *(int64_t*)param; + + SStreamHbMsg hbMsg = {0}; + SStreamMeta* pMeta = taosAcquireRef(streamMetaId, rid); + if (pMeta == NULL) { + // taosMemoryFree(param); + return; + } + + // need to stop, stop now + if (pMeta->hbInfo.stopFlag == STREAM_META_WILL_STOP) { + pMeta->hbInfo.stopFlag = STREAM_META_OK_TO_STOP; + qDebug("vgId:%d jump out of meta timer", pMeta->vgId); + taosReleaseRef(streamMetaId, rid); + return; + } + + if (!readyToSendHb(&pMeta->hbInfo)) { + taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->hbInfo.hbTmr); + taosReleaseRef(streamMetaId, rid); + return; + } + + taosRLockLatch(&pMeta->lock); + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + + SEpSet epset = {0}; + + hbMsg.vgId = pMeta->vgId; + hbMsg.pTaskStatus = taosArrayInit(numOfTasks, sizeof(STaskStatusEntry)); + + for (int32_t i = 0; i < numOfTasks; ++i) { + SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i); + int64_t keys[2] = {pId->streamId, pId->taskId}; + SStreamTask** pTask = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + + if ((*pTask)->info.fillHistory == 1) { + continue; + } + + STaskStatusEntry entry = {.streamId = pId->streamId, .taskId = pId->taskId, .status = (*pTask)->status.taskStatus}; + taosArrayPush(hbMsg.pTaskStatus, &entry); + + if (i == 0) { + epsetAssign(&epset, &(*pTask)->info.mnodeEpset); + } + } + + hbMsg.numOfTasks = taosArrayGetSize(hbMsg.pTaskStatus); + taosRUnLockLatch(&pMeta->lock); + + int32_t code = 0; + int32_t tlen = 0; + + tEncodeSize(tEncodeStreamHbMsg, &hbMsg, tlen, code); + if (code < 0) { + qError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(code)); + taosArrayDestroy(hbMsg.pTaskStatus); + taosReleaseRef(streamMetaId, rid); + return; + } + + void* buf = rpcMallocCont(tlen); + if (buf == NULL) { + qError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + taosArrayDestroy(hbMsg.pTaskStatus); + taosReleaseRef(streamMetaId, rid); + return; + } + + SEncoder encoder; + tEncoderInit(&encoder, buf, tlen); + if ((code = tEncodeStreamHbMsg(&encoder, &hbMsg)) < 0) { + rpcFreeCont(buf); + qError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(code)); + taosArrayDestroy(hbMsg.pTaskStatus); + taosReleaseRef(streamMetaId, rid); + return; + } + tEncoderClear(&encoder); + + taosArrayDestroy(hbMsg.pTaskStatus); + + SRpcMsg msg = {0}; + initRpcMsg(&msg, TDMT_MND_STREAM_HEARTBEAT, buf, tlen); + msg.info.noResp = 1; + + qDebug("vgId:%d, build and send hb to mnode", pMeta->vgId); + + tmsgSendReq(&epset, &msg); + taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->hbInfo.hbTmr); + taosReleaseRef(streamMetaId, rid); +} + +static bool hasStreamTaskInTimer(SStreamMeta* pMeta) { + bool inTimer = false; + + 
taosWLockLatch(&pMeta->lock); + + void* pIter = NULL; + while (1) { + pIter = taosHashIterate(pMeta->pTasks, pIter); + if (pIter == NULL) { + break; + } + + SStreamTask* pTask = *(SStreamTask**)pIter; + if (pTask->status.timerActive >= 1) { + inTimer = true; + } + } + + taosWUnLockLatch(&pMeta->lock); + return inTimer; +} + +void streamMetaNotifyClose(SStreamMeta* pMeta) { + int32_t vgId = pMeta->vgId; + + qDebug("vgId:%d notify all stream tasks that the vnode is closing", vgId); + taosWLockLatch(&pMeta->lock); + + void* pIter = NULL; + while (1) { + pIter = taosHashIterate(pMeta->pTasks, pIter); + if (pIter == NULL) { + break; + } + + SStreamTask* pTask = *(SStreamTask**)pIter; + qDebug("vgId:%d s-task:%s set closing flag", vgId, pTask->id.idStr); + streamTaskStop(pTask); + } + + taosWUnLockLatch(&pMeta->lock); + + // wait for the stream meta hb function stopping + pMeta->hbInfo.stopFlag = STREAM_META_WILL_STOP; + while (pMeta->hbInfo.stopFlag != STREAM_META_OK_TO_STOP) { + taosMsleep(100); + qDebug("vgId:%d wait for meta to stop timer", pMeta->vgId); + } + + qDebug("vgId:%d start to check all tasks", vgId); + int64_t st = taosGetTimestampMs(); + + while (hasStreamTaskInTimer(pMeta)) { + qDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); + taosMsleep(100); + } + + int64_t el = taosGetTimestampMs() - st; + qDebug("vgId:%d all stream tasks are not in timer, continue close, elapsed time:%" PRId64 " ms", pMeta->vgId, el); +} \ No newline at end of file diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index e28c93b8b1..34b0a00639 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -15,39 +15,90 @@ #include "streamInt.h" -SStreamQueue* streamQueueOpen(int64_t cap) { - SStreamQueue* pQueue = taosMemoryCalloc(1, sizeof(SStreamQueue)); - if (pQueue == NULL) return NULL; - pQueue->queue = taosOpenQueue(); - pQueue->qall = taosAllocateQall(); - if (pQueue->queue == NULL || pQueue->qall == NULL) { - goto FAIL; - } - pQueue->status = STREAM_QUEUE__SUCESS; - taosSetQueueCapacity(pQueue->queue, cap); - taosSetQueueMemoryCapacity(pQueue->queue, cap * 1024); - return pQueue; +#define MAX_STREAM_EXEC_BATCH_NUM 32 +#define MIN_STREAM_EXEC_BATCH_NUM 4 +#define STREAM_TASK_INPUT_QUEUE_CAPACITY 20480 +#define STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE (30) -FAIL: - if (pQueue->queue) taosCloseQueue(pQueue->queue); - if (pQueue->qall) taosFreeQall(pQueue->qall); - taosMemoryFree(pQueue); - return NULL; -} - -void streamQueueClose(SStreamQueue* pQueue, int32_t taskId) { - qDebug("s-task:0x%x free the queue:%p, items in queue:%d", taskId, pQueue->queue, taosQueueItemSize(pQueue->queue)); +// todo refactor: +// read data from input queue +typedef struct SQueueReader { + SStreamQueue* pQueue; + int32_t taskLevel; + int32_t maxBlocks; // maximum block in one batch + int32_t waitDuration; // maximum wait time to format several block into a batch to process, unit: ms +} SQueueReader; +static void streamQueueCleanup(SStreamQueue* pQueue) { void* qItem = NULL; while ((qItem = streamQueueNextItem(pQueue)) != NULL) { streamFreeQitem(qItem); } + pQueue->status = STREAM_QUEUE__SUCESS; +} + +static void* streamQueueCurItem(SStreamQueue* queue) { return queue->qItem; } + +SStreamQueue* streamQueueOpen(int64_t cap) { + SStreamQueue* pQueue = taosMemoryCalloc(1, sizeof(SStreamQueue)); + if (pQueue == NULL) { + return NULL; + } + + pQueue->pQueue = taosOpenQueue(); + pQueue->qall = taosAllocateQall(); + + if 
(pQueue->pQueue == NULL || pQueue->qall == NULL) { + if (pQueue->pQueue) taosCloseQueue(pQueue->pQueue); + if (pQueue->qall) taosFreeQall(pQueue->qall); + taosMemoryFree(pQueue); + return NULL; + } + + pQueue->status = STREAM_QUEUE__SUCESS; + taosSetQueueCapacity(pQueue->pQueue, cap); + taosSetQueueMemoryCapacity(pQueue->pQueue, cap * 1024); + return pQueue; +} + +void streamQueueClose(SStreamQueue* pQueue, int32_t taskId) { + qDebug("s-task:0x%x free the queue:%p, items in queue:%d", taskId, pQueue->pQueue, taosQueueItemSize(pQueue->pQueue)); + streamQueueCleanup(pQueue); taosFreeQall(pQueue->qall); - taosCloseQueue(pQueue->queue); + taosCloseQueue(pQueue->pQueue); taosMemoryFree(pQueue); } +void* streamQueueNextItem(SStreamQueue* pQueue) { + int8_t flag = atomic_exchange_8(&pQueue->status, STREAM_QUEUE__PROCESSING); + + if (flag == STREAM_QUEUE__FAILED) { + ASSERT(pQueue->qItem != NULL); + return streamQueueCurItem(pQueue); + } else { + pQueue->qItem = NULL; + taosGetQitem(pQueue->qall, &pQueue->qItem); + if (pQueue->qItem == NULL) { + taosReadAllQitems(pQueue->pQueue, pQueue->qall); + taosGetQitem(pQueue->qall, &pQueue->qItem); + } + + return streamQueueCurItem(pQueue); + } +} + +void streamQueueProcessSuccess(SStreamQueue* queue) { + ASSERT(atomic_load_8(&queue->status) == STREAM_QUEUE__PROCESSING); + queue->qItem = NULL; + atomic_store_8(&queue->status, STREAM_QUEUE__SUCESS); +} + +void streamQueueProcessFail(SStreamQueue* queue) { + ASSERT(atomic_load_8(&queue->status) == STREAM_QUEUE__PROCESSING); + atomic_store_8(&queue->status, STREAM_QUEUE__FAILED); +} + #if 0 bool streamQueueResEmpty(const SStreamQueueRes* pRes) { // @@ -106,60 +157,166 @@ SStreamQueueRes streamQueueGetRes(SStreamQueue1* pQueue) { } #endif -#define MAX_STREAM_EXEC_BATCH_NUM 128 -#define MIN_STREAM_EXEC_BATCH_NUM 16 +bool streamQueueIsFull(const STaosQueue* pQueue) { + bool isFull = taosQueueItemSize((STaosQueue*) pQueue) >= STREAM_TASK_INPUT_QUEUE_CAPACITY; + double size = SIZE_IN_MB(taosQueueMemorySize((STaosQueue*) pQueue)); + return (isFull || size >= STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE); +} -// todo refactor: -// read data from input queue -typedef struct SQueueReader { - SStreamQueue* pQueue; - int32_t taskLevel; - int32_t maxBlocks; // maximum block in one batch - int32_t waitDuration; // maximum wait time to format several block into a batch to process, unit: ms -} SQueueReader; +int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks) { + int32_t retryTimes = 0; + int32_t MAX_RETRY_TIMES = 5; + const char* id = pTask->id.idStr; -SStreamQueueItem* doReadMultiBlocksFromQueue(SQueueReader* pReader, const char* idstr) { - int32_t numOfBlocks = 0; - int32_t tryCount = 0; - SStreamQueueItem* pRet = NULL; - - while (1) { - SStreamQueueItem* qItem = streamQueueNextItem(pReader->pQueue); - if (qItem == NULL) { - if (pReader->taskLevel == TASK_LEVEL__SOURCE && numOfBlocks < MIN_STREAM_EXEC_BATCH_NUM && tryCount < pReader->waitDuration) { - tryCount++; - taosMsleep(1); - qDebug("try again batchSize:%d", numOfBlocks); - continue; + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { // extract block from inputQ, one-by-one + while (1) { + if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) { + qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); + return TSDB_CODE_SUCCESS; } - qDebug("break batchSize:%d", numOfBlocks); - break; - } + SStreamQueueItem* qItem = 
streamQueueNextItem(pTask->inputInfo.queue); + if (qItem == NULL) { + qDebug("===stream===break batchSize:%d, %s", *numOfBlocks, id); + return TSDB_CODE_SUCCESS; + } - if (pRet == NULL) { - pRet = qItem; - streamQueueProcessSuccess(pReader->pQueue); - if (pReader->taskLevel == TASK_LEVEL__SINK) { - break; - } - } else { - // todo we need to sort the data block, instead of just appending into the array list. - void* newRet = NULL; - if ((newRet = streamMergeQueueItem(pRet, qItem)) == NULL) { - streamQueueProcessFail(pReader->pQueue); - break; - } else { - numOfBlocks++; - pRet = newRet; - streamQueueProcessSuccess(pReader->pQueue); - if (numOfBlocks > pReader->maxBlocks) { - qDebug("maximum blocks limit:%d reached, processing, %s", pReader->maxBlocks, idstr); - break; - } - } + qDebug("s-task:%s sink task handle block one-by-one, type:%d", id, qItem->type); + + *numOfBlocks = 1; + *pInput = qItem; + return TSDB_CODE_SUCCESS; } } - return pRet; + while (1) { + if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) { + qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); + return TSDB_CODE_SUCCESS; + } + + SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputInfo.queue); + if (qItem == NULL) { + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && (++retryTimes) < MAX_RETRY_TIMES) { + taosMsleep(10); + qDebug("===stream===try again batchSize:%d, retry:%d, %s", *numOfBlocks, retryTimes, id); + continue; + } + + qDebug("===stream===break batchSize:%d, %s", *numOfBlocks, id); + return TSDB_CODE_SUCCESS; + } + + // do not merge blocks for sink node and check point data block + if (qItem->type == STREAM_INPUT__CHECKPOINT || qItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER || + qItem->type == STREAM_INPUT__TRANS_STATE) { + const char* p = streamGetBlockTypeStr(qItem->type); + + if (*pInput == NULL) { + qDebug("s-task:%s %s msg extracted, start to process immediately", id, p); + + *numOfBlocks = 1; + *pInput = qItem; + return TSDB_CODE_SUCCESS; + } else { + // previous existed blocks needs to be handle, before handle the checkpoint msg block + qDebug("s-task:%s %s msg extracted, handle previous blocks, numOfBlocks:%d", id, p, *numOfBlocks); + streamQueueProcessFail(pTask->inputInfo.queue); + return TSDB_CODE_SUCCESS; + } + } else { + if (*pInput == NULL) { + ASSERT((*numOfBlocks) == 0); + *pInput = qItem; + } else { + // todo we need to sort the data block, instead of just appending into the array list. 
+ void* newRet = streamMergeQueueItem(*pInput, qItem); + if (newRet == NULL) { + if (terrno != 0) { + qError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d, code:%s", id, *numOfBlocks, + tstrerror(terrno)); + } + + streamQueueProcessFail(pTask->inputInfo.queue); + return TSDB_CODE_SUCCESS; + } + + *pInput = newRet; + } + + *numOfBlocks += 1; + streamQueueProcessSuccess(pTask->inputInfo.queue); + + if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) { + qDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM); + return TSDB_CODE_SUCCESS; + } + } + } +} + +int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) { + int8_t type = pItem->type; + STaosQueue* pQueue = pTask->inputInfo.queue->pQueue; + int32_t total = taosQueueItemSize(pQueue) + 1; + double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); + + if (type == STREAM_INPUT__DATA_SUBMIT) { + SStreamDataSubmit* px = (SStreamDataSubmit*)pItem; + if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && streamQueueIsFull(pQueue)) { + qError( + "s-task:%s inputQ is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data", + pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size); + streamDataSubmitDestroy(px); + taosFreeQitem(pItem); + return -1; + } + + int32_t msgLen = px->submit.msgLen; + int64_t ver = px->submit.ver; + + int32_t code = taosWriteQitem(pQueue, pItem); + if (code != TSDB_CODE_SUCCESS) { + streamDataSubmitDestroy(px); + taosFreeQitem(pItem); + return code; + } + + // use the local variable to avoid the pItem be freed by other threads, since it has been put into queue already. + qDebug("s-task:%s submit enqueue msgLen:%d ver:%" PRId64 ", total in queue:%d, size:%.2fMiB", pTask->id.idStr, + msgLen, ver, total, size + SIZE_IN_MB(msgLen)); + } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || + type == STREAM_INPUT__REF_DATA_BLOCK) { + if (streamQueueIsFull(pQueue)) { + qError("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort", + pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size); + destroyStreamDataBlock((SStreamDataBlock*)pItem); + return -1; + } + + qDebug("s-task:%s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size); + int32_t code = taosWriteQitem(pQueue, pItem); + if (code != TSDB_CODE_SUCCESS) { + destroyStreamDataBlock((SStreamDataBlock*)pItem); + return code; + } + } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER || + type == STREAM_INPUT__TRANS_STATE) { + taosWriteQitem(pQueue, pItem); + qDebug("s-task:%s level:%d %s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, + pTask->info.taskLevel, streamGetBlockTypeStr(type), total, size); + } else if (type == STREAM_INPUT__GET_RES) { + // use the default memory limit, refactor later. 
+ taosWriteQitem(pQueue, pItem); + qDebug("s-task:%s data res enqueue, current(blocks:%d, size:%.2fMiB)", pTask->id.idStr, total, size); + } else { + ASSERT(0); + } + + if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && pTask->info.triggerParam != 0) { + atomic_val_compare_exchange_8(&pTask->schedInfo.status, TASK_TRIGGER_STATUS__INACTIVE, TASK_TRIGGER_STATUS__ACTIVE); + qDebug("s-task:%s new data arrived, active the trigger, triggerStatus:%d", pTask->id.idStr, pTask->schedInfo.status); + } + + return 0; } diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 2506dbaead..4b86b9713c 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -13,7 +13,9 @@ * along with this program. If not, see . */ +#include #include "streamInt.h" +#include "trpc.h" #include "ttimer.h" #include "wal.h" @@ -24,7 +26,6 @@ typedef struct SStreamTaskRetryInfo { } SStreamTaskRetryInfo; static int32_t streamSetParamForScanHistory(SStreamTask* pTask); -static void launchFillHistoryTask(SStreamTask* pTask); static void streamTaskSetRangeStreamCalc(SStreamTask* pTask); static int32_t initScanHistoryReq(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated); @@ -63,7 +64,9 @@ const char* streamGetTaskStatusStr(int32_t status) { case TASK_STATUS__SCAN_HISTORY: return "scan-history"; case TASK_STATUS__HALT: return "halt"; case TASK_STATUS__PAUSE: return "paused"; + case TASK_STATUS__CK: return "check-point"; case TASK_STATUS__DROPPING: return "dropping"; + case TASK_STATUS__STOP: return "stop"; default:return ""; } } @@ -92,8 +95,9 @@ int32_t streamTaskLaunchScanHistory(SStreamTask* pTask) { } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { if (pTask->info.fillHistory) { streamSetParamForScanHistory(pTask); + streamTaskEnablePause(pTask); } - streamTaskEnablePause(pTask); + streamTaskScanHistoryPrepare(pTask); } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { qDebug("s-task:%s sink task do nothing to handle scan-history", pTask->id.idStr); @@ -103,8 +107,8 @@ int32_t streamTaskLaunchScanHistory(SStreamTask* pTask) { } // check status -int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask) { - SHistDataRange* pRange = &pTask->dataRange; +static int32_t doCheckDownstreamStatus(SStreamTask* pTask) { + SDataRange* pRange = &pTask->dataRange; STimeWindow* pWindow = &pRange->window; SStreamTaskCheckReq req = { @@ -112,9 +116,10 @@ int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask) { .upstreamTaskId = pTask->id.taskId, .upstreamNodeId = pTask->info.nodeId, .childId = pTask->info.selfChildId, + .stage = pTask->pMeta->stage, }; - // serialize + // serialize streamProcessScanHistoryFinishRsp if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { req.reqId = tGenIdPI64(); req.downstreamNodeId = pTask->fixedEpDispatcher.nodeId; @@ -122,9 +127,9 @@ int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask) { pTask->checkReqId = req.reqId; qDebug("s-task:%s check single downstream task:0x%x(vgId:%d) ver:%" PRId64 "-%" PRId64 " window:%" PRId64 - "-%" PRId64 ", req:0x%" PRIx64, + "-%" PRId64 ", stage:%"PRId64" req:0x%" PRIx64, pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId, pRange->range.minVer, pRange->range.maxVer, - pWindow->skey, pWindow->ekey, req.reqId); + pWindow->skey, pWindow->ekey, req.stage, req.reqId); streamDispatchCheckMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); } else if (pTask->outputInfo.type == 
TASK_OUTPUT__SHUFFLE_DISPATCH) { @@ -143,8 +148,8 @@ int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask) { taosArrayPush(pTask->checkReqIds, &req.reqId); req.downstreamNodeId = pVgInfo->vgId; req.downstreamTaskId = pVgInfo->taskId; - qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (shuffle), idx:%d", pTask->id.idStr, pTask->info.nodeId, - req.downstreamTaskId, req.downstreamNodeId, i); + qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (shuffle), idx:%d, stage:%" PRId64, + pTask->id.idStr, pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId, i, req.stage); streamDispatchCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } } else { @@ -153,8 +158,7 @@ int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask) { streamTaskSetReady(pTask, 0); streamTaskSetRangeStreamCalc(pTask); streamTaskLaunchScanHistory(pTask); - - launchFillHistoryTask(pTask); + streamLaunchFillHistoryTask(pTask); } return 0; @@ -169,12 +173,12 @@ int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* p .downstreamTaskId = pRsp->downstreamTaskId, .downstreamNodeId = pRsp->downstreamNodeId, .childId = pRsp->childId, + .stage = pTask->pMeta->stage, }; - qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (recheck)", pTask->id.idStr, pTask->info.nodeId, - req.downstreamTaskId, req.downstreamNodeId); - if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { + qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) stage:%" PRId64 " (recheck)", pTask->id.idStr, + pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId, req.stage); streamDispatchCheckMsg(pTask, &req, pRsp->downstreamNodeId, &pTask->fixedEpDispatcher.epSet); } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; @@ -183,6 +187,8 @@ int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* p for (int32_t i = 0; i < numOfVgs; i++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); if (pVgInfo->taskId == req.downstreamTaskId) { + qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) stage:%" PRId64 " (recheck)", pTask->id.idStr, + pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId, req.stage); streamDispatchCheckMsg(pTask, &req, pRsp->downstreamNodeId, &pVgInfo->epSet); } } @@ -191,8 +197,28 @@ int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* p return 0; } -int32_t streamTaskCheckStatus(SStreamTask* pTask) { - return (pTask->status.downstreamReady == 1)? 1:0; +int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage) { + SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); + ASSERT(pInfo != NULL); + + if (stage == -1) { + qDebug("s-task:%s receive check msg from upstream task:0x%x, invalid stageId:%" PRId64 ", not ready", pTask->id.idStr, + upstreamTaskId, stage); + return 0; + } + + if (pInfo->stage == -1) { + pInfo->stage = stage; + qDebug("s-task:%s receive check msg from upstream task:0x%x, init stage value:%" PRId64, pTask->id.idStr, + upstreamTaskId, stage); + } + + if (pInfo->stage < stage) { + qError("s-task:%s receive msg from upstream task:0x%x(vgId:%d), new stage received:%" PRId64 ", prev:%" PRId64, + pTask->id.idStr, upstreamTaskId, vgId, stage, pInfo->stage); + } + + return ((pTask->status.downstreamReady == 1) && (pInfo->stage == stage))? 
1:0; } static void doProcessDownstreamReadyRsp(SStreamTask* pTask, int32_t numOfReqs) { @@ -213,9 +239,10 @@ static void doProcessDownstreamReadyRsp(SStreamTask* pTask, int32_t numOfReqs) { } // when current stream task is ready, check the related fill history task. - launchFillHistoryTask(pTask); + streamLaunchFillHistoryTask(pTask); } +// todo handle error int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) { ASSERT(pTask->id.taskId == pRsp->upstreamTaskId); const char* id = pTask->id.idStr; @@ -259,10 +286,9 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs doProcessDownstreamReadyRsp(pTask, 1); } } else { // not ready, wait for 100ms and retry - qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, wait for 100ms and retry", id, pRsp->downstreamTaskId, - pRsp->downstreamNodeId); + qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, stage:%d, wait for 100ms and retry", id, + pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->oldStage); taosMsleep(100); - streamRecheckDownstream(pTask, pRsp); } @@ -339,40 +365,7 @@ int32_t streamSourceScanHistoryData(SStreamTask* pTask) { return streamScanExec(pTask, 100); } -int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) { - SStreamScanHistoryFinishReq req = { - .streamId = pTask->id.streamId, - .childId = pTask->info.selfChildId, - .upstreamTaskId = pTask->id.taskId, - .upstreamNodeId = pTask->pMeta->vgId, - }; - - // serialize - if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { - req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; - pTask->notReadyTasks = 1; - streamDoDispatchScanHistoryFinishMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); - } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; - int32_t numOfVgs = taosArrayGetSize(vgInfo); - pTask->notReadyTasks = numOfVgs; - - qDebug("s-task:%s send scan-history data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr, - numOfVgs, streamGetTaskStatusStr(pTask->status.taskStatus)); - for (int32_t i = 0; i < numOfVgs; i++) { - SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); - req.downstreamTaskId = pVgInfo->taskId; - streamDoDispatchScanHistoryFinishMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); - } - } else { - qDebug("s-task:%s no downstream tasks, invoke scan-history finish rsp directly", pTask->id.idStr); - streamProcessScanHistoryFinishRsp(pTask); - } - - return 0; -} - -int32_t appendTranstateIntoInputQ(SStreamTask* pTask) { +int32_t streamTaskPutTranstateIntoInputQ(SStreamTask* pTask) { SStreamDataBlock* pTranstate = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock)); if (pTranstate == NULL) { return TSDB_CODE_OUT_OF_MEMORY; @@ -394,7 +387,7 @@ int32_t appendTranstateIntoInputQ(SStreamTask* pTask) { taosArrayPush(pTranstate->blocks, pBlock); taosMemoryFree(pBlock); - if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pTranstate) < 0) { + if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pTranstate) < 0) { taosFreeQitem(pTranstate); return TSDB_CODE_OUT_OF_MEMORY; } @@ -405,7 +398,7 @@ int32_t appendTranstateIntoInputQ(SStreamTask* pTask) { // agg int32_t streamTaskScanHistoryPrepare(SStreamTask* pTask) { - pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->pUpstreamEpInfoList); + pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->pUpstreamInfoList); qDebug("s-task:%s level:%d task wait for 
%d upstream tasks complete scan-history procedure, status:%s", pTask->id.idStr, pTask->info.taskLevel, pTask->numOfWaitingUpstream, streamGetTaskStatusStr(pTask->status.taskStatus)); @@ -429,14 +422,31 @@ int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistory int32_t taskLevel = pTask->info.taskLevel; ASSERT(taskLevel == TASK_LEVEL__AGG || taskLevel == TASK_LEVEL__SINK); - // sink node do not send end of scan history msg to its upstream, which is agg task. + if (pTask->status.taskStatus != TASK_STATUS__SCAN_HISTORY) { + qError("s-task:%s not in scan-history status, status:%s return upstream:0x%x scan-history finish directly", + pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), pReq->upstreamTaskId); + + void* pBuf = NULL; + int32_t len = 0; + streamTaskBuildScanhistoryRspMsg(pTask, pReq, &pBuf, &len); + + SRpcMsg msg = {.info = *pRpcInfo}; + initRpcMsg(&msg, 0, pBuf, sizeof(SMsgHead) + len); + + tmsgSendRsp(&msg); + qDebug("s-task:%s level:%d notify upstream:0x%x(vgId:%d) to continue process data in WAL", pTask->id.idStr, + pTask->info.taskLevel, pReq->upstreamTaskId, pReq->upstreamNodeId); + return 0; + } + + // sink tasks do not send end of scan history msg to its upstream, which is agg task. streamAddEndScanHistoryMsg(pTask, pRpcInfo, pReq); int32_t left = atomic_sub_fetch_32(&pTask->numOfWaitingUpstream, 1); ASSERT(left >= 0); if (left == 0) { - int32_t numOfTasks = taosArrayGetSize(pTask->pUpstreamEpInfoList); + int32_t numOfTasks = taosArrayGetSize(pTask->pUpstreamInfoList); qDebug( "s-task:%s all %d upstream tasks finish scan-history data, set param for agg task for stream data and send " "rsp to all upstream tasks", @@ -446,11 +456,18 @@ int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistory streamAggUpstreamScanHistoryFinish(pTask); } + // all upstream tasks have completed the scan-history task in the stream time window, let's start to extract data + // from the WAL files, which contains the real time stream data. streamNotifyUpstreamContinue(pTask); - // sink node does not receive the pause msg from mnode, so does not need enable it - if (pTask->info.taskLevel == TASK_LEVEL__AGG) { - streamTaskEnablePause(pTask); + // mnode will not send the pause/resume message to the sink task, so no need to enable the pause for sink tasks. 
+ if (taskLevel == TASK_LEVEL__AGG) { + /*int32_t code = */streamTaskScanHistoryDataComplete(pTask); + } else { // for sink task, set normal + if (pTask->status.taskStatus != TASK_STATUS__PAUSE && pTask->status.taskStatus != TASK_STATUS__STOP && + pTask->status.taskStatus != TASK_STATUS__DROPPING) { + streamSetStatusNormal(pTask); + } } } else { qDebug("s-task:%s receive scan-history data finish msg from upstream:0x%x(index:%d), unfinished:%d", @@ -484,7 +501,7 @@ int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask) { return TSDB_CODE_SUCCESS; } -static void doCheckDownstreamStatus(SStreamTask* pTask, SStreamTask* pHTask) { +static void checkFillhistoryTaskStatus(SStreamTask* pTask, SStreamTask* pHTask) { pHTask->dataRange.range.minVer = 0; pHTask->dataRange.range.maxVer = pTask->chkInfo.currentVer; @@ -498,7 +515,7 @@ static void doCheckDownstreamStatus(SStreamTask* pTask, SStreamTask* pHTask) { } // check if downstream tasks have been ready - streamTaskDoCheckDownstreamTasks(pHTask); + doCheckDownstreamStatus(pHTask); } static void tryLaunchHistoryTask(void* param, void* tmrId) { @@ -545,7 +562,7 @@ static void tryLaunchHistoryTask(void* param, void* tmrId) { } if (pHTask != NULL) { - doCheckDownstreamStatus(pTask, pHTask); + checkFillhistoryTaskStatus(pTask, pHTask); streamMetaReleaseTask(pMeta, pHTask); } @@ -562,10 +579,20 @@ static void tryLaunchHistoryTask(void* param, void* tmrId) { // todo fix the bug: 2. race condition // an fill history task needs to be started. int32_t streamLaunchFillHistoryTask(SStreamTask* pTask) { + int32_t tId = pTask->historyTaskId.taskId; + if (tId == 0) { + return TSDB_CODE_SUCCESS; + } + + ASSERT(pTask->status.downstreamReady == 1); + qDebug("s-task:%s start to launch related fill-history task:0x%" PRIx64 "-0x%x", pTask->id.idStr, + pTask->historyTaskId.streamId, tId); + SStreamMeta* pMeta = pTask->pMeta; int32_t hTaskId = pTask->historyTaskId.taskId; int64_t keys[2] = {pTask->historyTaskId.streamId, pTask->historyTaskId.taskId}; + // Set the execute conditions, including the query time window and the version range SStreamTask** pHTask = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); if (pHTask == NULL) { @@ -592,11 +619,16 @@ int32_t streamLaunchFillHistoryTask(SStreamTask* pTask) { taosTmrReset(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer, &pTask->launchTaskTimer); } - // try again in 500ms + // try again in 100ms return TSDB_CODE_SUCCESS; } - doCheckDownstreamStatus(pTask, *pHTask); + if ((*pHTask)->status.downstreamReady == 1) { + qDebug("s-task:%s fill-history task is ready, no need to check downstream", (*pHTask)->id.idStr); + } else { + checkFillhistoryTaskStatus(pTask, *pHTask); + } + return TSDB_CODE_SUCCESS; } @@ -659,6 +691,7 @@ int32_t tEncodeStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* if (tEncodeI32(pEncoder, pReq->downstreamNodeId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->downstreamTaskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->childId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->stage) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; } @@ -672,6 +705,7 @@ int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq) if (tDecodeI32(pDecoder, &pReq->downstreamNodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->downstreamTaskId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->childId) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->stage) < 0) return -1; tEndDecode(pDecoder); return 0; } @@ -685,6 +719,7 @@ int32_t tEncodeStreamTaskCheckRsp(SEncoder* 
pEncoder, const SStreamTaskCheckRsp* if (tEncodeI32(pEncoder, pRsp->downstreamNodeId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->downstreamTaskId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->childId) < 0) return -1; + if (tEncodeI32(pEncoder, pRsp->oldStage) < 0) return -1; if (tEncodeI8(pEncoder, pRsp->status) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; @@ -699,6 +734,7 @@ int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp) if (tDecodeI32(pDecoder, &pRsp->downstreamNodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->downstreamTaskId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->childId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->oldStage) < 0) return -1; if (tDecodeI8(pDecoder, &pRsp->status) < 0) return -1; tEndDecode(pDecoder); return 0; @@ -728,7 +764,7 @@ int32_t tDecodeStreamScanHistoryFinishReq(SDecoder* pDecoder, SStreamScanHistory void streamTaskSetRangeStreamCalc(SStreamTask* pTask) { if (pTask->historyTaskId.taskId == 0) { - SHistDataRange* pRange = &pTask->dataRange; + SDataRange* pRange = &pTask->dataRange; if (pTask->info.fillHistory == 1) { qDebug("s-task:%s fill-history task, time window:%" PRId64 "-%" PRId64 ", verRange:%" PRId64 "-%" PRId64, @@ -739,7 +775,7 @@ void streamTaskSetRangeStreamCalc(SStreamTask* pTask) { pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer); } } else { - SHistDataRange* pRange = &pTask->dataRange; + SDataRange* pRange = &pTask->dataRange; int64_t ekey = 0; if (pRange->window.ekey < INT64_MAX) { @@ -762,36 +798,19 @@ void streamTaskSetRangeStreamCalc(SStreamTask* pTask) { } } -void launchFillHistoryTask(SStreamTask* pTask) { - int32_t tId = pTask->historyTaskId.taskId; - if (tId == 0) { - return; - } - - ASSERT(pTask->status.downstreamReady == 1); - qDebug("s-task:%s start to launch related fill-history task:0x%" PRIx64 "-0x%x", pTask->id.idStr, - pTask->historyTaskId.streamId, tId); - - // launch associated fill history task - streamLaunchFillHistoryTask(pTask); -} - -void streamTaskCheckDownstreamTasks(SStreamTask* pTask) { +// only the downstream tasks are ready, set the task to be ready to work. +void streamTaskCheckDownstream(SStreamTask* pTask) { if (pTask->info.fillHistory) { qDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr); return; } ASSERT(pTask->status.downstreamReady == 0); - - // check downstream tasks for itself - streamTaskDoCheckDownstreamTasks(pTask); + doCheckDownstreamStatus(pTask); } // normal -> pause, pause/stop/dropping -> pause, halt -> pause, scan-history -> pause -void streamTaskPause(SStreamTask* pTask) { - SStreamMeta* pMeta = pTask->pMeta; - +void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta) { int64_t st = taosGetTimestampMs(); int8_t status = pTask->status.taskStatus; @@ -806,6 +825,12 @@ void streamTaskPause(SStreamTask* pTask) { return; } + if(pTask->info.taskLevel == TASK_LEVEL__SINK) { + int32_t num = atomic_add_fetch_32(&pMeta->pauseTaskNum, 1); + qInfo("vgId:%d s-task:%s pause stream sink task. 
pause task num:%d", pMeta->vgId, pTask->id.idStr, num); + return; + } + while (!pTask->status.pauseAllowed || (pTask->status.taskStatus == TASK_STATUS__HALT)) { status = pTask->status.taskStatus; if (status == TASK_STATUS__DROPPING) { @@ -835,6 +860,8 @@ void streamTaskPause(SStreamTask* pTask) { atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus); atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE); + int32_t num = atomic_add_fetch_32(&pMeta->pauseTaskNum, 1); + qInfo("vgId:%d s-task:%s pause stream task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num); taosWUnLockLatch(&pMeta->lock); // in case of fill-history task, stop the tsdb file scan operation. @@ -844,16 +871,20 @@ void streamTaskPause(SStreamTask* pTask) { } int64_t el = taosGetTimestampMs() - st; - qDebug("vgId:%d s-task:%s set pause flag, prev:%s, elapsed time:%dms", pMeta->vgId, pTask->id.idStr, + qDebug("vgId:%d s-task:%s set pause flag, prev:%s, pause elapsed time:%dms", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(pTask->status.keepTaskStatus), (int32_t)el); } -void streamTaskResume(SStreamTask* pTask) { +void streamTaskResume(SStreamTask* pTask, SStreamMeta* pMeta) { int8_t status = pTask->status.taskStatus; if (status == TASK_STATUS__PAUSE) { pTask->status.taskStatus = pTask->status.keepTaskStatus; pTask->status.keepTaskStatus = TASK_STATUS__NORMAL; - qDebug("s-task:%s resume from pause", pTask->id.idStr); + int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1); + qInfo("vgId:%d s-task:%s resume from pause, status:%s. pause task num:%d", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(status), num); + } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1); + qInfo("vgId:%d s-task:%s sink task.resume from pause, status:%s. pause task num:%d", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(status), num); } else { qError("s-task:%s not in pause, failed to resume, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status)); } diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c new file mode 100644 index 0000000000..8a4500dd86 --- /dev/null +++ b/source/libs/stream/src/streamSnapshot.c @@ -0,0 +1,499 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "streamSnapshot.h" +#include "query.h" +#include "rocksdb/c.h" +#include "streamBackendRocksdb.h" +#include "tcommon.h" + +enum SBackendFileType { + ROCKSDB_OPTIONS_TYPE = 1, + ROCKSDB_MAINFEST_TYPE = 2, + ROCKSDB_SST_TYPE = 3, + ROCKSDB_CURRENT_TYPE = 4, + ROCKSDB_CHECKPOINT_META_TYPE = 5, +}; + +typedef struct SBackendFileItem { + char* name; + int8_t type; + int64_t size; +} SBackendFileItem; +typedef struct SBackendFile { + char* pCurrent; + char* pMainfest; + char* pOptions; + SArray* pSst; + char* pCheckpointMeta; + char* path; +} SBanckendFile; +struct SStreamSnapHandle { + void* handle; + SBanckendFile* pBackendFile; + int64_t checkpointId; + int64_t seraial; + int64_t offset; + TdFilePtr fd; + int8_t filetype; + SArray* pFileList; + int32_t currFileIdx; +}; +struct SStreamSnapBlockHdr { + int8_t type; + int8_t flag; + int64_t index; + char name[128]; + int64_t totalSize; + int64_t size; + uint8_t data[]; +}; +struct SStreamSnapReader { + void* pMeta; + int64_t sver; + int64_t ever; + SStreamSnapHandle handle; + int64_t checkpointId; +}; +struct SStreamSnapWriter { + void* pMeta; + int64_t sver; + int64_t ever; + SStreamSnapHandle handle; +}; +const char* ROCKSDB_OPTIONS = "OPTIONS"; +const char* ROCKSDB_MAINFEST = "MANIFEST"; +const char* ROCKSDB_SST = "sst"; +const char* ROCKSDB_CURRENT = "CURRENT"; +const char* ROCKSDB_CHECKPOINT_META = "CHECKPOINT"; +static int64_t kBlockSize = 64 * 1024; + +int32_t streamSnapHandleInit(SStreamSnapHandle* handle, char* path, int64_t chkpId, void* pMeta); +void streamSnapHandleDestroy(SStreamSnapHandle* handle); + +// static void streamBuildFname(char* path, char* file, char* fullname) + +#define STREAM_ROCKSDB_BUILD_FULLNAME(path, file, fullname) \ + do { \ + sprintf(fullname, "%s%s%s", path, TD_DIRSEP, file); \ + } while (0) + +int32_t streamGetFileSize(char* path, char* name, int64_t* sz) { + int ret = 0; + + char* fullname = taosMemoryCalloc(1, strlen(path) + 32); + sprintf(fullname, "%s%s%s", path, TD_DIRSEP, name); + + ret = taosStatFile(fullname, sz, NULL, NULL); + taosMemoryFree(fullname); + + return ret; +} + +TdFilePtr streamOpenFile(char* path, char* name, int32_t opt) { + char fullname[256] = {0}; + STREAM_ROCKSDB_BUILD_FULLNAME(path, name, fullname); + return taosOpenFile(fullname, opt); +} + +int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chkpId, void* pMeta) { + // impl later + int len = strlen(path); + char* tdir = taosMemoryCalloc(1, len + 256); + memcpy(tdir, path, len); + + int32_t code = 0; + + int8_t validChkp = 0; + if (chkpId != 0) { + sprintf(tdir, "%s%s%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "stream", TD_DIRSEP, "checkpoints", TD_DIRSEP, + chkpId); + if (taosIsDir(tdir)) { + validChkp = 1; + qInfo("%s start to read snap %s", STREAM_STATE_TRANSFER, tdir); + streamBackendAddInUseChkp(pMeta, chkpId); + } else { + qWarn("%s failed to read from %s, reason: dir not exist,retry to default state dir", STREAM_STATE_TRANSFER, tdir); + } + } + + // no checkpoint specified or not exists invalid checkpoint, do checkpoint at default path and translate it + if (validChkp == 0) { + sprintf(tdir, "%s%s%s%s%s", path, TD_DIRSEP, "stream", TD_DIRSEP, "state"); + char* chkpdir = taosMemoryCalloc(1, len + 256); + sprintf(chkpdir, "%s%s%s", tdir, TD_DIRSEP, "tmp"); + taosMemoryFree(tdir); + + tdir = chkpdir; + qInfo("%s start to trigger checkpoint on %s", STREAM_STATE_TRANSFER, tdir); + + code = streamBackendTriggerChkp(pMeta, tdir); + if (code != 0) { + qError("%s failed to trigger 
chekckpoint at %s", STREAM_STATE_TRANSFER, tdir); + taosMemoryFree(tdir); + return code; + } + chkpId = 0; + } + + qInfo("%s start to read dir: %s", STREAM_STATE_TRANSFER, tdir); + + TdDirPtr pDir = taosOpenDir(tdir); + if (NULL == pDir) { + qError("%s failed to open %s", STREAM_STATE_TRANSFER, tdir); + goto _err; + } + + SBanckendFile* pFile = taosMemoryCalloc(1, sizeof(SBanckendFile)); + pHandle->pBackendFile = pFile; + pHandle->checkpointId = chkpId; + pHandle->seraial = 0; + + pFile->path = tdir; + pFile->pSst = taosArrayInit(16, sizeof(void*)); + + TdDirEntryPtr pDirEntry; + while ((pDirEntry = taosReadDir(pDir)) != NULL) { + char* name = taosGetDirEntryName(pDirEntry); + if (strlen(name) >= strlen(ROCKSDB_CURRENT) && 0 == strncmp(name, ROCKSDB_CURRENT, strlen(ROCKSDB_CURRENT))) { + pFile->pCurrent = taosStrdup(name); + continue; + } + if (strlen(name) >= strlen(ROCKSDB_MAINFEST) && 0 == strncmp(name, ROCKSDB_MAINFEST, strlen(ROCKSDB_MAINFEST))) { + pFile->pMainfest = taosStrdup(name); + continue; + } + if (strlen(name) >= strlen(ROCKSDB_OPTIONS) && 0 == strncmp(name, ROCKSDB_OPTIONS, strlen(ROCKSDB_OPTIONS))) { + pFile->pOptions = taosStrdup(name); + continue; + } + if (strlen(name) >= strlen(ROCKSDB_CHECKPOINT_META) && + 0 == strncmp(name, ROCKSDB_CHECKPOINT_META, strlen(ROCKSDB_CHECKPOINT_META))) { + pFile->pCheckpointMeta = taosStrdup(name); + continue; + } + if (strlen(name) >= strlen(ROCKSDB_SST) && + 0 == strncmp(name + strlen(name) - strlen(ROCKSDB_SST), ROCKSDB_SST, strlen(ROCKSDB_SST))) { + char* sst = taosStrdup(name); + taosArrayPush(pFile->pSst, &sst); + } + } + { + char* buf = taosMemoryCalloc(1, 512); + sprintf(buf, "[current: %s,", pFile->pCurrent); + sprintf(buf + strlen(buf), "MANIFEST: %s,", pFile->pMainfest); + sprintf(buf + strlen(buf), "options: %s,", pFile->pOptions); + + for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) { + char* name = taosArrayGetP(pFile->pSst, i); + sprintf(buf + strlen(buf), "%s,", name); + } + sprintf(buf + strlen(buf) - 1, "]"); + + qInfo("%s get file list: %s", STREAM_STATE_TRANSFER, buf); + taosMemoryFree(buf); + } + + taosCloseDir(&pDir); + + if (pFile->pCurrent == NULL) { + qError("%s failed to open %s, reason: no valid file", STREAM_STATE_TRANSFER, tdir); + code = -1; + tdir = NULL; + goto _err; + } + SArray* list = taosArrayInit(64, sizeof(SBackendFileItem)); + + SBackendFileItem item; + // current + item.name = pFile->pCurrent; + item.type = ROCKSDB_CURRENT_TYPE; + streamGetFileSize(pFile->path, item.name, &item.size); + taosArrayPush(list, &item); + + // mainfest + item.name = pFile->pMainfest; + item.type = ROCKSDB_MAINFEST_TYPE; + streamGetFileSize(pFile->path, item.name, &item.size); + taosArrayPush(list, &item); + + // options + item.name = pFile->pOptions; + item.type = ROCKSDB_OPTIONS_TYPE; + streamGetFileSize(pFile->path, item.name, &item.size); + taosArrayPush(list, &item); + // sst + for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) { + char* sst = taosArrayGetP(pFile->pSst, i); + item.name = sst; + item.type = ROCKSDB_SST_TYPE; + streamGetFileSize(pFile->path, item.name, &item.size); + taosArrayPush(list, &item); + } + // meta + item.name = pFile->pCheckpointMeta; + item.type = ROCKSDB_CHECKPOINT_META_TYPE; + if (streamGetFileSize(pFile->path, item.name, &item.size) == 0) { + taosArrayPush(list, &item); + } + + pHandle->pBackendFile = pFile; + + pHandle->currFileIdx = 0; + pHandle->pFileList = list; + pHandle->seraial = 0; + pHandle->offset = 0; + pHandle->handle = pMeta; + return 0; +_err: + 
streamSnapHandleDestroy(pHandle); + taosMemoryFreeClear(tdir); + + code = -1; + return code; +} + +void streamSnapHandleDestroy(SStreamSnapHandle* handle) { + SBanckendFile* pFile = handle->pBackendFile; + + if (handle->checkpointId == 0) { + // del tmp dir + if (taosIsDir(pFile->path)) { + taosRemoveDir(pFile->path); + } + } else { + streamBackendDelInUseChkp(handle->handle, handle->checkpointId); + } + if (pFile) { + taosMemoryFree(pFile->pCheckpointMeta); + taosMemoryFree(pFile->pCurrent); + taosMemoryFree(pFile->pMainfest); + taosMemoryFree(pFile->pOptions); + taosMemoryFree(pFile->path); + for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) { + char* sst = taosArrayGetP(pFile->pSst, i); + taosMemoryFree(sst); + } + taosArrayDestroy(pFile->pSst); + taosMemoryFree(pFile); + } + taosArrayDestroy(handle->pFileList); + taosCloseFile(&handle->fd); + return; +} + +int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t chkpId, char* path, SStreamSnapReader** ppReader) { + // impl later + SStreamSnapReader* pReader = taosMemoryCalloc(1, sizeof(SStreamSnapReader)); + if (pReader == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + if (streamSnapHandleInit(&pReader->handle, (char*)path, chkpId, pMeta) < 0) { + taosMemoryFree(pReader); + return -1; + } + + *ppReader = pReader; + + return 0; +} +int32_t streamSnapReaderClose(SStreamSnapReader* pReader) { + if (pReader == NULL) return 0; + + streamSnapHandleDestroy(&pReader->handle); + taosMemoryFree(pReader); + return 0; +} +int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* size) { + // impl later + int32_t code = 0; + SStreamSnapHandle* pHandle = &pReader->handle; + SBanckendFile* pFile = pHandle->pBackendFile; + + SBackendFileItem* item = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); + + if (pHandle->fd == NULL) { + if (pHandle->currFileIdx >= taosArrayGetSize(pHandle->pFileList)) { + // finish + *ppData = NULL; + *size = 0; + return 0; + } else { + pHandle->fd = streamOpenFile(pFile->path, item->name, TD_FILE_READ); + qDebug("%s open file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER, + item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + } + } + + qDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64 ", file no.%d", STREAM_STATE_TRANSFER, + item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + uint8_t* buf = taosMemoryCalloc(1, sizeof(SStreamSnapBlockHdr) + kBlockSize); + int64_t nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); + if (nread == -1) { + code = TAOS_SYSTEM_ERROR(terrno); + qError("%s snap failed to read snap, file name:%s, type:%d,reason:%s", STREAM_STATE_TRANSFER, item->name, + item->type, tstrerror(code)); + return -1; + } else if (nread > 0 && nread <= kBlockSize) { + // left bytes less than kBlockSize + qDebug("%s read file %s, current offset:%" PRId64 ",size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER, + item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + pHandle->offset += nread; + if (pHandle->offset >= item->size || nread < kBlockSize) { + taosCloseFile(&pHandle->fd); + pHandle->offset = 0; + pHandle->currFileIdx += 1; + } + } else { + qDebug("%s no data read, close file no.%d, move to next file, open and read", STREAM_STATE_TRANSFER, + pHandle->currFileIdx); + taosCloseFile(&pHandle->fd); + pHandle->offset = 0; + pHandle->currFileIdx += 1; + + if (pHandle->currFileIdx >= 
taosArrayGetSize(pHandle->pFileList)) { + // finish + *ppData = NULL; + *size = 0; + return 0; + } + item = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); + pHandle->fd = streamOpenFile(pFile->path, item->name, TD_FILE_READ); + + nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); + pHandle->offset += nread; + + qDebug("%s open file and read file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", + STREAM_STATE_TRANSFER, item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + } + + SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)buf; + pHdr->size = nread; + pHdr->type = item->type; + pHdr->totalSize = item->size; + + memcpy(pHdr->name, item->name, strlen(item->name)); + pHandle->seraial += nread; + + *ppData = buf; + *size = sizeof(SStreamSnapBlockHdr) + nread; + return 0; +} +// SMetaSnapWriter ======================================== +int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path, SStreamSnapWriter** ppWriter) { + // impl later + SStreamSnapWriter* pWriter = taosMemoryCalloc(1, sizeof(SStreamSnapWriter)); + if (pWriter == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + SStreamSnapHandle* pHandle = &pWriter->handle; + + SBanckendFile* pFile = taosMemoryCalloc(1, sizeof(SBanckendFile)); + pFile->path = taosStrdup(path); + SArray* list = taosArrayInit(64, sizeof(SBackendFileItem)); + + SBackendFileItem item; + item.name = taosStrdup((char*)ROCKSDB_CURRENT); + item.type = ROCKSDB_CURRENT_TYPE; + taosArrayPush(list, &item); + + pHandle->pBackendFile = pFile; + + pHandle->pFileList = list; + pHandle->currFileIdx = 0; + pHandle->offset = 0; + + *ppWriter = pWriter; + return 0; +} + +int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { + int32_t code = 0; + + SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)pData; + SStreamSnapHandle* pHandle = &pWriter->handle; + SBanckendFile* pFile = pHandle->pBackendFile; + SBackendFileItem* pItem = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); + + if (pHandle->fd == NULL) { + pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); + if (pHandle->fd == NULL) { + code = TAOS_SYSTEM_ERROR(terrno); + qError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, pHdr->name, + tstrerror(code)); + } + } + + if (strlen(pHdr->name) == strlen(pItem->name) && strcmp(pHdr->name, pItem->name) == 0) { + int64_t bytes = taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset); + if (bytes != pHdr->size) { + code = TAOS_SYSTEM_ERROR(terrno); + qError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); + return code; + } + pHandle->offset += bytes; + } else { + taosCloseFile(&pHandle->fd); + pHandle->offset = 0; + pHandle->currFileIdx += 1; + + SBackendFileItem item; + item.name = taosStrdup(pHdr->name); + item.type = pHdr->type; + taosArrayPush(pHandle->pFileList, &item); + + SBackendFileItem* pItem = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); + pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); + if (pHandle->fd == NULL) { + code = TAOS_SYSTEM_ERROR(terrno); + qError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, pHdr->name, + tstrerror(code)); + } + + taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset); + pHandle->offset 
+= pHdr->size; + } + + // impl later + return 0; +} +int32_t streamSnapWriterClose(SStreamSnapWriter* pWriter, int8_t rollback) { + SStreamSnapHandle* handle = &pWriter->handle; + if (qDebugFlag & DEBUG_DEBUG) { + char* buf = (char*)taosMemoryMalloc(1024); + int n = sprintf(buf, "["); + for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) { + SBackendFileItem* item = taosArrayGet(handle->pFileList, i); + if (i != taosArrayGetSize(handle->pFileList) - 1) { + n += sprintf(buf + n, "%s %" PRId64 ",", item->name, item->size); + } else { + n += sprintf(buf + n, "%s %" PRId64 "]", item->name, item->size); + } + } + qDebug("%s snap get file list, %s", STREAM_STATE_TRANSFER, buf); + taosMemoryFree(buf); + } + + for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) { + SBackendFileItem* item = taosArrayGet(handle->pFileList, i); + taosMemoryFree(item->name); + } + + streamSnapHandleDestroy(handle); + taosMemoryFree(pWriter); + + return 0; +} diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 8694e5cf4c..83aed42fe2 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -108,7 +108,7 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz SStreamTask* pStreamTask = pTask; char statePath[1024]; if (!specPath) { - sprintf(statePath, "%s/%d", path, pStreamTask->id.taskId); + sprintf(statePath, "%s%s%d", path, TD_DIRSEP, pStreamTask->id.taskId); } else { memset(statePath, 0, 1024); tstrncpy(statePath, path, 1024); @@ -128,7 +128,6 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz if (uniqueId == NULL) { int code = streamStateOpenBackend(pMeta->streamBackend, pState); if (code == -1) { - taosReleaseRef(streamBackendId, pState->streamBackendRid); taosThreadMutexUnlock(&pMeta->backendMutex); taosMemoryFree(pState); return NULL; @@ -139,8 +138,9 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz int64_t id = *(int64_t*)uniqueId; pState->pTdbState->backendCfWrapperId = id; pState->pTdbState->pBackendCfWrapper = taosAcquireRef(streamBackendCfWrapperId, id); - - taosAcquireRef(streamBackendId, pState->streamBackendRid); + // already exist stream task for + qInfo("already exist stream-state for %s", pState->pTdbState->idstr); + // taosAcquireRef(streamBackendId, pState->streamBackendRid); } taosThreadMutexUnlock(&pMeta->backendMutex); @@ -149,6 +149,8 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT); pState->parNameMap = tSimpleHashInit(1024, hashFn); + qInfo("succ to open state %p on backend %p 0x%" PRIx64 "-%d", pState, pMeta->streamBackend, pState->streamId, + pState->taskId); return pState; #else @@ -424,10 +426,15 @@ int32_t streamStateSaveInfo(SStreamState* pState, void* pKey, int32_t keyLen, vo code = streamStatePutBatch(pState, "default", batch, pKey, pVal, vLen, 0); if (code != 0) { + streamStateDestroyBatch(batch); return code; } code = streamStatePutBatch_rocksdb(pState, batch); streamStateDestroyBatch(batch); + // code = streamDefaultPut_rocksdb(pState, pKey, pVal, vLen); + // char* Val = NULL; + // int32_t len = 0; + // code = streamDefaultGet_rocksdb(pState, pKey, (void**)&Val, &len); return code; #else return 0; @@ -729,7 +736,8 @@ void streamStateFreeVal(void* val) { int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, const void* value, int32_t vLen) { #ifdef USE_ROCKSDB - 
qDebug("===stream===save skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey,key->win.ekey, key->groupId); + qDebug("===stream===save skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey, + key->groupId); return streamStateSessionPut_rocksdb(pState, key, value, vLen); #else SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; @@ -763,7 +771,8 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) { #ifdef USE_ROCKSDB - qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey,key->win.ekey, key->groupId); + qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey, + key->groupId); return streamStateSessionDel_rocksdb(pState, key); #else SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 232ca132ab..711dbf65e7 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -13,11 +13,12 @@ * along with this program. If not, see . */ -#include "streamInt.h" #include "executor.h" +#include "streamInt.h" +#include "tmisce.h" #include "tstream.h" -#include "wal.h" #include "ttimer.h" +#include "wal.h" static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) { int32_t childId = taosArrayGetSize(pArray); @@ -38,7 +39,7 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHisto pTask->id.streamId = streamId; pTask->info.taskLevel = taskLevel; pTask->info.fillHistory = fillHistory; - pTask->triggerParam = triggerParam; + pTask->info.triggerParam = triggerParam; char buf[128] = {0}; sprintf(buf, "0x%" PRIx64 "-%d", pTask->id.streamId, pTask->id.taskId); @@ -46,7 +47,7 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHisto pTask->id.idStr = taosStrdup(buf); pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; pTask->status.taskStatus = TASK_STATUS__SCAN_HISTORY; - pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; + pTask->inputInfo.status = TASK_INPUT_STATUS__NORMAL; pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL; addToTaskset(pTaskList, pTask); @@ -59,6 +60,7 @@ int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo) if (tEncodeI32(pEncoder, pInfo->childId) < 0) return -1; /*if (tEncodeI64(pEncoder, pInfo->processedVer) < 0) return -1;*/ if (tEncodeSEpSet(pEncoder, &pInfo->epSet) < 0) return -1; + if (tEncodeI64(pEncoder, pInfo->stage) < 0) return -1; return 0; } @@ -68,6 +70,7 @@ int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo) { if (tDecodeI32(pDecoder, &pInfo->childId) < 0) return -1; /*if (tDecodeI64(pDecoder, &pInfo->processedVer) < 0) return -1;*/ if (tDecodeSEpSet(pDecoder, &pInfo->epSet) < 0) return -1; + if (tDecodeI64(pDecoder, &pInfo->stage) < 0) return -1; return 0; } @@ -87,9 +90,10 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tEncodeI32(pEncoder, pTask->info.selfChildId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->info.nodeId) < 0) return -1; if (tEncodeSEpSet(pEncoder, &pTask->info.epSet) < 0) return -1; + if (tEncodeSEpSet(pEncoder, &pTask->info.mnodeEpset) < 0) return -1; - if (tEncodeI64(pEncoder, pTask->chkInfo.id) < 0) return -1; - if (tEncodeI64(pEncoder, pTask->chkInfo.version) < 0) return -1; + if (tEncodeI64(pEncoder, 
pTask->chkInfo.checkpointId) < 0) return -1; + if (tEncodeI64(pEncoder, pTask->chkInfo.checkpointVer) < 0) return -1; if (tEncodeI8(pEncoder, pTask->info.fillHistory) < 0) return -1; if (tEncodeI64(pEncoder, pTask->historyTaskId.streamId)) return -1; @@ -102,10 +106,10 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tEncodeI64(pEncoder, pTask->dataRange.window.skey)) return -1; if (tEncodeI64(pEncoder, pTask->dataRange.window.ekey)) return -1; - int32_t epSz = taosArrayGetSize(pTask->pUpstreamEpInfoList); + int32_t epSz = taosArrayGetSize(pTask->pUpstreamInfoList); if (tEncodeI32(pEncoder, epSz) < 0) return -1; for (int32_t i = 0; i < epSz; i++) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i); + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); if (tEncodeStreamEpInfo(pEncoder, pInfo) < 0) return -1; } @@ -129,7 +133,8 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tSerializeSUseDbRspImp(pEncoder, &pTask->shuffleDispatcher.dbInfo) < 0) return -1; if (tEncodeCStr(pEncoder, pTask->shuffleDispatcher.stbFullName) < 0) return -1; } - if (tEncodeI64(pEncoder, pTask->triggerParam) < 0) return -1; + if (tEncodeI64(pEncoder, pTask->info.triggerParam) < 0) return -1; + if (tEncodeCStrWithLen(pEncoder, pTask->reserve, sizeof(pTask->reserve) - 1) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; @@ -153,9 +158,10 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeI32(pDecoder, &pTask->info.selfChildId) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->info.nodeId) < 0) return -1; if (tDecodeSEpSet(pDecoder, &pTask->info.epSet) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &pTask->info.mnodeEpset) < 0) return -1; - if (tDecodeI64(pDecoder, &pTask->chkInfo.id) < 0) return -1; - if (tDecodeI64(pDecoder, &pTask->chkInfo.version) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->chkInfo.checkpointId) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->chkInfo.checkpointVer) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->info.fillHistory) < 0) return -1; if (tDecodeI64(pDecoder, &pTask->historyTaskId.streamId)) return -1; @@ -171,7 +177,7 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { int32_t epSz = -1; if (tDecodeI32(pDecoder, &epSz) < 0) return -1; - pTask->pUpstreamEpInfoList = taosArrayInit(epSz, POINTER_BYTES); + pTask->pUpstreamInfoList = taosArrayInit(epSz, POINTER_BYTES); for (int32_t i = 0; i < epSz; i++) { SStreamChildEpInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamChildEpInfo)); if (pInfo == NULL) return -1; @@ -179,7 +185,7 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { taosMemoryFreeClear(pInfo); return -1; } - taosArrayPush(pTask->pUpstreamEpInfoList, &pInfo); + taosArrayPush(pTask->pUpstreamInfoList, &pInfo); } if (pTask->info.taskLevel != TASK_LEVEL__SINK) { @@ -204,7 +210,55 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDeserializeSUseDbRspImp(pDecoder, &pTask->shuffleDispatcher.dbInfo) < 0) return -1; if (tDecodeCStrTo(pDecoder, pTask->shuffleDispatcher.stbFullName) < 0) return -1; } - if (tDecodeI64(pDecoder, &pTask->triggerParam) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->info.triggerParam) < 0) return -1; + if (tDecodeCStrTo(pDecoder, pTask->reserve) < 0) return -1; + + tEndDecode(pDecoder); + return 0; +} + +int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo) { + int64_t ver; + int64_t skip64; + int8_t skip8; + 
int32_t skip32; + int16_t skip16; + SEpSet epSet; + + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &ver) < 0) return -1; + + if (ver != SSTREAM_TASK_VER) return -1; + + if (tDecodeI64(pDecoder, &skip64) < 0) return -1; + if (tDecodeI32(pDecoder, &skip32) < 0) return -1; + if (tDecodeI32(pDecoder, &skip32) < 0) return -1; + if (tDecodeI8(pDecoder, &skip8) < 0) return -1; + if (tDecodeI8(pDecoder, &skip8) < 0) return -1; + if (tDecodeI16(pDecoder, &skip16) < 0) return -1; + + if (tDecodeI8(pDecoder, &skip8) < 0) return -1; + if (tDecodeI8(pDecoder, &skip8) < 0) return -1; + + if (tDecodeI32(pDecoder, &skip32) < 0) return -1; + if (tDecodeI32(pDecoder, &skip32) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &epSet) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &epSet) < 0) return -1; + + if (tDecodeI64(pDecoder, &pChkpInfo->checkpointId) < 0) return -1; + if (tDecodeI64(pDecoder, &pChkpInfo->checkpointVer) < 0) return -1; + + tEndDecode(pDecoder); + return 0; +} +int32_t tDecodeStreamTaskId(SDecoder* pDecoder, SStreamTaskId* pTaskId) { + int64_t ver; + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &ver) < 0) return -1; + if (ver != SSTREAM_TASK_VER) return -1; + + if (tDecodeI64(pDecoder, &pTaskId->streamId) < 0) return -1; + if (tDecodeI32(pDecoder, &pTaskId->taskId) < 0) return -1; tEndDecode(pDecoder); return 0; @@ -215,18 +269,25 @@ static void freeItem(void* p) { rpcFreeCont(pInfo->msg.pCont); } +static void freeUpstreamItem(void* p) { + SStreamChildEpInfo** pInfo = p; + taosMemoryFree(*pInfo); +} + void tFreeStreamTask(SStreamTask* pTask) { - qDebug("free s-task:0x%x, %p", pTask->id.taskId, pTask); + int32_t taskId = pTask->id.taskId; + + qDebug("free s-task:0x%x, %p, state:%p", taskId, pTask, pTask->pState); // remove the ref by timer - while(pTask->status.timerActive > 0) { + while (pTask->status.timerActive > 0) { qDebug("s-task:%s wait for task stop timer activities", pTask->id.idStr); taosMsleep(10); } - if (pTask->schedTimer != NULL) { - taosTmrStop(pTask->schedTimer); - pTask->schedTimer = NULL; + if (pTask->schedInfo.pTimer != NULL) { + taosTmrStop(pTask->schedInfo.pTimer); + pTask->schedInfo.pTimer = NULL; } if (pTask->launchTaskTimer != NULL) { @@ -235,8 +296,8 @@ void tFreeStreamTask(SStreamTask* pTask) { } int32_t status = atomic_load_8((int8_t*)&(pTask->status.taskStatus)); - if (pTask->inputQueue) { - streamQueueClose(pTask->inputQueue, pTask->id.taskId); + if (pTask->inputInfo.queue) { + streamQueueClose(pTask->inputInfo.queue, pTask->id.taskId); } if (pTask->outputInfo.queue) { @@ -256,21 +317,22 @@ void tFreeStreamTask(SStreamTask* pTask) { walCloseReader(pTask->exec.pWalReader); } - taosArrayDestroyP(pTask->pUpstreamEpInfoList, taosMemoryFree); if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) { tDeleteSchemaWrapper(pTask->tbSink.pSchemaWrapper); taosMemoryFree(pTask->tbSink.pTSchema); tSimpleHashCleanup(pTask->tbSink.pTblInfo); } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { taosArrayDestroy(pTask->shuffleDispatcher.dbInfo.pVgroupInfos); - taosArrayDestroy(pTask->checkReqIds); - pTask->checkReqIds = NULL; + pTask->checkReqIds = taosArrayDestroy(pTask->checkReqIds); } if (pTask->pState) { + qDebug("s-task:0x%x start to free task state", taskId); streamStateClose(pTask->pState, status == TASK_STATUS__DROPPING); } + pTask->pReadyMsgList = taosArrayDestroy(pTask->pReadyMsgList); + taosThreadMutexDestroy(&pTask->lock); if (pTask->msgInfo.pData != NULL) { destroyStreamDataBlock(pTask->msgInfo.pData); 
pTask->msgInfo.pData = NULL; @@ -289,6 +351,203 @@ void tFreeStreamTask(SStreamTask* pTask) { pTask->pRspMsgList = NULL; } + if (pTask->pUpstreamInfoList != NULL) { + taosArrayDestroyEx(pTask->pUpstreamInfoList, freeUpstreamItem); + pTask->pUpstreamInfoList = NULL; + } + taosThreadMutexDestroy(&pTask->lock); taosMemoryFree(pTask); + + qDebug("s-task:0x%x free task completed", taskId); +} + +int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, int64_t ver) { + pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId); + pTask->refCnt = 1; + pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; + pTask->status.timerActive = 0; + pTask->inputInfo.queue = streamQueueOpen(512 << 10); + pTask->outputInfo.queue = streamQueueOpen(512 << 10); + + if (pTask->inputInfo.queue == NULL || pTask->outputInfo.queue == NULL) { + qError("s-task:%s failed to prepare the input/output queue, initialize task failed", pTask->id.idStr); + return -1; + } + + pTask->tsInfo.init = taosGetTimestampMs(); + pTask->inputInfo.status = TASK_INPUT_STATUS__NORMAL; + pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL; + pTask->pMeta = pMeta; + + pTask->chkInfo.currentVer = ver; + pTask->dataRange.range.maxVer = ver; + pTask->dataRange.range.minVer = ver; + pTask->pMsgCb = pMsgCb; + + taosThreadMutexInit(&pTask->lock, NULL); + streamTaskOpenAllUpstreamInput(pTask); + + return TSDB_CODE_SUCCESS; +} + +int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask) { + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + return 0; + } else { + int32_t type = pTask->outputInfo.type; + if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__TABLE) { + return 1; + } else { + SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + return taosArrayGetSize(vgInfo); + } + } +} + +static SStreamChildEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) { + SStreamChildEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamChildEpInfo)); + if (pEpInfo == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + pEpInfo->childId = pTask->info.selfChildId; + pEpInfo->epSet = pTask->info.epSet; + pEpInfo->nodeId = pTask->info.nodeId; + pEpInfo->taskId = pTask->id.taskId; + pEpInfo->stage = -1; + + return pEpInfo; +} + +int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask) { + SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pUpstreamTask); + if (pEpInfo == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + if (pTask->pUpstreamInfoList == NULL) { + pTask->pUpstreamInfoList = taosArrayInit(4, POINTER_BYTES); + } + + taosArrayPush(pTask->pUpstreamInfoList, &pEpInfo); + return TSDB_CODE_SUCCESS; +} + +void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet) { + char buf[512] = {0}; + EPSET_TO_STR(pEpSet, buf); + + int32_t numOfUpstream = taosArrayGetSize(pTask->pUpstreamInfoList); + for (int32_t i = 0; i < numOfUpstream; ++i) { + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); + if (pInfo->nodeId == nodeId) { + epsetAssign(&pInfo->epSet, pEpSet); + qDebug("s-task:0x%x update the upstreamInfo, nodeId:%d newEpset:%s", pTask->id.taskId, nodeId, buf); + break; + } + } +} + +void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDownstreamTask) { + STaskDispatcherFixedEp* pDispatcher = &pTask->fixedEpDispatcher; + pDispatcher->taskId = pDownstreamTask->id.taskId; + pDispatcher->nodeId = pDownstreamTask->info.nodeId; + pDispatcher->epSet = 
pDownstreamTask->info.epSet; + + pTask->outputInfo.type = TASK_OUTPUT__FIXED_DISPATCH; + pTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH; +} + +void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet) { + char buf[512] = {0}; + EPSET_TO_STR(pEpSet, buf); + + int8_t type = pTask->outputInfo.type; + if (type == TASK_OUTPUT__SHUFFLE_DISPATCH) { + SArray* pVgs = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + + int32_t numOfVgroups = taosArrayGetSize(pVgs); + for (int32_t i = 0; i < numOfVgroups; i++) { + SVgroupInfo* pVgInfo = taosArrayGet(pVgs, i); + + if (pVgInfo->vgId == nodeId) { + epsetAssign(&pVgInfo->epSet, pEpSet); + qDebug("s-task:0x%x update the dispatch info, nodeId:%d newEpset:%s", pTask->id.taskId, nodeId, buf); + break; + } + } + } else if (type == TASK_OUTPUT__FIXED_DISPATCH) { + STaskDispatcherFixedEp* pDispatcher = &pTask->fixedEpDispatcher; + if (pDispatcher->nodeId == nodeId) { + epsetAssign(&pDispatcher->epSet, pEpSet); + qDebug("s-task:0x%x update the dispatch info, nodeId:%d newEpSet:%s", pTask->id.taskId, nodeId, buf); + } + } else { + // do nothing + } +} + +int32_t streamTaskStop(SStreamTask* pTask) { + SStreamMeta* pMeta = pTask->pMeta; + int64_t st = taosGetTimestampMs(); + const char* id = pTask->id.idStr; + + pTask->status.taskStatus = TASK_STATUS__STOP; + qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS); + + while (/*pTask->status.schedStatus != TASK_SCHED_STATUS__INACTIVE */ !streamTaskIsIdle(pTask)) { + qDebug("s-task:%s level:%d wait for task to be idle, check again in 100ms", id, pTask->info.taskLevel); + taosMsleep(100); + } + + int64_t el = taosGetTimestampMs() - st; + qDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", pMeta->vgId, pTask->id.idStr, el); + return 0; +} + +int32_t doUpdateTaskEpset(SStreamTask* pTask, int32_t nodeId, SEpSet* pEpSet) { + char buf[512] = {0}; + + if (pTask->info.nodeId == nodeId) { // execution task should be moved away + epsetAssign(&pTask->info.epSet, pEpSet); + EPSET_TO_STR(pEpSet, buf) + qDebug("s-task:0x%x (vgId:%d) self node epset is updated %s", pTask->id.taskId, nodeId, buf); + } + + // check for the dispath info and the upstream task info + int32_t level = pTask->info.taskLevel; + if (level == TASK_LEVEL__SOURCE) { + streamTaskUpdateDownstreamInfo(pTask, nodeId, pEpSet); + } else if (level == TASK_LEVEL__AGG) { + streamTaskUpdateUpstreamInfo(pTask, nodeId, pEpSet); + streamTaskUpdateDownstreamInfo(pTask, nodeId, pEpSet); + } else { // TASK_LEVEL__SINK + streamTaskUpdateUpstreamInfo(pTask, nodeId, pEpSet); + } + + return 0; +} + +int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList) { + for (int32_t i = 0; i < taosArrayGetSize(pNodeList); ++i) { + SNodeUpdateInfo* pInfo = taosArrayGet(pNodeList, i); + doUpdateTaskEpset(pTask, pInfo->nodeId, &pInfo->newEp); + } + return 0; +} + +void streamTaskResetUpstreamStageInfo(SStreamTask* pTask) { + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + return; + } + + int32_t size = taosArrayGetSize(pTask->pUpstreamInfoList); + for (int32_t i = 0; i < size; ++i) { + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); + pInfo->stage = -1; + } + + qDebug("s-task:%s reset all upstream tasks stage info", pTask->id.idStr); } diff --git a/source/libs/stream/src/streamUpdate.c b/source/libs/stream/src/streamUpdate.c index 7a8de91d77..f9ab672c4b 100644 --- a/source/libs/stream/src/streamUpdate.c +++ b/source/libs/stream/src/streamUpdate.c @@ -89,11 +89,11 @@ static int64_t adjustWatermark(int64_t 
adjInterval, int64_t originInt, int64_t w return watermark; } -SUpdateInfo *updateInfoInitP(SInterval *pInterval, int64_t watermark) { - return updateInfoInit(pInterval->interval, pInterval->precision, watermark); +SUpdateInfo *updateInfoInitP(SInterval *pInterval, int64_t watermark, bool igUp) { + return updateInfoInit(pInterval->interval, pInterval->precision, watermark, igUp); } -SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t watermark) { +SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t watermark, bool igUp) { SUpdateInfo *pInfo = taosMemoryCalloc(1, sizeof(SUpdateInfo)); if (pInfo == NULL) { return NULL; @@ -104,30 +104,33 @@ SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t waterma pInfo->interval = adjustInterval(interval, precision); pInfo->watermark = adjustWatermark(pInfo->interval, interval, watermark); - uint64_t bfSize = (uint64_t)(pInfo->watermark / pInfo->interval); + uint64_t bfSize = 0; + if (!igUp) { + bfSize = (uint64_t)(pInfo->watermark / pInfo->interval); - pInfo->pTsSBFs = taosArrayInit(bfSize, sizeof(void *)); - if (pInfo->pTsSBFs == NULL) { - updateInfoDestroy(pInfo); - return NULL; + pInfo->pTsSBFs = taosArrayInit(bfSize, sizeof(void *)); + if (pInfo->pTsSBFs == NULL) { + updateInfoDestroy(pInfo); + return NULL; + } + windowSBfAdd(pInfo, bfSize); + + pInfo->pTsBuckets = taosArrayInit(DEFAULT_BUCKET_SIZE, sizeof(TSKEY)); + if (pInfo->pTsBuckets == NULL) { + updateInfoDestroy(pInfo); + return NULL; + } + + TSKEY dumy = 0; + for (uint64_t i = 0; i < DEFAULT_BUCKET_SIZE; ++i) { + taosArrayPush(pInfo->pTsBuckets, &dumy); + } + pInfo->numBuckets = DEFAULT_BUCKET_SIZE; + pInfo->pCloseWinSBF = NULL; + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT); + pInfo->pMap = taosHashInit(DEFAULT_MAP_CAPACITY, hashFn, true, HASH_NO_LOCK); } pInfo->numSBFs = bfSize; - windowSBfAdd(pInfo, bfSize); - - pInfo->pTsBuckets = taosArrayInit(DEFAULT_BUCKET_SIZE, sizeof(TSKEY)); - if (pInfo->pTsBuckets == NULL) { - updateInfoDestroy(pInfo); - return NULL; - } - - TSKEY dumy = 0; - for (uint64_t i = 0; i < DEFAULT_BUCKET_SIZE; ++i) { - taosArrayPush(pInfo->pTsBuckets, &dumy); - } - pInfo->numBuckets = DEFAULT_BUCKET_SIZE; - pInfo->pCloseWinSBF = NULL; - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT); - pInfo->pMap = taosHashInit(DEFAULT_MAP_CAPACITY, hashFn, true, HASH_NO_LOCK); pInfo->maxDataVersion = 0; return pInfo; } diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index dd857141c1..bca9dcabda 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -49,7 +49,8 @@ struct SStreamFileState { typedef SRowBuffPos SRowBuffInfo; SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, - GetTsFun fp, void* pFile, TSKEY delMark, const char* idstr) { + GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId, + int64_t checkpointId) { if (memSize <= 0) { memSize = DEFAULT_MAX_STREAM_BUFFER_SIZE; } @@ -83,9 +84,9 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_ pFileState->deleteMark = delMark; pFileState->flushMark = INT64_MIN; pFileState->maxTs = INT64_MIN; - pFileState->id = taosStrdup(idstr); + pFileState->id = taosStrdup(taskId); - recoverSnapshot(pFileState); + recoverSnapshot(pFileState, checkpointId); return pFileState; _error: @@ -385,7 +386,7 @@ int32_t 
flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, 0, buf); // todo handle failure memset(buf, 0, len); -// qDebug("===stream===put %" PRId64 " to disc, res %d", sKey.key.ts, code); + // qDebug("===stream===put %" PRId64 " to disc, res %d", sKey.key.ts, code); } taosMemoryFree(buf); @@ -396,8 +397,8 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, streamStateClearBatch(batch); int64_t elapsed = taosGetTimestampMs() - st; - qDebug("%s flush to disk in batch model completed, rows:%d, batch size:%d, elapsed time:%"PRId64"ms", pFileState->id, numOfElems, - BATCH_LIMIT, elapsed); + qDebug("%s flush to disk in batch model completed, rows:%d, batch size:%d, elapsed time:%" PRId64 "ms", + pFileState->id, numOfElems, BATCH_LIMIT, elapsed); if (flushState) { const char* taskKey = "streamFileState"; @@ -479,7 +480,7 @@ int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark) { return code; } -int32_t recoverSnapshot(SStreamFileState* pFileState) { +int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) { int32_t code = TSDB_CODE_SUCCESS; if (pFileState->maxTs != INT64_MIN) { int64_t mark = (INT64_MIN + pFileState->deleteMark >= pFileState->maxTs) @@ -487,8 +488,6 @@ int32_t recoverSnapshot(SStreamFileState* pFileState) { : pFileState->maxTs - pFileState->deleteMark; deleteExpiredCheckPoint(pFileState, mark); } - void* pStVal = NULL; - int32_t len = 0; SWinKey key = {.groupId = 0, .ts = 0}; SStreamStateCur* pCur = streamStateSeekToLast_rocksdb(pFileState->pFileStore, &key); @@ -508,9 +507,12 @@ int32_t recoverSnapshot(SStreamFileState* pFileState) { destroyRowBuffPos(pNewPos); SListNode* pNode = tdListPopTail(pFileState->usedBuffs); taosMemoryFreeClear(pNode); + taosMemoryFreeClear(pVal); break; } + ASSERT(pVLen == pFileState->rowSize); memcpy(pNewPos->pRowBuff, pVal, pVLen); + taosMemoryFreeClear(pVal); code = tSimpleHashPut(pFileState->rowBuffMap, pNewPos->pKey, pFileState->keyLen, &pNewPos, POINTER_BYTES); if (code != TSDB_CODE_SUCCESS) { destroyRowBuffPos(pNewPos); diff --git a/source/libs/stream/test/tstreamUpdateTest.cpp b/source/libs/stream/test/tstreamUpdateTest.cpp index 0e84d6b8bd..f63939ac9e 100644 --- a/source/libs/stream/test/tstreamUpdateTest.cpp +++ b/source/libs/stream/test/tstreamUpdateTest.cpp @@ -12,7 +12,7 @@ class StreamStateEnv : public ::testing::Test { protected: virtual void SetUp() { streamMetaInit(); - backend = streamBackendInit(path); + backend = streamBackendInit(path, 0); } virtual void TearDown() { streamMetaCleanup(); diff --git a/source/libs/tdb/test/tdbPageRecycleTest.cpp b/source/libs/tdb/test/tdbPageRecycleTest.cpp index 4d7b314917..d740bd0f94 100644 --- a/source/libs/tdb/test/tdbPageRecycleTest.cpp +++ b/source/libs/tdb/test/tdbPageRecycleTest.cpp @@ -804,7 +804,7 @@ TEST(TdbPageRecycleTest, recycly_delete_interior_ofp_nocommit) { // sprintf(&key[count - 2], "%c", i); key[count - 2] = '0' + i; - ret = tdbTbInsert(pDb, key, count, NULL, NULL, txn); + ret = tdbTbInsert(pDb, key, count, NULL, 0, txn); GTEST_ASSERT_EQ(ret, 0); } } diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index a6b7a20f76..17ef6ce530 100644 --- a/source/libs/transport/inc/transComm.h +++ b/source/libs/transport/inc/transComm.h @@ -262,7 +262,6 @@ bool transAsyncPoolIsEmpty(SAsyncPool* pool); #define ASYNC_CHECK_HANDLE(exh1, id) \ do { \ if (id > 0) { \ - tTrace("handle step1"); \ SExHandle* exh2 = transAcquireExHandle(transGetRefMgt(), id); \ if (exh2 == NULL || id != 
exh2->refId) { \ tTrace("handle %p except, may already freed, ignore msg, ref1:%" PRIu64 ", ref2:%" PRIu64, exh1, \ diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index cfdc5b5e8b..b02c8aad26 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -12,7 +12,10 @@ * along with this program. If not, see . */ +// clang-format off #include "transComm.h" +#include "tmisce.h" +// clang-format on typedef struct { int32_t numOfConn; @@ -308,19 +311,6 @@ static void cliWalkCb(uv_handle_t* handle, void* arg); } \ } while (0) -#define EPSET_DEBUG_STR(epSet, tbuf) \ - do { \ - int len = snprintf(tbuf, sizeof(tbuf), "epset:{"); \ - for (int i = 0; i < (epSet)->numOfEps; i++) { \ - if (i == (epSet)->numOfEps - 1) { \ - len += snprintf(tbuf + len, sizeof(tbuf) - len, "%d. %s:%d", i, (epSet)->eps[i].fqdn, (epSet)->eps[i].port); \ - } else { \ - len += snprintf(tbuf + len, sizeof(tbuf) - len, "%d. %s:%d, ", i, (epSet)->eps[i].fqdn, (epSet)->eps[i].port); \ - } \ - } \ - len += snprintf(tbuf + len, sizeof(tbuf) - len, "}, inUse:%d", (epSet)->inUse); \ - } while (0); - static void* cliWorkThread(void* arg); static void cliReleaseUnfinishedMsg(SCliConn* conn) { @@ -1268,7 +1258,7 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { SCliThrd* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; - if (status == -1) status = ENETUNREACH; + if (status == -1) status = UV_EADDRNOTAVAIL; if (pConn->pBatch == NULL) { SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0); @@ -2167,7 +2157,7 @@ static void cliSchedMsgToNextNode(SCliMsg* pMsg, SCliThrd* pThrd) { if (rpcDebugFlag & DEBUG_DEBUG) { STraceId* trace = &pMsg->msg.info.traceId; char tbuf[256] = {0}; - EPSET_DEBUG_STR(&pCtx->epSet, tbuf); + EPSET_TO_STR(&pCtx->epSet, tbuf); tGDebug("%s retry on next node,use:%s, step: %d,timeout:%" PRId64 "", transLabel(pThrd->pTransInst), tbuf, pCtx->retryStep, pCtx->retryNextInterval); } @@ -2396,7 +2386,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { if (hasEpSet) { if (rpcDebugFlag & DEBUG_TRACE) { char tbuf[256] = {0}; - EPSET_DEBUG_STR(&pCtx->epSet, tbuf); + EPSET_TO_STR(&pCtx->epSet, tbuf); tGTrace("%s conn %p extract epset from msg", CONN_GET_INST_LABEL(pConn), pConn); } } diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index 40610d7651..a53830723c 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -196,8 +196,6 @@ static bool uvHandleReq(SSvrConn* pConn) { tError("%s conn %p recv invalid packet, failed to decompress", transLabel(pTransInst), pConn); return false; } - tDebug("head version: %d 2", pHead->version); - pHead->code = htonl(pHead->code); pHead->msgLen = htonl(pHead->msgLen); @@ -727,7 +725,6 @@ void uvOnAcceptCb(uv_stream_t* stream, int status) { } } void uvOnConnectionCb(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) { - tTrace("connection coming"); if (nread < 0) { if (nread != UV_EOF) { tError("read error %s", uv_err_name(nread)); diff --git a/source/util/src/tarray.c b/source/util/src/tarray.c index f5e15e7436..8e7c0f9584 100644 --- a/source/util/src/tarray.c +++ b/source/util/src/tarray.c @@ -319,7 +319,7 @@ SArray* taosArrayDup(const SArray* pSrc, __array_item_dup_fn_t fn) { if (NULL == pSrc) { return NULL; } - + if (pSrc->size == 0) { // empty array list return taosArrayInit(8, pSrc->elemSize); } @@ -360,6 +360,23 @@ void taosArrayClearEx(SArray* pArray, void (*fp)(void*)) { pArray->size = 0; 
} +void taosArrayClearP(SArray* pArray, void (*fp)(void*)) { + // if (pArray == NULL) return; + // if (fp == NULL) { + // pArray->size = 0; + // return; + // } + + // for (int32_t i = 0; i < pArray->size; ++i) { + // fp(TARRAY_GET_ELEM(pArray, i)); + // } + if (pArray) { + for (int32_t i = 0; i < pArray->size; i++) { + fp(*(void**)TARRAY_GET_ELEM(pArray, i)); + } + } + taosArrayClear(pArray); +} void* taosArrayDestroy(SArray* pArray) { if (pArray) { diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 9281eaae11..a02657a543 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -546,7 +546,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_TIMELINE_FUNC, "Invalid timeline fu TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_PASSWD, "Invalid password") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_ALTER_TABLE, "Invalid alter table statement") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_CANNOT_DROP_PRIMARY_KEY, "Primary timestamp column cannot be dropped") -TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_MODIFY_COL, "Only binary/nchar/geometry column length could be modified, and the length can only be increased, not decreased") +TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_MODIFY_COL, "Only varbinary/binary/nchar/geometry column length could be modified, and the length can only be increased, not decreased") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_TBNAME, "Invalid tbname pseudo column") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_FUNCTION_NAME, "Invalid function name") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_COMMENT_TOO_LONG, "Comment too long") diff --git a/source/util/src/tref.c b/source/util/src/tref.c index e70e12b37b..1bd3099b2d 100644 --- a/source/util/src/tref.c +++ b/source/util/src/tref.c @@ -181,7 +181,8 @@ int64_t taosAddRef(int32_t rsetId, void *p) { if (pSet->nodeList[hash]) pSet->nodeList[hash]->prev = pNode; pSet->nodeList[hash] = pNode; - uTrace("rsetId:%d p:%p rid:%" PRId64 " is added, count:%d", rsetId, p, rid, pSet->count); + uTrace("rsetId:%d p:%p rid:%" PRId64 " is added, count:%d, remain count:%d", rsetId, p, rid, pSet->count, + pNode->count); taosUnlockList(pSet->lockedBy + hash); @@ -235,7 +236,7 @@ void *taosAcquireRef(int32_t rsetId, int64_t rid) { if (pNode->removed == 0) { pNode->count++; p = pNode->p; - uTrace("rsetId:%d p:%p rid:%" PRId64 " is acquired", rsetId, pNode->p, rid); + uTrace("rsetId:%d p:%p rid:%" PRId64 " is acquired, remain count:%d", rsetId, pNode->p, rid, pNode->count); } else { terrno = TSDB_CODE_REF_NOT_EXIST; uTrace("rsetId:%d p:%p rid:%" PRId64 " is already removed, failed to acquire", rsetId, pNode->p, rid); diff --git a/stream b/stream deleted file mode 100644 index 3eafb580a3..0000000000 --- a/stream +++ /dev/null @@ -1,1137 +0,0 @@ -3986:03/29 15:26:32.540895 00099488 QRY streamStateSetNumber, seq: 1 -3987:03/29 15:26:32.540937 00099488 QRY streamStateSetNumber, seq: 2 -4437:03/29 15:26:32.601303 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] failed to read from default, err: not exist -4438:03/29 15:26:32.601553 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to write to default, valLen:135 -4439:03/29 15:26:32.601569 00099504 QRY streamStateReleaseBuf -4447:03/29 15:26:32.601725 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 2] succ to read from default -4448:03/29 15:26:32.601752 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -4452:03/29 15:26:32.601817 00099504 QRY streamStateReleaseBuf -4460:03/29 15:26:32.601919 00099504 QRY 
00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -36878:03/29 15:26:37.617660 00099575 QRY streamStateReleaseBuf -36883:03/29 15:26:37.617638 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to del from default -36884:03/29 15:26:37.617742 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to read from default -36891:03/29 15:26:37.617762 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] failed to read from default, err: not exist -36895:03/29 15:26:37.617885 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to read from default -36896:03/29 15:26:37.617924 00099504 QRY streamStateReleaseBuf -36898:03/29 15:26:37.617938 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] failed to read from default, err: not exist -36899:03/29 15:26:37.617951 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 15] failed to read from default, err: not exist -36902:03/29 15:26:37.617998 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 17] failed to read from default, err: not exist -36908:03/29 15:26:37.618051 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -36909:03/29 15:26:37.617951 00099575 QRY streamStateClear_rocksdb seq:8 -36911:03/29 15:26:37.618063 00099504 QRY streamStateReleaseBuf -36923:03/29 15:26:37.618223 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -36925:03/29 15:26:37.618268 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -36926:03/29 15:26:37.618279 00099504 QRY streamStateReleaseBuf -36927:03/29 15:26:37.618319 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] failed to read from default, err: not exist -36928:03/29 15:26:37.618362 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to write to default, valLen:159 -36929:03/29 15:26:37.618377 00099504 QRY streamStateReleaseBuf -36939:03/29 15:26:37.618467 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -36940:03/29 15:26:37.618509 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -36945:03/29 15:26:37.618592 00099504 QRY streamStateReleaseBuf -36971:03/29 15:26:37.619057 00099504 QRY streamStateGetCur_rocksdb -36972:03/29 15:26:37.619108 00099504 QRY streamStateCurPrev_rocksdb -36973:03/29 15:26:37.619154 00099504 QRY streamStateGetKVByCur_rocksdb -36975:03/29 15:26:37.619179 00099504 QRY streamStateFreeCur -37210:03/29 15:26:37.624330 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -37216:03/29 15:26:37.624434 00099575 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -37217:03/29 15:26:37.624449 00099575 QRY streamStateReleaseBuf -37218:03/29 15:26:37.624474 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] failed to read from default, err: not exist -37220:03/29 15:26:37.624513 00099575 QRY streamState str:[groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to write to default, valLen:159 -37221:03/29 15:26:37.624548 00099575 QRY streamStateReleaseBuf -37230:03/29 15:26:37.624686 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default 
-37231:03/29 15:26:37.624710 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -37235:03/29 15:26:37.624748 00099575 QRY streamStateReleaseBuf -37248:03/29 15:26:37.624926 00099575 QRY streamStateGetCur_rocksdb -37251:03/29 15:26:37.624975 00099575 QRY streamStateCurPrev_rocksdb -37252:03/29 15:26:37.625028 00099575 QRY streamStateGetKVByCur_rocksdb -37254:03/29 15:26:37.625060 00099575 QRY streamStateFreeCur -37627:03/29 15:26:37.823270 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to del from default -37628:03/29 15:26:37.823315 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -37682:03/29 15:26:37.824480 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] failed to read from default, err: not exist -37683:03/29 15:26:37.824646 00099593 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to write to default, valLen:135 -37684:03/29 15:26:37.824676 00099593 QRY streamStateReleaseBuf -37705:03/29 15:26:37.825482 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 8] succ to read from default -37706:03/29 15:26:37.825552 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -37710:03/29 15:26:37.825677 00099593 QRY streamStateReleaseBuf -37719:03/29 15:26:37.825826 00099593 QRY streamStateClear_rocksdb seq:8 -37818:03/29 15:26:37.830111 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to del from default -37821:03/29 15:26:37.830255 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to del from default -37822:03/29 15:26:37.830299 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to read from default -37823:03/29 15:26:37.830329 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] failed to read from default, err: not exist -37824:03/29 15:26:37.830368 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to read from default -37825:03/29 15:26:37.830392 00099504 QRY streamStateReleaseBuf -37826:03/29 15:26:37.830423 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] failed to read from default, err: not exist -37827:03/29 15:26:37.830458 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 15] failed to read from default, err: not exist -37828:03/29 15:26:37.830490 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 17] failed to read from default, err: not exist -37829:03/29 15:26:37.830558 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -37830:03/29 15:26:37.830580 00099504 QRY streamStateReleaseBuf -37836:03/29 15:26:37.830669 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -37837:03/29 15:26:37.830733 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -37838:03/29 15:26:37.830755 00099504 QRY streamStateReleaseBuf -37839:03/29 15:26:37.830787 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] failed to read from default, err: not exist -37840:03/29 15:26:37.830842 00099504 QRY streamState str:[groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to write to default, valLen:159 -37841:03/29 15:26:37.830864 00099504 QRY streamStateReleaseBuf -37850:03/29 15:26:37.831140 00099504 QRY streamState str: 
[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -37851:03/29 15:26:37.831169 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -37855:03/29 15:26:37.831253 00099504 QRY streamStateReleaseBuf -37864:03/29 15:26:37.831412 00099504 QRY streamStateGetCur_rocksdb -37865:03/29 15:26:37.831473 00099504 QRY streamStateCurPrev_rocksdb -37866:03/29 15:26:37.831548 00099504 QRY streamStateGetKVByCur_rocksdb -37868:03/29 15:26:37.831590 00099504 QRY streamStateFreeCur -38448:03/29 15:26:38.045401 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] failed to read from default, err: not exist -38449:03/29 15:26:38.045492 00099575 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to write to default, valLen:135 -38450:03/29 15:26:38.045507 00099575 QRY streamStateReleaseBuf -38457:03/29 15:26:38.045670 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to read from default -38458:03/29 15:26:38.045695 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -38462:03/29 15:26:38.045736 00099575 QRY streamStateReleaseBuf -38471:03/29 15:26:38.045894 00099575 QRY streamStateClear_rocksdb seq:19 -38586:03/29 15:26:38.048400 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -38590:03/29 15:26:38.048485 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -38602:03/29 15:26:38.048526 00099593 QRY streamStateReleaseBuf -38606:03/29 15:26:38.048792 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to read from default -38614:03/29 15:26:38.048981 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to write to default, valLen:159 -38616:03/29 15:26:38.049143 00099593 QRY streamStateReleaseBuf -38626:03/29 15:26:38.049379 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -38627:03/29 15:26:38.049402 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -38635:03/29 15:26:38.049480 00099593 QRY streamStateReleaseBuf -38644:03/29 15:26:38.049633 00099593 QRY streamStateGetCur_rocksdb -38647:03/29 15:26:38.049683 00099593 QRY streamStateCurPrev_rocksdb -38648:03/29 15:26:38.049776 00099593 QRY streamStateGetKVByCur_rocksdb -38651:03/29 15:26:38.049812 00099593 QRY streamStateFreeCur -38954:03/29 15:26:38.056513 00099584 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] failed to read from default, err: not exist -38962:03/29 15:26:38.056672 00099584 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to write to default, valLen:135 -38963:03/29 15:26:38.056816 00099584 QRY streamStateReleaseBuf -38981:03/29 15:26:38.057479 00099584 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to read from default -38982:03/29 15:26:38.057676 00099584 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -38986:03/29 15:26:38.057789 00099584 QRY streamStateReleaseBuf -39011:03/29 15:26:38.058195 00099584 QRY streamStateClear_rocksdb seq:19 -39234:03/29 15:26:38.063323 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -39242:03/29 15:26:38.063607 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -39243:03/29 15:26:38.063622 00099593 QRY 
streamStateReleaseBuf -39244:03/29 15:26:38.063679 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to read from default -39245:03/29 15:26:38.063721 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] failed to read from default, err: not exist -39246:03/29 15:26:38.063740 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to write to default, valLen:159 -39247:03/29 15:26:38.063758 00099593 QRY streamStateReleaseBuf -39252:03/29 15:26:38.063810 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to write to default, valLen:135 -39254:03/29 15:26:38.063823 00099504 QRY streamStateReleaseBuf -39263:03/29 15:26:38.064107 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to read from default -39274:03/29 15:26:38.064139 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -39278:03/29 15:26:38.064452 00099504 QRY streamStateReleaseBuf -39292:03/29 15:26:38.064610 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -39295:03/29 15:26:38.064734 00099504 QRY streamStateClear_rocksdb seq:8 -39304:03/29 15:26:38.064698 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -39308:03/29 15:26:38.065178 00099593 QRY streamStateReleaseBuf -39330:03/29 15:26:38.065629 00099593 QRY streamStateGetCur_rocksdb -39336:03/29 15:26:38.065809 00099593 QRY streamStateCurPrev_rocksdb -39341:03/29 15:26:38.065967 00099593 QRY streamStateGetKVByCur_rocksdb -39358:03/29 15:26:38.066225 00099593 QRY streamStateFreeCur -39530:03/29 15:26:38.069291 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -39532:03/29 15:26:38.069399 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -39533:03/29 15:26:38.069441 00099593 QRY streamStateReleaseBuf -39534:03/29 15:26:38.069469 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to read from default -39543:03/29 15:26:38.069576 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to write to default, valLen:159 -39544:03/29 15:26:38.069618 00099593 QRY streamStateReleaseBuf -39570:03/29 15:26:38.069703 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] failed to read from default, err: not exist -39575:03/29 15:26:38.070400 00099504 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to write to default, valLen:135 -39576:03/29 15:26:38.070424 00099504 QRY streamStateReleaseBuf -39594:03/29 15:26:38.070680 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to read from default -39595:03/29 15:26:38.070791 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -39599:03/29 15:26:38.070851 00099504 QRY streamStateReleaseBuf -39603:03/29 15:26:38.070998 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -39607:03/29 15:26:38.071113 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -39613:03/29 15:26:38.071207 00099593 QRY streamStateReleaseBuf -39657:03/29 15:26:38.072025 00099504 QRY streamStateClear_rocksdb seq:8 -39683:03/29 15:26:38.072546 00099593 QRY streamStateGetCur_rocksdb -39685:03/29 15:26:38.072616 00099593 QRY streamStateCurPrev_rocksdb -39688:03/29 15:26:38.072721 00099593 
QRY streamStateGetKVByCur_rocksdb -39691:03/29 15:26:38.072755 00099593 QRY streamStateFreeCur -39868:03/29 15:26:38.079438 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -39876:03/29 15:26:38.079538 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -39877:03/29 15:26:38.079739 00099593 QRY streamStateReleaseBuf -39879:03/29 15:26:38.079775 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to read from default -39892:03/29 15:26:38.079910 00099593 QRY streamState str:[groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to write to default, valLen:159 -39893:03/29 15:26:38.080684 00099593 QRY streamStateReleaseBuf -39916:03/29 15:26:38.081473 00099593 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to read from default -39917:03/29 15:26:38.081567 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -39924:03/29 15:26:38.081650 00099593 QRY streamStateReleaseBuf -39935:03/29 15:26:38.081780 00099593 QRY streamStateGetCur_rocksdb -39937:03/29 15:26:38.081903 00099593 QRY streamStateCurPrev_rocksdb -39941:03/29 15:26:38.081980 00099593 QRY streamStateGetKVByCur_rocksdb -39947:03/29 15:26:38.082150 00099593 QRY streamStateFreeCur -40238:03/29 15:26:38.088762 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] failed to read from default, err: not exist -40239:03/29 15:26:38.088910 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to write to default, valLen:135 -40240:03/29 15:26:38.088931 00099575 QRY streamStateReleaseBuf -40249:03/29 15:26:38.089088 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to read from default -40262:03/29 15:26:38.089141 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -40271:03/29 15:26:38.089357 00099575 QRY streamStateReleaseBuf -40313:03/29 15:26:38.090004 00099575 QRY streamStateClear_rocksdb seq:19 -40561:03/29 15:26:38.094136 00099593 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] failed to read from default, err: not exist -40563:03/29 15:26:38.094427 00099593 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -40565:03/29 15:26:38.094445 00099593 QRY streamStateReleaseBuf -40568:03/29 15:26:38.094478 00099593 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] failed to read from default, err: not exist -40573:03/29 15:26:38.094521 00099593 QRY streamState str:[groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to write to default, valLen:159 -40583:03/29 15:26:38.094535 00099593 QRY streamStateReleaseBuf -40636:03/29 15:26:38.095424 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] failed to read from default, err: not exist -40637:03/29 15:26:38.095473 00099593 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -40638:03/29 15:26:38.095496 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -40642:03/29 15:26:38.095576 00099593 QRY streamStateReleaseBuf -40643:03/29 15:26:38.095504 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to write to default, valLen:135 -40648:03/29 15:26:38.095597 00099575 QRY streamStateReleaseBuf -40663:03/29 15:26:38.095838 00099593 QRY streamStateGetCur_rocksdb -40666:03/29 15:26:38.095882 00099593 
QRY streamStateCurPrev_rocksdb -40668:03/29 15:26:38.095926 00099593 QRY streamStateGetKVByCur_rocksdb -40670:03/29 15:26:38.095882 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to read from default -40671:03/29 15:26:38.095965 00099593 QRY streamStateFreeCur -40672:03/29 15:26:38.095972 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -40678:03/29 15:26:38.096018 00099575 QRY streamStateReleaseBuf -40691:03/29 15:26:38.096133 00099575 QRY streamStateClear_rocksdb seq:19 -40842:03/29 15:26:38.098338 00099593 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -40843:03/29 15:26:38.098396 00099593 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -40844:03/29 15:26:38.098410 00099593 QRY streamStateReleaseBuf -40855:03/29 15:26:38.098439 00099593 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to read from default -40859:03/29 15:26:38.098591 00099593 QRY streamState str:[groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to write to default, valLen:159 -40860:03/29 15:26:38.098608 00099593 QRY streamStateReleaseBuf -40946:03/29 15:26:38.098842 00099593 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -40947:03/29 15:26:38.100078 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -40951:03/29 15:26:38.100132 00099593 QRY streamStateReleaseBuf -40960:03/29 15:26:38.100244 00099593 QRY streamStateGetCur_rocksdb -40961:03/29 15:26:38.100281 00099593 QRY streamStateCurPrev_rocksdb -40962:03/29 15:26:38.100315 00099593 QRY streamStateGetKVByCur_rocksdb -40964:03/29 15:26:38.100347 00099593 QRY streamStateFreeCur -41182:03/29 15:26:38.103325 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] failed to read from default, err: not exist -41192:03/29 15:26:38.103733 00099504 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to write to default, valLen:135 -41195:03/29 15:26:38.103855 00099504 QRY streamStateReleaseBuf -41215:03/29 15:26:38.104349 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to read from default -41216:03/29 15:26:38.104372 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -41228:03/29 15:26:38.104601 00099504 QRY streamStateReleaseBuf -41285:03/29 15:26:38.105526 00099504 QRY streamStateClear_rocksdb seq:8 -41487:03/29 15:26:38.108582 00099584 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -41490:03/29 15:26:38.108936 00099584 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -41491:03/29 15:26:38.108955 00099584 QRY streamStateReleaseBuf -41493:03/29 15:26:38.108983 00099584 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] failed to read from default, err: not exist -41501:03/29 15:26:38.109032 00099584 QRY streamState str:[groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to write to default, valLen:159 -41502:03/29 15:26:38.109122 00099584 QRY streamStateReleaseBuf -41527:03/29 15:26:38.109355 00099584 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -41528:03/29 15:26:38.109395 00099584 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -41533:03/29 15:26:38.109705 00099584 QRY streamStateReleaseBuf -41588:03/29 15:26:38.110405 
00099584 QRY streamStateGetCur_rocksdb -41589:03/29 15:26:38.110567 00099584 QRY streamStateCurPrev_rocksdb -41591:03/29 15:26:38.110601 00099584 QRY streamStateGetKVByCur_rocksdb -41599:03/29 15:26:38.110694 00099584 QRY streamStateFreeCur -41606:03/29 15:26:38.110744 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] failed to read from default, err: not exist -41613:03/29 15:26:38.110924 00099504 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to write to default, valLen:135 -41614:03/29 15:26:38.111008 00099504 QRY streamStateReleaseBuf -41631:03/29 15:26:38.111153 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to read from default -41632:03/29 15:26:38.111212 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -41637:03/29 15:26:38.111266 00099504 QRY streamStateReleaseBuf -41656:03/29 15:26:38.111573 00099504 QRY streamStateClear_rocksdb seq:8 -41784:03/29 15:26:38.113332 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -41785:03/29 15:26:38.113399 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -41786:03/29 15:26:38.113417 00099575 QRY streamStateReleaseBuf -41787:03/29 15:26:38.113447 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to read from default -41788:03/29 15:26:38.113495 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to write to default, valLen:159 -41789:03/29 15:26:38.113511 00099575 QRY streamStateReleaseBuf -41798:03/29 15:26:38.113713 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -41799:03/29 15:26:38.113738 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -41803:03/29 15:26:38.113803 00099575 QRY streamStateReleaseBuf -41812:03/29 15:26:38.113982 00099575 QRY streamStateGetCur_rocksdb -41813:03/29 15:26:38.114026 00099575 QRY streamStateCurPrev_rocksdb -41814:03/29 15:26:38.114062 00099575 QRY streamStateGetKVByCur_rocksdb -41816:03/29 15:26:38.114126 00099575 QRY streamStateFreeCur -42853:03/29 15:26:38.329316 00099504 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 19] succ to del from default -42857:03/29 15:26:38.329888 00099504 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 19] succ to del from default -42859:03/29 15:26:38.330165 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to del from default -42860:03/29 15:26:38.330208 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -42861:03/29 15:26:38.330257 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -42862:03/29 15:26:38.330275 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -42958:03/29 15:26:38.332359 00099569 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 8] succ to del from default -42964:03/29 15:26:38.332407 00099569 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to del from default -42965:03/29 15:26:38.332473 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -42966:03/29 15:26:38.332488 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -42981:03/29 15:26:38.332676 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] 
failed to read from default, err: not exist -42984:03/29 15:26:38.332745 00099504 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to write to default, valLen:135 -42986:03/29 15:26:38.332764 00099504 QRY streamStateReleaseBuf -43046:03/29 15:26:38.334165 00099504 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 19] succ to read from default -43048:03/29 15:26:38.334198 00099504 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -43053:03/29 15:26:38.334321 00099504 QRY streamStateReleaseBuf -43065:03/29 15:26:38.334481 00099569 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] failed to read from default, err: not exist -43067:03/29 15:26:38.334534 00099504 QRY streamStateClear_rocksdb seq:19 -43068:03/29 15:26:38.334552 00099569 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to write to default, valLen:135 -43069:03/29 15:26:38.334574 00099569 QRY streamStateReleaseBuf -43100:03/29 15:26:38.335311 00099569 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 8] succ to read from default -43101:03/29 15:26:38.335423 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -43109:03/29 15:26:38.335477 00099569 QRY streamStateReleaseBuf -43121:03/29 15:26:38.335782 00099569 QRY streamStateClear_rocksdb seq:8 -43282:03/29 15:26:38.339328 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to del from default -43283:03/29 15:26:38.339399 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to del from default -43289:03/29 15:26:38.339504 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to del from default -43290:03/29 15:26:38.339565 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] succ to del from default -43291:03/29 15:26:38.339625 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] succ to del from default -43292:03/29 15:26:38.339705 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to del from default -43293:03/29 15:26:38.339749 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 11] failed to read from default, err: not exist -43294:03/29 15:26:38.339784 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to read from default -43295:03/29 15:26:38.339923 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] failed to read from default, err: not exist -43296:03/29 15:26:38.339977 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 13] succ to read from default -43297:03/29 15:26:38.339998 00099575 QRY streamStateReleaseBuf -43298:03/29 15:26:38.340051 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 15] failed to read from default, err: not exist -43299:03/29 15:26:38.340084 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 17] failed to read from default, err: not exist -43300:03/29 15:26:38.340164 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to write to default, valLen:159 -43301:03/29 15:26:38.340186 00099575 QRY streamStateReleaseBuf -43302:03/29 15:26:38.340222 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] failed to read from default, err: not exist -43303:03/29 15:26:38.340256 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to read from default -43304:03/29 
15:26:38.340310 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] failed to read from default, err: not exist -43305:03/29 15:26:38.340345 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to read from default -43306:03/29 15:26:38.340579 00099575 QRY streamStateReleaseBuf -43307:03/29 15:26:38.340617 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:2,seq: 15] failed to read from default, err: not exist -43308:03/29 15:26:38.340649 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:3,seq: 17] failed to read from default, err: not exist -43309:03/29 15:26:38.340721 00099575 QRY streamState str:[groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to write to default, valLen:159 -43310:03/29 15:26:38.340743 00099575 QRY streamStateReleaseBuf -43311:03/29 15:26:38.340781 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] failed to read from default, err: not exist -43312:03/29 15:26:38.340834 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to read from default -43313:03/29 15:26:38.340874 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] failed to read from default, err: not exist -43314:03/29 15:26:38.340924 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to read from default -43315:03/29 15:26:38.340947 00099575 QRY streamStateReleaseBuf -43316:03/29 15:26:38.341105 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:2,seq: 15] failed to read from default, err: not exist -43317:03/29 15:26:38.341238 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:3,seq: 17] failed to read from default, err: not exist -43318:03/29 15:26:38.341400 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -43319:03/29 15:26:38.341431 00099575 QRY streamStateReleaseBuf -43325:03/29 15:26:38.341593 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -43326:03/29 15:26:38.341676 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -43327:03/29 15:26:38.341699 00099575 QRY streamStateReleaseBuf -43328:03/29 15:26:38.341739 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] failed to read from default, err: not exist -43329:03/29 15:26:38.342152 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to write to default, valLen:159 -43330:03/29 15:26:38.342181 00099575 QRY streamStateReleaseBuf -43337:03/29 15:26:38.342311 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:-1,seq: 9] succ to del from default -43338:03/29 15:26:38.342376 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to del from default -43339:03/29 15:26:38.342435 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] succ to del from default -43340:03/29 15:26:38.342519 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to del from default -43341:03/29 15:26:38.342561 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:0,seq: 11] failed to read from default, err: not exist -43342:03/29 15:26:38.342594 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:1,seq: 13] failed to read from default, err: not exist -43343:03/29 15:26:38.342629 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:2,seq: 15] 
failed to read from default, err: not exist -43344:03/29 15:26:38.342661 00099575 QRY streamState str: [groupId:0,ts:1648791220000,opNum:3,seq: 17] failed to read from default, err: not exist -43345:03/29 15:26:38.342698 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to read from default -43346:03/29 15:26:38.342738 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] failed to read from default, err: not exist -43347:03/29 15:26:38.342776 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:0,seq: 11] succ to read from default -43348:03/29 15:26:38.342799 00099575 QRY streamStateReleaseBuf -43349:03/29 15:26:38.342834 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] failed to read from default, err: not exist -43350:03/29 15:26:38.342865 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:2,seq: 15] failed to read from default, err: not exist -43351:03/29 15:26:38.342908 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:3,seq: 17] failed to read from default, err: not exist -43352:03/29 15:26:38.342975 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -43353:03/29 15:26:38.342997 00099575 QRY streamStateReleaseBuf -43359:03/29 15:26:38.343177 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -43360:03/29 15:26:38.343248 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to write to default, valLen:159 -43361:03/29 15:26:38.343269 00099575 QRY streamStateReleaseBuf -43362:03/29 15:26:38.343305 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:1,seq: 13] failed to read from default, err: not exist -43363:03/29 15:26:38.343368 00099575 QRY streamState str:[groupId:0,ts:1648791230000,opNum:1,seq: 13] succ to write to default, valLen:159 -43364:03/29 15:26:38.343389 00099575 QRY streamStateReleaseBuf -43373:03/29 15:26:38.343768 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -43382:03/29 15:26:38.344088 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 9] succ to read from default -43383:03/29 15:26:38.344118 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -43387:03/29 15:26:38.344203 00099575 QRY streamStateReleaseBuf -43388:03/29 15:26:38.344244 00099575 QRY streamState str: [groupId:0,ts:1648791230000,opNum:-1,seq: 9] succ to read from default -43389:03/29 15:26:38.344273 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -43393:03/29 15:26:38.344355 00099575 QRY streamStateReleaseBuf -43403:03/29 15:26:38.344525 00099575 QRY streamStateGetCur_rocksdb -43404:03/29 15:26:38.344584 00099575 QRY streamStateCurPrev_rocksdb -43405:03/29 15:26:38.344662 00099575 QRY streamStateGetKVByCur_rocksdb -43407:03/29 15:26:38.344692 00099575 QRY streamStateFreeCur -51853:03/29 15:26:39.501810 00099718 QRY streamStateSetNumber, seq: 20 -51854:03/29 15:26:39.501840 00099718 QRY streamStateSetNumber, seq: 21 -51858:03/29 15:26:39.507708 00099713 QRY streamStateSetNumber, seq: 22 -51859:03/29 15:26:39.507730 00099713 QRY streamStateSetNumber, seq: 23 -51861:03/29 15:26:39.507756 00099713 QRY streamStateSetNumber, seq: 24 -51862:03/29 15:26:39.507860 00099713 QRY streamStateSetNumber, seq: 25 -51864:03/29 15:26:39.507885 00099713 QRY streamStateSetNumber, seq: 26 -51865:03/29 15:26:39.508056 00099713 QRY 
streamStateSetNumber, seq: 27 -51866:03/29 15:26:39.508083 00099713 QRY streamStateSetNumber, seq: 28 -51869:03/29 15:26:39.508105 00099713 QRY streamStateSetNumber, seq: 29 -51874:03/29 15:26:39.508152 00099713 QRY streamStateSetNumber, seq: 30 -51883:03/29 15:26:39.508317 00099723 QRY streamStateSetNumber, seq: 31 -51885:03/29 15:26:39.508343 00099723 QRY streamStateSetNumber, seq: 32 -51974:03/29 15:26:39.520642 00099728 QRY streamStateSetNumber, seq: 33 -51975:03/29 15:26:39.520671 00099728 QRY streamStateSetNumber, seq: 34 -52162:03/29 15:26:39.588845 00099713 QRY streamStateSetNumber, seq: 35 -52163:03/29 15:26:39.588940 00099713 QRY streamStateSetNumber, seq: 36 -52499:03/29 15:26:39.649197 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] failed to read from default, err: not exist -52500:03/29 15:26:39.649903 00099593 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to write to default, valLen:135 -52501:03/29 15:26:39.649918 00099593 QRY streamStateReleaseBuf -52516:03/29 15:26:39.650272 00099593 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to read from default -52517:03/29 15:26:39.650333 00099593 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -52521:03/29 15:26:39.650384 00099593 QRY streamStateReleaseBuf -52530:03/29 15:26:39.650500 00099593 QRY streamStateClear_rocksdb seq:36 -52718:03/29 15:26:39.653130 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] failed to read from default, err: not exist -52788:03/29 15:26:39.654198 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to write to default, valLen:159 -52789:03/29 15:26:39.654217 00099575 QRY streamStateReleaseBuf -52790:03/29 15:26:39.654226 00099584 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] failed to read from default, err: not exist -52791:03/29 15:26:39.654248 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] failed to read from default, err: not exist -52792:03/29 15:26:39.654280 00099584 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to write to default, valLen:135 -52793:03/29 15:26:39.654295 00099584 QRY streamStateReleaseBuf -52794:03/29 15:26:39.654295 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:0,seq: 24] succ to write to default, valLen:159 -52796:03/29 15:26:39.654313 00099575 QRY streamStateReleaseBuf -52806:03/29 15:26:39.654460 00099584 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to read from default -52811:03/29 15:26:39.654483 00099584 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -52815:03/29 15:26:39.654616 00099584 QRY streamStateReleaseBuf -52823:03/29 15:26:39.654626 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to read from default -52825:03/29 15:26:39.654922 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -52829:03/29 15:26:39.655001 00099575 QRY streamStateReleaseBuf -52834:03/29 15:26:39.654938 00099584 QRY streamStateClear_rocksdb seq:36 -52875:03/29 15:26:39.655766 00099575 QRY streamStateGetCur_rocksdb -52880:03/29 15:26:39.655894 00099575 QRY streamStateCurPrev_rocksdb -52881:03/29 15:26:39.655933 00099575 QRY streamStateGetKVByCur_rocksdb -52884:03/29 15:26:39.655957 00099575 QRY streamStateFreeCur -52941:03/29 15:26:39.656606 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to 
read from default -52950:03/29 15:26:39.656678 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to write to default, valLen:159 -52970:03/29 15:26:39.656771 00099575 QRY streamStateReleaseBuf -52972:03/29 15:26:39.657012 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] succ to read from default -52973:03/29 15:26:39.657072 00099575 QRY streamState str:[groupId:0,ts:1648791210000,opNum:0,seq: 24] succ to write to default, valLen:159 -52974:03/29 15:26:39.657088 00099575 QRY streamStateReleaseBuf -52992:03/29 15:26:39.657214 00099575 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to read from default -52997:03/29 15:26:39.657436 00099575 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -53001:03/29 15:26:39.657517 00099575 QRY streamStateReleaseBuf -53047:03/29 15:26:39.658463 00099575 QRY streamStateGetCur_rocksdb -53048:03/29 15:26:39.658509 00099575 QRY streamStateCurPrev_rocksdb -53049:03/29 15:26:39.658527 00099575 QRY streamStateGetKVByCur_rocksdb -53051:03/29 15:26:39.658549 00099575 QRY streamStateFreeCur -53519:03/29 15:26:39.666194 00099732 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to del from default -53522:03/29 15:26:39.666264 00099732 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -53582:03/29 15:26:39.667215 00099732 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] failed to read from default, err: not exist -53583:03/29 15:26:39.667264 00099732 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to write to default, valLen:135 -53584:03/29 15:26:39.667281 00099732 QRY streamStateReleaseBuf -53605:03/29 15:26:39.667661 00099732 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to read from default -53606:03/29 15:26:39.667684 00099732 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -53610:03/29 15:26:39.667748 00099732 QRY streamStateReleaseBuf -53619:03/29 15:26:39.667875 00099732 QRY streamStateClear_rocksdb seq:36 -53718:03/29 15:26:39.669594 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to del from default -53721:03/29 15:26:39.669669 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] succ to del from default -53723:03/29 15:26:39.669696 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] failed to read from default, err: not exist -53725:03/29 15:26:39.669714 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 26] failed to read from default, err: not exist -53736:03/29 15:26:39.669728 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 28] failed to read from default, err: not exist -53743:03/29 15:26:39.669965 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 30] failed to read from default, err: not exist -53763:03/29 15:26:39.670203 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] failed to read from default, err: not exist -53765:03/29 15:26:39.670397 00099569 QRY streamState str:[groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to write to default, valLen:159 -53766:03/29 15:26:39.670414 00099569 QRY streamStateReleaseBuf -53767:03/29 15:26:39.670459 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] failed to read from default, err: not exist -53771:03/29 15:26:39.670497 00099569 QRY streamState 
str:[groupId:0,ts:1648791210000,opNum:0,seq: 24] succ to write to default, valLen:159 -53772:03/29 15:26:39.670570 00099569 QRY streamStateReleaseBuf -53794:03/29 15:26:39.670935 00099569 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to read from default -53795:03/29 15:26:39.670954 00099569 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -53803:03/29 15:26:39.671070 00099569 QRY streamStateReleaseBuf -53818:03/29 15:26:39.671279 00099569 QRY streamStateGetCur_rocksdb -53819:03/29 15:26:39.671326 00099569 QRY streamStateCurPrev_rocksdb -53820:03/29 15:26:39.671356 00099569 QRY streamStateGetKVByCur_rocksdb -53822:03/29 15:26:39.671388 00099569 QRY streamStateFreeCur -55456:03/29 15:26:40.136068 00099717 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 36] succ to del from default -55457:03/29 15:26:40.136115 00099717 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -55621:03/29 15:26:40.140154 00099722 QRY streamState str: [groupId:0,ts:1648791210000,opNum:-1,seq: 22] succ to del from default -55624:03/29 15:26:40.140304 00099722 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] succ to del from default -55626:03/29 15:26:40.140337 00099722 QRY streamState str: [groupId:0,ts:1648791210000,opNum:0,seq: 24] failed to read from default, err: not exist -55627:03/29 15:26:40.140362 00099722 QRY streamState str: [groupId:0,ts:1648791210000,opNum:1,seq: 26] failed to read from default, err: not exist -55629:03/29 15:26:40.140394 00099722 QRY streamState str: [groupId:0,ts:1648791210000,opNum:2,seq: 28] failed to read from default, err: not exist -55636:03/29 15:26:40.140426 00099722 QRY streamState str: [groupId:0,ts:1648791210000,opNum:3,seq: 30] failed to read from default, err: not exist -55647:03/29 15:26:40.140826 00099722 QRY streamState str: [groupId:0] failed to read from parname, err: not exist -55656:03/29 15:26:40.140968 00099722 QRY streamStateGetCur_rocksdb -55657:03/29 15:26:40.141149 00099722 QRY streamStateFreeCur -55658:03/29 15:26:40.141191 00099722 QRY streamStateCurPrev_rocksdb diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index f30c6c5314..842d6986cc 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -1029,6 +1029,7 @@ ,,y,script,./test.sh -f tsim/stream/basic2.sim ,,y,script,./test.sh -f tsim/stream/basic3.sim ,,y,script,./test.sh -f tsim/stream/basic4.sim +,,y,script,./test.sh -f tsim/stream/checkpointInterval0.sim ,,y,script,./test.sh -f tsim/stream/checkStreamSTable1.sim ,,y,script,./test.sh -f tsim/stream/checkStreamSTable.sim ,,y,script,./test.sh -f tsim/stream/deleteInterval.sim diff --git a/tests/script/log b/tests/script/log new file mode 100644 index 0000000000..276333a1ae --- /dev/null +++ b/tests/script/log @@ -0,0 +1,103 @@ +------------------------------------------------------------------------ +Start TDengine Testing Case ... +BUILD_DIR: /root/yihao/work/TDengine/debug +SIM_DIR : /root/yihao/work/TDengine/sim +CODE_DIR : /root/yihao/work/TDengine/tests/script +CFG_DIR : /root/yihao/work/TDengine/sim/tsim/cfg +ASAN_DIR : /root/yihao/work/TDengine/sim/asan +------------------------------------------------------------------------ +ExcuteCmd: /root/yihao/work/TDengine/debug/build/bin/tsim -c /root/yihao/work/TDengine/sim/tsim/cfg -f tsim/stream/state0.sim +AsanDir: /root/yihao/work/TDengine/sim/asan/tsim.asan +08/18 17:00:43.118420 00438970 SIM simulator is running ... 
+08/18 17:01:20.674165 00438970 SIM script:tsim/stream/state0.sim, background script num:0, stop them
+08/18 17:01:20.674521 00438970 SIM ----------------------------------------------------------------------
+08/18 17:01:20.674528 00438970 SIM Simulation Test Done, 1 Passed:
+
+08/18 17:01:20.674534 00438970 SIM thread is stopped
+08/18 17:01:20.674537 00438970 SIM execute result 0
+Execute result: 0
+Killing taosd processes
+asan error_num: 0
+asan memory_leak: 0
+asan indirect_leak: 0
+asan runtime error: 0
+asan python error: 0
+no asan errors
diff --git a/tests/script/sh/deploy.sh b/tests/script/sh/deploy.sh
index 5b1773e664..7da8da09bf 100755
--- a/tests/script/sh/deploy.sh
+++ b/tests/script/sh/deploy.sh
@@ -118,7 +118,7 @@ echo "statusInterval 1" >> $TAOS_CFG
 echo "dataDir $DATA_DIR" >> $TAOS_CFG
 echo "logDir $LOG_DIR" >> $TAOS_CFG
 echo "debugFlag 0" >> $TAOS_CFG
-echo "tmrDebugFlag 143" >> $TAOS_CFG
+echo "tmrDebugFlag 131" >> $TAOS_CFG
 echo "uDebugFlag 143" >> $TAOS_CFG
 echo "rpcDebugFlag 143" >> $TAOS_CFG
 echo "jniDebugFlag 143" >> $TAOS_CFG
diff --git a/tests/script/tsim/stream/checkpointInterval0.sim b/tests/script/tsim/stream/checkpointInterval0.sim
new file mode 100644
index 0000000000..1c212eb2a7
--- /dev/null
+++ b/tests/script/tsim/stream/checkpointInterval0.sim
@@ -0,0 +1,255 @@
+system sh/stop_dnodes.sh
+system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135
+system sh/exec.sh -n dnode1 -s start
+sleep 50
+sql connect
+
+print step 1
+
+print =============== create database
+sql create database test vgroups 1;
+
+sql use test;
+
+
+sql create table t1(ts timestamp, a int, b int , c int, d double);
+sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from t1 interval(10s);
+sql create stream streams1 trigger window_close IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt1 as select _wstart, count(*) c1, sum(a) from t1 interval(10s);
+sql insert into t1 values(1648791213000,1,2,3,1.0);
+sql insert into t1 values(1648791213001,2,2,3,1.1);
+
+$loop_count = 0
+
+loop0:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 1 then
+  print =====rows=$rows expect 1
+  goto loop0
+endi
+
+# row 0
+if $data01 != 2 then
+  print =====data01=$data01
+  goto loop0
+endi
+
+if $data02 != 3 then
+  print =====data02=$data02
+  goto loop0
+endi
+
+$loop_count = 0
+
+loop01:
+sleep 1000
+
+sql select * from streamt1;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 0 then
+  print =====rows=$rows expect 0
+  goto loop01
+endi
+
+print waiting for checkpoint generation 1 ......
+
+sleep 25000
+
+print restart taosd 01 ......
+
+system sh/stop_dnodes.sh
+
+system sh/exec.sh -n dnode1 -s start
+
+sql insert into t1 values(1648791213002,3,2,3,1.1);
+
+$loop_count = 0
+
+loop1:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 1 then
+  print =====rows=$rows expect 1
+  goto loop1
+endi
+
+# row 0
+if $data01 != 3 then
+  print =====data01=$data01
+  goto loop1
+endi
+
+if $data02 != 6 then
+  print =====data02=$data02
+  goto loop1
+endi
+
+sql insert into t1 values(1648791223003,4,2,3,1.1);
+
+$loop_count = 0
+
+loop2:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 2 then
+  print =====rows=$rows expect 2
+  goto loop2
+endi
+
+# row 0
+if $data01 != 3 then
+  print =====data01=$data01
+  goto loop2
+endi
+
+if $data02 != 6 then
+  print =====data02=$data02
+  goto loop2
+endi
+
+# row 1
+if $data11 != 1 then
+  print =====data11=$data11
+  goto loop2
+endi
+
+if $data12 != 4 then
+  print =====data12=$data12
+  goto loop2
+endi
+
+
+$loop_count = 0
+
+loop3:
+sleep 1000
+
+print select * from streamt1;
+sql select * from streamt1;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 1 then
+  print =====rows=$rows expect 1
+  goto loop3
+endi
+
+# row 0
+if $data01 != 3 then
+  print =====data01=$data01
+  goto loop3
+endi
+
+if $data02 != 6 then
+  print =====data02=$data02
+  goto loop3
+endi
+
+print step 2
+
+print restart taosd 02 ......
+
+system sh/stop_dnodes.sh
+
+system sh/exec.sh -n dnode1 -s start
+
+sql insert into t1 values(1648791223004,5,2,3,1.1);
+
+loop4:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 2 then
+  print =====rows=$rows expect 2
+  goto loop4
+endi
+
+# row 0
+if $data01 != 3 then
+  print =====data01=$data01
+  goto loop4
+endi
+
+if $data02 != 6 then
+  print =====data02=$data02
+  goto loop4
+endi
+
+# row 1
+if $data11 != 2 then
+  print =====data11=$data11
+  goto loop4
+endi
+
+if $data12 != 9 then
+  print =====data12=$data12
+  goto loop4
+endi
+
+$loop_count = 0
+
+loop5:
+sleep 1000
+
+print select * from streamt1;
+sql select * from streamt1;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 1 then
+  print =====rows=$rows expect 1
+  goto loop5
+endi
+
+# row 0
+if $data01 != 3 then
+  print =====data01=$data01
+  goto loop5
+endi
+
+if $data02 != 6 then
+  print =====data02=$data02
+  goto loop5
+endi
+
+print end---------------------------------
+
+system sh/exec.sh -n dnode1 -s stop -x SIGINT
\ No newline at end of file
diff --git a/tests/script/tsim/stream/checkpointInterval1.sim b/tests/script/tsim/stream/checkpointInterval1.sim
new file mode 100644
index 0000000000..21825e7f48
--- /dev/null
+++ b/tests/script/tsim/stream/checkpointInterval1.sim
@@ -0,0 +1,104 @@
+system sh/stop_dnodes.sh
+system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135
+system sh/exec.sh -n dnode1 -s start
+sleep 50
+sql connect
+
+print step 1
+
+sql create database test vgroups 4;
+
+sql use test;
+
+sql create stable st(ts timestamp,a int,b int,c int, d double) tags(ta int,tb int,tc int);
+sql create table t1 using st tags(1,1,1);
+sql create table t2 using st tags(2,2,2);
+sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from st interval(10s);
+
+sql insert into t1 values(1648791213000,1,2,3,1.0);
+
+sql insert into t2 values(1648791213001,2,2,3,1.1);
+
+$loop_count = 0
+
+loop0:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 1 then
+  print =====rows=$rows expect 1
+  goto loop0
+endi
+
+# row 0
+if $data01 != 2 then
+  print =====data01=$data01
+  goto loop0
+endi
+
+if $data02 != 3 then
+  print =====data02=$data02
+  goto loop0
+endi
+
+print waiting for checkpoint generation 1 ......
+
+sleep 25000
+
+print restart taosd
+
+system sh/stop_dnodes.sh
+
+system sh/exec.sh -n dnode1 -s start
+
+sql insert into t1 values(1648791213002,3,2,3,1.1);
+sql insert into t2 values(1648791223003,4,2,3,1.1);
+
+$loop_count = 0
+
+loop1:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 2 then
+  print =====rows=$rows expect 2
+  goto loop1
+endi
+
+# row 0
+if $data01 != 3 then
+  print =====data01=$data01
+  goto loop1
+endi
+
+if $data02 != 6 then
+  print =====data02=$data02
+  goto loop1
+endi
+
+# row 1
+if $data11 != 1 then
+  print =====data11=$data11
+  goto loop1
+endi
+
+if $data12 != 4 then
+  print =====data12=$data12
+  goto loop1
+endi
+
+print end---------------------------------
+
+system sh/exec.sh -n dnode1 -s stop -x SIGINT
\ No newline at end of file
diff --git a/tests/script/tsim/stream/checkpointSession0.sim b/tests/script/tsim/stream/checkpointSession0.sim
new file mode 100644
index 0000000000..1d503806c5
--- /dev/null
+++ b/tests/script/tsim/stream/checkpointSession0.sim
@@ -0,0 +1,178 @@
+system sh/stop_dnodes.sh
+system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135
+system sh/exec.sh -n dnode1 -s start
+sleep 50
+sql connect
+
+print step 1
+
+print =============== create database
+sql create database test vgroups 1;
+
+sql use test;
+
+
+sql create table t1(ts timestamp, a int, b int , c int, d double);
+sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from t1 session(ts, 10s);
+sql insert into t1 values(1648791213000,1,2,3,1.0);
+sql insert into t1 values(1648791213001,2,2,3,1.1);
+
+$loop_count = 0
+
+loop0:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 1 then
+  print =====rows=$rows expect 1
+  goto loop0
+endi
+
+# row 0
+if $data01 != 2 then
+  print =====data01=$data01
+  goto loop0
+endi
+
+if $data02 != 3 then
+  print =====data02=$data02
+  goto loop0
+endi
+
+print waiting for checkpoint generation 1 ......
+
+sleep 25000
+
+print restart taosd 01 ......
+
+system sh/stop_dnodes.sh
+
+system sh/exec.sh -n dnode1 -s start
+
+sql insert into t1 values(1648791213002,3,2,3,1.1);
+
+$loop_count = 0
+
+loop1:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 1 then
+  print =====rows=$rows expect 1
+  goto loop1
+endi
+
+# row 0
+if $data01 != 3 then
+  print =====data01=$data01
+  goto loop1
+endi
+
+if $data02 != 6 then
+  print =====data02=$data02
+  goto loop1
+endi
+
+sql insert into t1 values(1648791233003,4,2,3,1.1);
+
+$loop_count = 0
+
+loop2:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 2 then
+  print =====rows=$rows expect 2
+  goto loop2
+endi
+
+# row 0
+if $data01 != 3 then
+  print =====data01=$data01
+  goto loop2
+endi
+
+if $data02 != 6 then
+  print =====data02=$data02
+  goto loop2
+endi
+
+# row 1
+if $data11 != 1 then
+  print =====data11=$data11
+  goto loop2
+endi
+
+if $data12 != 4 then
+  print =====data12=$data12
+  goto loop2
+endi
+
+print step 2
+
+print restart taosd 02 ......
+
+system sh/stop_dnodes.sh
+
+system sh/exec.sh -n dnode1 -s start
+
+sql insert into t1 values(1648791233004,5,2,3,1.1);
+
+loop20:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 2 then
+  print =====rows=$rows expect 2
+  goto loop20
+endi
+
+# row 0
+if $data01 != 3 then
+  print =====data01=$data01
+  goto loop20
+endi
+
+if $data02 != 6 then
+  print =====data02=$data02
+  goto loop20
+endi
+
+# row 1
+if $data11 != 2 then
+  print =====data11=$data11
+  goto loop20
+endi
+
+if $data12 != 9 then
+  print =====data12=$data12
+  goto loop20
+endi
+
+print end---------------------------------
+
+system sh/exec.sh -n dnode1 -s stop -x SIGINT
\ No newline at end of file
diff --git a/tests/script/tsim/stream/checkpointSession1.sim b/tests/script/tsim/stream/checkpointSession1.sim
new file mode 100644
index 0000000000..5c9625aabb
--- /dev/null
+++ b/tests/script/tsim/stream/checkpointSession1.sim
@@ -0,0 +1,104 @@
+system sh/stop_dnodes.sh
+system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135
+system sh/exec.sh -n dnode1 -s start
+sleep 50
+sql connect
+
+print step 1
+
+sql create database test vgroups 4;
+
+sql use test;
+
+sql create stable st(ts timestamp,a int,b int,c int, d double) tags(ta int,tb int,tc int);
+sql create table t1 using st tags(1,1,1);
+sql create table t2 using st tags(2,2,2);
+sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from st session(ts, 10s);
+
+sql insert into t1 values(1648791213000,1,2,3,1.0);
+
+sql insert into t2 values(1648791213001,2,2,3,1.1);
+
+$loop_count = 0
+
+loop0:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 1 then
+  print =====rows=$rows expect 1
+  goto loop0
+endi
+
+# row 0
+if $data01 != 2 then
+  print =====data01=$data01
+  goto loop0
+endi
+
+if $data02 != 3 then
+  print =====data02=$data02
+  goto loop0
+endi
+
+print waiting for checkpoint generation 1 ......
+
+sleep 25000
+
+print restart taosd
+
+system sh/stop_dnodes.sh
+
+system sh/exec.sh -n dnode1 -s start
+
+sql insert into t1 values(1648791213002,3,2,3,1.1);
+sql insert into t2 values(1648791233003,4,2,3,1.1);
+
+$loop_count = 0
+
+loop1:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 2 then
+  print =====rows=$rows expect 2
+  goto loop1
+endi
+
+# row 0
+if $data01 != 3 then
+  print =====data01=$data01
+  goto loop1
+endi
+
+if $data02 != 6 then
+  print =====data02=$data02
+  goto loop1
+endi
+
+# row 1
+if $data11 != 1 then
+  print =====data11=$data11
+  goto loop1
+endi
+
+if $data12 != 4 then
+  print =====data12=$data12
+  goto loop1
+endi
+
+print end---------------------------------
+
+system sh/exec.sh -n dnode1 -s stop -x SIGINT
\ No newline at end of file
diff --git a/tests/script/tsim/stream/checkpointState0.sim b/tests/script/tsim/stream/checkpointState0.sim
new file mode 100644
index 0000000000..3836721212
--- /dev/null
+++ b/tests/script/tsim/stream/checkpointState0.sim
@@ -0,0 +1,178 @@
+system sh/stop_dnodes.sh
+system sh/deploy.sh -n dnode1 -i 1 -v debugFlag 135
+system sh/exec.sh -n dnode1 -s start
+sleep 50
+sql connect
+
+print step 1
+
+print =============== create database
+sql create database test vgroups 1;
+
+sql use test;
+
+
+sql create table t1(ts timestamp, a int, b int , c int, d double);
+sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) from t1 state_window(b);
+sql insert into t1 values(1648791213000,1,2,3,1.0);
+sql insert into t1 values(1648791213001,2,2,3,1.1);
+
+$loop_count = 0
+
+loop0:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 1 then
+  print =====rows=$rows expect 1
+  goto loop0
+endi
+
+# row 0
+if $data01 != 2 then
+  print =====data01=$data01
+  goto loop0
+endi
+
+if $data02 != 3 then
+  print =====data02=$data02
+  goto loop0
+endi
+
+print waiting for checkpoint generation 1 ......
+
+sleep 25000
+
+print restart taosd 01 ......
+
+system sh/stop_dnodes.sh
+
+system sh/exec.sh -n dnode1 -s start
+
+sql insert into t1 values(1648791213002,3,2,3,1.1);
+
+$loop_count = 0
+
+loop1:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 1 then
+  print =====rows=$rows expect 1
+  goto loop1
+endi
+
+# row 0
+if $data01 != 3 then
+  print =====data01=$data01
+  goto loop1
+endi
+
+if $data02 != 6 then
+  print =====data02=$data02
+  goto loop1
+endi
+
+sql insert into t1 values(1648791233003,4,3,3,1.1);
+
+$loop_count = 0
+
+loop2:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 2 then
+  print =====rows=$rows expect 2
+  goto loop2
+endi
+
+# row 0
+if $data01 != 3 then
+  print =====data01=$data01
+  goto loop2
+endi
+
+if $data02 != 6 then
+  print =====data02=$data02
+  goto loop2
+endi
+
+# row 1
+if $data11 != 1 then
+  print =====data11=$data11
+  goto loop2
+endi
+
+if $data12 != 4 then
+  print =====data12=$data12
+  goto loop2
+endi
+
+print step 2
+
+print restart taosd 02 ......
+
+system sh/stop_dnodes.sh
+
+system sh/exec.sh -n dnode1 -s start
+
+sql insert into t1 values(1648791233004,5,3,3,1.1);
+
+loop20:
+sleep 1000
+
+sql select * from streamt;
+
+$loop_count = $loop_count + 1
+if $loop_count == 10 then
+  return -1
+endi
+
+if $rows != 2 then
+  print =====rows=$rows expect 2
+  goto loop20
+endi
+
+# row 0
+if $data01 != 3 then
+  print =====data01=$data01
+  goto loop20
+endi
+
+if $data02 != 6 then
+  print =====data02=$data02
+  goto loop20
+endi
+
+# row 1
+if $data11 != 2 then
+  print =====data11=$data11
+  goto loop20
+endi
+
+if $data12 != 9 then
+  print =====data12=$data12
+  goto loop20
+endi
+
+print end---------------------------------
+
+system sh/exec.sh -n dnode1 -s stop -x SIGINT
\ No newline at end of file
diff --git a/tests/script/win-test-file b/tests/script/win-test-file
index 4d578a93cd..4ff4b52f7e 100644
--- a/tests/script/win-test-file
+++ b/tests/script/win-test-file
@@ -237,6 +237,52 @@
 ./test.sh -f tsim/table/table.sim
 ./test.sh -f tsim/table/tinyint.sim
 ./test.sh -f tsim/table/vgroup.sim
+./test.sh -f tsim/stream/basic0.sim -g
+./test.sh -f tsim/stream/basic1.sim
+./test.sh -f tsim/stream/basic2.sim
+./test.sh -f tsim/stream/basic3.sim
+./test.sh -f tsim/stream/basic4.sim
+./test.sh -f tsim/stream/checkpointInterval0.sim
+./test.sh -f tsim/stream/checkStreamSTable1.sim
+./test.sh -f tsim/stream/checkStreamSTable.sim
+./test.sh -f tsim/stream/deleteInterval.sim
+./test.sh -f tsim/stream/deleteSession.sim
+./test.sh -f tsim/stream/deleteState.sim
+./test.sh -f tsim/stream/distributeInterval0.sim
+./test.sh -f tsim/stream/distributeIntervalRetrive0.sim
+./test.sh -f tsim/stream/distributeSession0.sim
+./test.sh -f tsim/stream/drop_stream.sim
+./test.sh -f tsim/stream/fillHistoryBasic1.sim
+./test.sh -f tsim/stream/fillHistoryBasic2.sim
+./test.sh -f tsim/stream/fillHistoryBasic3.sim
+./test.sh -f tsim/stream/fillIntervalDelete0.sim
+./test.sh -f tsim/stream/fillIntervalDelete1.sim
+./test.sh -f tsim/stream/fillIntervalLinear.sim
+./test.sh -f tsim/stream/fillIntervalPartitionBy.sim
+./test.sh -f tsim/stream/fillIntervalPrevNext1.sim
+./test.sh -f tsim/stream/fillIntervalPrevNext.sim
+./test.sh -f tsim/stream/fillIntervalRange.sim
+./test.sh -f tsim/stream/fillIntervalValue.sim
+./test.sh -f tsim/stream/ignoreCheckUpdate.sim
+./test.sh -f tsim/stream/ignoreExpiredData.sim
+./test.sh -f tsim/stream/partitionby1.sim
+./test.sh -f tsim/stream/partitionbyColumnInterval.sim
+./test.sh -f tsim/stream/partitionbyColumnSession.sim
+./test.sh -f tsim/stream/partitionbyColumnState.sim
+./test.sh -f tsim/stream/partitionby.sim
+./test.sh -f tsim/stream/pauseAndResume.sim
+./test.sh -f tsim/stream/schedSnode.sim
+./test.sh -f tsim/stream/session0.sim
+./test.sh -f tsim/stream/session1.sim
+./test.sh -f tsim/stream/sliding.sim
+./test.sh -f tsim/stream/state0.sim
+./test.sh -f tsim/stream/state1.sim
+./test.sh -f tsim/stream/triggerInterval0.sim
+./test.sh -f tsim/stream/triggerSession0.sim
+./test.sh -f tsim/stream/udTableAndTag0.sim
+./test.sh -f tsim/stream/udTableAndTag1.sim
+./test.sh -f tsim/stream/udTableAndTag2.sim
+./test.sh -f tsim/stream/windowClose.sim
 ./test.sh -f tsim/trans/lossdata1.sim
 ./test.sh -f tsim/tmq/basic1.sim
 ./test.sh -f tsim/tmq/basic2.sim
diff --git a/tests/system-test/2-query/stbJoin.py b/tests/system-test/2-query/stbJoin.py
index e21a875cf2..677704648c 100644
--- a/tests/system-test/2-query/stbJoin.py
+++ b/tests/system-test/2-query/stbJoin.py
@@ -109,6 +109,9 @@ class TDTestCase:
         tdSql.query(f"select a.* from sta a join stb b on a.tg1=b.tg1 and a.ts=b.ts and a.tg2=b.tg2;")
         tdSql.checkRows(12)
 
+        tdSql.query(f"select a.* from sta a join stb b on a.tg1 != b.tg1 and a.ts=b.ts;")
+        tdSql.checkRows(36)
+
         # tdSql.checkData(0,1,10)
 
         tdSql.error(f"select a.* from sta a join stb b on a.tg1=b.tg1 where a.ts=b.ts or a.tg2=b.tg2;")
diff --git a/tests/system-test/6-cluster/clusterCommonCheck.py b/tests/system-test/6-cluster/clusterCommonCheck.py
index 439f0b6b8c..5e5568c5c5 100644
--- a/tests/system-test/6-cluster/clusterCommonCheck.py
+++ b/tests/system-test/6-cluster/clusterCommonCheck.py
@@ -261,7 +261,7 @@ class ClusterComCheck:
                     count+=1
                 else:
                     tdLog.debug(tdSql.queryResult)
-                    tdLog.notice(f"elections of {db_name} all vgroups are failed in{count} s ")
+                    tdLog.notice(f"elections of {db_name} all vgroups are failed in {count} s ")
                     caller = inspect.getframeinfo(inspect.stack()[1][0])
                     args = (caller.filename, caller.lineno)
                     tdLog.exit("%s(%d) failed " % args)
diff --git a/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py
index fb9872a8f6..aa0c7a0177 100644
--- a/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py
+++ b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py
@@ -27,7 +27,7 @@ class TDTestCase:
     def init(self, conn, logSql, replicaVar=1):
         tdLog.debug(f"start to excute {__file__}")
         self.TDDnodes = None
-        tdSql.init(conn.cursor())
+        tdSql.init(conn.cursor(), True)
         self.host = socket.gethostname()
diff --git a/tests/system-test/6-cluster/rollup.json b/tests/system-test/6-cluster/rollup.json
index 02669acb93..a7bbc89fd7 100644
--- a/tests/system-test/6-cluster/rollup.json
+++ b/tests/system-test/6-cluster/rollup.json
@@ -1,6 +1,6 @@
 {
     "filetype": "insert",
-    "cfgdir": "/home/chr/TDengine/debug/../sim/dnode1/cfg/",
+    "cfgdir": "/home/lisa/Documents/workspace/tdengine/debug/../sim/dnode1/cfg/",
     "host": "localhost",
     "port": 6030,
     "rest_port": 6041,
diff --git a/tests/system-test/6-cluster/rollup_db.json b/tests/system-test/6-cluster/rollup_db.json
index fedc47024c..d9ccd08ba5 100644
--- a/tests/system-test/6-cluster/rollup_db.json
+++ b/tests/system-test/6-cluster/rollup_db.json
@@ -1,6 +1,6 @@
 {
     "filetype": "insert",
-    "cfgdir": "/home/chr/TDengine/debug/../sim/dnode1/cfg/",
+    "cfgdir": "/home/lisa/Documents/workspace/tdengine/debug/../sim/dnode1/cfg/",
     "host": "localhost",
     "port": 6030,
     "rest_port": 6041,