From 2f44a8f09f5426944115f62fb758853db1ade955 Mon Sep 17 00:00:00 2001 From: t_max <1172915550@qq.com> Date: Wed, 1 Nov 2023 15:29:39 +0800 Subject: [PATCH 01/65] docs(taosAdapter): varbinary and geometry types in restful --- .../14-reference/02-rest-api/02-rest-api.mdx | 57 +++++++++++++++++++ docs/zh/08-connector/02-rest-api.mdx | 57 +++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/docs/en/14-reference/02-rest-api/02-rest-api.mdx b/docs/en/14-reference/02-rest-api/02-rest-api.mdx index 4da987213c..d56d1a0f24 100644 --- a/docs/en/14-reference/02-rest-api/02-rest-api.mdx +++ b/docs/en/14-reference/02-rest-api/02-rest-api.mdx @@ -262,6 +262,63 @@ The following types may be returned: - "INT UNSIGNED" - "BIGINT UNSIGNED" - "JSON" +- "VARBINARY" +- "GEOMETRY" + +`VARBINARY` and `GEOMETRY` types return data as Hex string, example: + +Prepare data + +```bash +create database demo +use demo +create table t(ts timestamp,c1 varbinary(20),c2 geometry(100)) +insert into t values(now,'\x7f8290','point(100 100)') +``` + +Execute query + +```bash +curl --location 'http://:/rest/sql' \ +--header 'Content-Type: text/plain' \ +--header 'Authorization: Basic cm9vdDp0YW9zZGF0YQ==' \ +--data 'select * from demo.t' +``` + +Return results + +```json +{ + "code": 0, + "column_meta": [ + [ + "ts", + "TIMESTAMP", + 8 + ], + [ + "c1", + "VARBINARY", + 20 + ], + [ + "c2", + "GEOMETRY", + 100 + ] + ], + "data": [ + [ + "2023-11-01T06:28:15.210Z", + "7f8290", + "010100000000000000000059400000000000005940" + ] + ], + "rows": 1 +} +``` + +- `010100000000000000000059400000000000005940` is [Well-Known Binary (WKB)](https://libgeos.org/specifications/wkb/) format for `point(100 100)` #### Errors diff --git a/docs/zh/08-connector/02-rest-api.mdx b/docs/zh/08-connector/02-rest-api.mdx index f3f1e087d8..904b06cfe1 100644 --- a/docs/zh/08-connector/02-rest-api.mdx +++ b/docs/zh/08-connector/02-rest-api.mdx @@ -257,6 +257,63 @@ curl -L -u username:password -d "" 
:/rest/sql/[db_name][?tz=timez - "INT UNSIGNED" - "BIGINT UNSIGNED" - "JSON" +- "VARBINARY" +- "GEOMETRY" + +`VARBINARY` 和 `GEOMETRY` 类型返回数据为 Hex 字符串,样例: + +准备数据 + +```bash +create database demo +use demo +create table t(ts timestamp,c1 varbinary(20),c2 geometry(100)) +insert into t values(now,'\x7f8290','point(100 100)') +``` + +执行查询 + +```bash +curl --location 'http://:/rest/sql' \ +--header 'Content-Type: text/plain' \ +--header 'Authorization: Basic cm9vdDp0YW9zZGF0YQ==' \ +--data 'select * from demo.t' +``` + +返回结果 + +```json +{ + "code": 0, + "column_meta": [ + [ + "ts", + "TIMESTAMP", + 8 + ], + [ + "c1", + "VARBINARY", + 20 + ], + [ + "c2", + "GEOMETRY", + 100 + ] + ], + "data": [ + [ + "2023-11-01T06:28:15.210Z", + "7f8290", + "010100000000000000000059400000000000005940" + ] + ], + "rows": 1 +} +``` + +- `010100000000000000000059400000000000005940` 为 `point(100 100)` 的 [Well-Known Binary (WKB)](https://libgeos.org/specifications/wkb/) 格式 #### 错误 From 979abd5c6f48ad82b487442d9abc20ca6f7f924d Mon Sep 17 00:00:00 2001 From: charles Date: Wed, 6 Dec 2023 08:47:05 +0800 Subject: [PATCH 02/65] add geometry test case to case list by charles --- tests/parallel_test/cases.task | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index cf4559100b..39db02f869 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -518,6 +518,7 @@ e ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/tagFilter.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/projectionDesc.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/ts_3405_3398_3423.py -N 3 -n 3 +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/geometry.py ,,n,system-test,python3 ./test.py -f 2-query/queryQnode.py ,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode1mnode.py From 15f690dd6de0a6671e0a2b6eaa360b7e81c69b7b Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 6 Dec 2023 11:44:41 
+0800 Subject: [PATCH 03/65] add http fail fast --- source/libs/transport/src/thttp.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/source/libs/transport/src/thttp.c b/source/libs/transport/src/thttp.c index afb982a50a..bbfccf9f70 100644 --- a/source/libs/transport/src/thttp.c +++ b/source/libs/transport/src/thttp.c @@ -34,6 +34,7 @@ typedef struct SHttpModule { SAsyncPool* asyncPool; TdThread thread; SHashObj* connStatusTable; + int8_t quit; } SHttpModule; typedef struct SHttpMsg { @@ -190,19 +191,37 @@ static void httpDestroyMsg(SHttpMsg* msg) { taosMemoryFree(msg->cont); taosMemoryFree(msg); } + +static void httpMayDiscardMsg(SHttpModule* http, SAsyncItem* item) { + SHttpMsg *msg = NULL, *quitMsg = NULL; + int8_t quit = atomic_load_8(&http->quit); + if (quit == 1) { + while (!QUEUE_IS_EMPTY(&item->qmsg)) { + queue* h = QUEUE_HEAD(&item->qmsg); + QUEUE_REMOVE(h); + msg = QUEUE_DATA(h, SHttpMsg, q); + if (!msg->quit) { + httpDestroyMsg(msg); + } else { + quitMsg = msg; + } + } + QUEUE_PUSH(&item->qmsg, &quitMsg->q); + } +} static void httpAsyncCb(uv_async_t* handle) { SAsyncItem* item = handle->data; SHttpModule* http = item->pThrd; SHttpMsg *msg = NULL, *quitMsg = NULL; - - queue wq; + queue wq; QUEUE_INIT(&wq); static int32_t BATCH_SIZE = 5; int32_t count = 0; taosThreadMutexLock(&item->mtx); + httpMayDiscardMsg(http, item); while (!QUEUE_IS_EMPTY(&item->qmsg) && count++ < BATCH_SIZE) { queue* h = QUEUE_HEAD(&item->qmsg); @@ -526,6 +545,8 @@ void transHttpEnvDestroy() { return; } SHttpModule* load = taosAcquireRef(httpRefMgt, httpRef); + + atomic_store_8(&load->quit, 1); httpSendQuit(); taosThreadJoin(load->thread, NULL); From f48045d9c07de0a78da03ddb39ad8db743950ed1 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 6 Dec 2023 11:47:08 +0800 Subject: [PATCH 04/65] refactor code --- include/common/tmsgdef.h | 42 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git 
a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 61b471912f..6c7877b49e 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -26,10 +26,10 @@ #undef TD_NEW_MSG_SEG #undef TD_DEF_MSG_TYPE - #undef TD_CLOSE_MSG_TYPE + #undef TD_CLOSE_MSG_SEG #define TD_NEW_MSG_SEG(TYPE) "null", #define TD_DEF_MSG_TYPE(TYPE, MSG, REQ, RSP) MSG, MSG "-rsp", - #define TD_CLOSE_MSG_TYPE(TYPE) + #define TD_CLOSE_MSG_SEG(TYPE) char *tMsgInfo[] = { @@ -37,20 +37,20 @@ #undef TD_NEW_MSG_SEG #undef TD_DEF_MSG_TYPE - #undef TD_CLOSE_MSG_TYPE + #undef TD_CLOSE_MSG_SEG #define TD_NEW_MSG_SEG(TYPE) #define TD_DEF_MSG_TYPE(TYPE, MSG, REQ, RSP) - #define TD_CLOSE_MSG_TYPE(TYPE) TYPE, + #define TD_CLOSE_MSG_SEG(TYPE) TYPE, int32_t tMsgRangeDict[] = { #elif defined(TD_MSG_NUMBER_) #undef TD_NEW_MSG_SEG #undef TD_DEF_MSG_TYPE - #undef TD_CLOSE_MSG_TYPE + #undef TD_CLOSE_MSG_SEG #define TD_NEW_MSG_SEG(TYPE) TYPE##_NUM, #define TD_DEF_MSG_TYPE(TYPE, MSG, REQ, RSP) TYPE##_NUM, TYPE##_RSP_NUM, - #define TD_CLOSE_MSG_TYPE(TYPE) + #define TD_CLOSE_MSG_SEG(TYPE) enum { @@ -58,10 +58,10 @@ #undef TD_NEW_MSG_SEG #undef TD_DEF_MSG_TYPE - #undef TD_CLOSE_MSG_TYPE + #undef TD_CLOSE_MSG_SEG #define TD_NEW_MSG_SEG(TYPE) TYPE##_NUM, #define TD_DEF_MSG_TYPE(TYPE, MSG, REQ, RSP) - #define TD_CLOSE_MSG_TYPE(type) + #define TD_CLOSE_MSG_SEG(type) int32_t tMsgDict[] = { @@ -70,10 +70,10 @@ #undef TD_NEW_MSG_SEG #undef TD_DEF_MSG_TYPE - #undef TD_CLOSE_MSG_TYPE + #undef TD_CLOSE_MSG_SEG #define TD_NEW_MSG_SEG(TYPE) TYPE##_SEG_CODE, #define TD_DEF_MSG_TYPE(TYPE, MSG, REQ, RSP) - #define TD_CLOSE_MSG_TYPE(TYPE) + #define TD_CLOSE_MSG_SEG(TYPE) enum { @@ -82,10 +82,10 @@ #undef TD_NEW_MSG_SEG #undef TD_DEF_MSG_TYPE - #undef TD_CLOSE_MSG_TYPE + #undef TD_CLOSE_MSG_SEG #define TD_NEW_MSG_SEG(TYPE) TYPE = ((TYPE##_SEG_CODE) << 8), #define TD_DEF_MSG_TYPE(TYPE, MSG, REQ, RSP) TYPE, TYPE##_RSP, - #define TD_CLOSE_MSG_TYPE(TYPE) TYPE, + #define TD_CLOSE_MSG_SEG(TYPE) TYPE, enum { // WARN: new msg 
should be appended to segment tail #endif @@ -109,7 +109,7 @@ TD_DEF_MSG_TYPE(TDMT_DND_ALTER_VNODE_TYPE, "dnode-alter-vnode-type", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_DND_CHECK_VNODE_LEARNER_CATCHUP, "dnode-check-vnode-learner-catchup", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_DND_MAX_MSG, "dnd-max", NULL, NULL) - TD_CLOSE_MSG_TYPE(TDMT_END_DND_MSG) + TD_CLOSE_MSG_SEG(TDMT_END_DND_MSG) TD_NEW_MSG_SEG(TDMT_MND_MSG) // 1<<8 TD_DEF_MSG_TYPE(TDMT_MND_CONNECT, "connect", NULL, NULL) @@ -218,7 +218,7 @@ TD_DEF_MSG_TYPE(TDMT_MND_DROP_VIEW, "drop-view", SCMDropViewReq, NULL) TD_DEF_MSG_TYPE(TDMT_MND_VIEW_META, "view-meta", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_MAX_MSG, "mnd-max", NULL, NULL) - TD_CLOSE_MSG_TYPE(TDMT_END_MND_MSG) + TD_CLOSE_MSG_SEG(TDMT_END_MND_MSG) TD_NEW_MSG_SEG(TDMT_VND_MSG) // 2<<8 TD_DEF_MSG_TYPE(TDMT_VND_SUBMIT, "submit", SSubmitReq, SSubmitRsp) @@ -268,7 +268,7 @@ TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_DROP_INDEX, "vnode-drop-index", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_DISABLE_WRITE, "vnode-disable-write", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_MAX_MSG, "vnd-max", NULL, NULL) - TD_CLOSE_MSG_TYPE(TDMT_END_VND_MSG) + TD_CLOSE_MSG_SEG(TDMT_END_VND_MSG) TD_NEW_MSG_SEG(TDMT_SCH_MSG) // 3<<8 TD_DEF_MSG_TYPE(TDMT_SCH_QUERY, "query", NULL, NULL) @@ -283,7 +283,7 @@ TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_LINK_BROKEN, "link-broken", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_TASK_NOTIFY, "task-notify", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_MAX_MSG, "sch-max", NULL, NULL) - TD_CLOSE_MSG_TYPE(TDMT_END_SCH_MSG) + TD_CLOSE_MSG_SEG(TDMT_END_SCH_MSG) TD_NEW_MSG_SEG(TDMT_STREAM_MSG) //4 << 8 @@ -301,11 +301,11 @@ TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_STOP, "stream-task-stop", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_HTASK_DROP, "stream-htask-drop", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_MAX_MSG, "stream-max", NULL, NULL) 
- TD_CLOSE_MSG_TYPE(TDMT_END_STREAM_MSG) + TD_CLOSE_MSG_SEG(TDMT_END_STREAM_MSG) TD_NEW_MSG_SEG(TDMT_MON_MSG) //5 << 8 TD_DEF_MSG_TYPE(TDMT_MON_MAX_MSG, "monitor-max", NULL, NULL) - TD_CLOSE_MSG_TYPE(TDMT_END_MON_MSG) + TD_CLOSE_MSG_SEG(TDMT_END_MON_MSG) TD_NEW_MSG_SEG(TDMT_SYNC_MSG) //6 << 8 TD_DEF_MSG_TYPE(TDMT_SYNC_TIMEOUT, "sync-timer", NULL, NULL) @@ -337,7 +337,7 @@ TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_PREP_SNAPSHOT_REPLY, "sync-prep-snapshot-reply", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_MAX_MSG, "sync-max", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_FORCE_FOLLOWER, "sync-force-become-follower", NULL, NULL) - TD_CLOSE_MSG_TYPE(TDMT_END_SYNC_MSG) + TD_CLOSE_MSG_SEG(TDMT_END_SYNC_MSG) TD_NEW_MSG_SEG(TDMT_VND_STREAM_MSG) //7 << 8 @@ -348,7 +348,7 @@ TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TASK_RESET, "vnode-stream-reset", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TASK_CHECK, "vnode-stream-task-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_MAX_MSG, "vnd-stream-max", NULL, NULL) - TD_CLOSE_MSG_TYPE(TDMT_END_VND_STREAM_MSG) + TD_CLOSE_MSG_SEG(TDMT_END_VND_STREAM_MSG) TD_NEW_MSG_SEG(TDMT_VND_TMQ_MSG) //8 << 8 TD_DEF_MSG_TYPE(TDMT_VND_TMQ_SUBSCRIBE, "vnode-tmq-subscribe", SMqRebVgReq, SMqRebVgRsp) @@ -362,7 +362,7 @@ TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_TMQ_VG_WALINFO, "vnode-tmq-vg-walinfo", SMqPollReq, SMqDataBlkRsp) TD_DEF_MSG_TYPE(TDMT_VND_TMQ_VG_COMMITTEDINFO, "vnode-tmq-committedinfo", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_TMQ_MAX_MSG, "vnd-tmq-max", NULL, NULL) - TD_CLOSE_MSG_TYPE(TDMT_END_TMQ_MSG) + TD_CLOSE_MSG_SEG(TDMT_END_TMQ_MSG) TD_NEW_MSG_SEG(TDMT_MAX_MSG) // msg end mark From 1bb4d1b34d970666a5da654a22a4a7e6a0caa3fe Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 6 Dec 2023 11:51:23 +0800 Subject: [PATCH 05/65] refactor code --- include/common/tmsgdef.h | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 6c7877b49e..a2f9a5c475 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -365,7 +365,7 @@ TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_CLOSE_MSG_SEG(TDMT_END_TMQ_MSG) TD_NEW_MSG_SEG(TDMT_MAX_MSG) // msg end mark - + TD_CLOSE_MSG_SEG(TDMT_END_MAX_MSG) From 59c59362f3819c6cb1b666a37ee9c8c34a755e94 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 6 Dec 2023 13:52:45 +0800 Subject: [PATCH 06/65] add http fast quit --- source/libs/transport/src/thttp.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/source/libs/transport/src/thttp.c b/source/libs/transport/src/thttp.c index bbfccf9f70..b36731ab0b 100644 --- a/source/libs/transport/src/thttp.c +++ b/source/libs/transport/src/thttp.c @@ -194,18 +194,21 @@ static void httpDestroyMsg(SHttpMsg* msg) { static void httpMayDiscardMsg(SHttpModule* http, SAsyncItem* item) { SHttpMsg *msg = NULL, *quitMsg = NULL; - int8_t quit = atomic_load_8(&http->quit); - if (quit == 1) { - while (!QUEUE_IS_EMPTY(&item->qmsg)) { - queue* h = QUEUE_HEAD(&item->qmsg); - QUEUE_REMOVE(h); - msg = QUEUE_DATA(h, SHttpMsg, q); - if (!msg->quit) { - httpDestroyMsg(msg); - } else { - quitMsg = msg; - } + if (atomic_load_8(&http->quit) == 0) { + return; + } + + while (!QUEUE_IS_EMPTY(&item->qmsg)) { + queue* h = QUEUE_HEAD(&item->qmsg); + QUEUE_REMOVE(h); + msg = QUEUE_DATA(h, SHttpMsg, q); + if (!msg->quit) { + httpDestroyMsg(msg); + } else { + quitMsg = msg; } + } + if (quitMsg != NULL) { QUEUE_PUSH(&item->qmsg, &quitMsg->q); } } From 9c909fd7a2960644c9a8a707f06affb13724b56a Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 6 Dec 2023 15:49:21 +0800 Subject: [PATCH 07/65] refactor code --- source/libs/transport/src/thttp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/libs/transport/src/thttp.c 
b/source/libs/transport/src/thttp.c index b36731ab0b..33d1a2565a 100644 --- a/source/libs/transport/src/thttp.c +++ b/source/libs/transport/src/thttp.c @@ -167,7 +167,8 @@ _OVER: static FORCE_INLINE int32_t taosBuildDstAddr(const char* server, uint16_t port, struct sockaddr_in* dest) { uint32_t ip = taosGetIpv4FromFqdn(server); if (ip == 0xffffffff) { - tError("http-report failed to get http server:%s since %s", server, errno == 0 ? "invalid http server" : terrstr()); + tError("http-report failed to get http server:%s since %s", server, + (terrno == 0 || errno == 0) ? "invalid http server" : terrstr()); return -1; } char buf[128] = {0}; @@ -519,9 +520,10 @@ static void transHttpDestroyHandle(void* handle) { taosMemoryFree(handle); } static void transHttpEnvInit() { httpRefMgt = taosOpenRef(1, transHttpDestroyHandle); - SHttpModule* http = taosMemoryMalloc(sizeof(SHttpModule)); + SHttpModule* http = taosMemoryCalloc(1, sizeof(SHttpModule)); http->loop = taosMemoryMalloc(sizeof(uv_loop_t)); http->connStatusTable = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + http->quit = 0; uv_loop_init(http->loop); From 90588fac6988d672d853a44842015a8a915a2b08 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 6 Dec 2023 16:06:41 +0800 Subject: [PATCH 08/65] refactor code --- source/libs/transport/src/thttp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/libs/transport/src/thttp.c b/source/libs/transport/src/thttp.c index 33d1a2565a..96537a950e 100644 --- a/source/libs/transport/src/thttp.c +++ b/source/libs/transport/src/thttp.c @@ -167,8 +167,7 @@ _OVER: static FORCE_INLINE int32_t taosBuildDstAddr(const char* server, uint16_t port, struct sockaddr_in* dest) { uint32_t ip = taosGetIpv4FromFqdn(server); if (ip == 0xffffffff) { - tError("http-report failed to get http server:%s since %s", server, - (terrno == 0 || errno == 0) ? 
"invalid http server" : terrstr()); + tError("http-report failed to resolving domain names: %s", server); return -1; } char buf[128] = {0}; From 9e36b0dee4593247f5ca62753781cd46a9f55ff4 Mon Sep 17 00:00:00 2001 From: charles Date: Wed, 6 Dec 2023 18:04:26 +0800 Subject: [PATCH 09/65] add test case to create/drop same name db multiple times --- .../0-others/test_create_same_name_db.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 tests/system-test/0-others/test_create_same_name_db.py diff --git a/tests/system-test/0-others/test_create_same_name_db.py b/tests/system-test/0-others/test_create_same_name_db.py new file mode 100644 index 0000000000..2b2c63af53 --- /dev/null +++ b/tests/system-test/0-others/test_create_same_name_db.py @@ -0,0 +1,38 @@ +import time +import os +import platform +import taos +import threading +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * + + +class TDTestCase: + """This test case is used to veirfy TD-25762 + """ + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + self.db_name = "db" + + def run(self): + try: + # create same name database multiple times + for i in range(100): + tdLog.debug(f"round {str(i+1)} create database {self.db_name}") + tdSql.execute(f"create database {self.db_name}") + tdLog.debug(f"round {str(i+1)} drop database {self.db_name}") + tdSql.execute(f"drop database {self.db_name}") + except Exception as ex: + tdLog.exit(str(ex)) + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) From aa1445742bb5ae27c1563d84eef3e1c91801dc5d Mon Sep 17 00:00:00 2001 From: Charles Date: Thu, 7 Dec 2023 08:06:45 +0800 Subject: [PATCH 10/65] Update cases.task --- tests/parallel_test/cases.task | 1 - 1 file 
changed, 1 deletion(-) diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 39db02f869..cf4559100b 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -518,7 +518,6 @@ e ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/tagFilter.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/projectionDesc.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/ts_3405_3398_3423.py -N 3 -n 3 -,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/geometry.py ,,n,system-test,python3 ./test.py -f 2-query/queryQnode.py ,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode1mnode.py From e429526789f4d79477508ad41b156795dd85e875 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 13 Nov 2023 12:05:55 +0800 Subject: [PATCH 11/65] refact: rename STSnapRange to STFileSetRange --- source/dnode/vnode/src/inc/tsdb.h | 14 +++++------ source/dnode/vnode/src/tsdb/tsdbFS2.c | 16 ++++++------- source/dnode/vnode/src/tsdb/tsdbFS2.h | 8 +++---- source/dnode/vnode/src/tsdb/tsdbFSet2.c | 4 ++-- source/dnode/vnode/src/tsdb/tsdbFSet2.h | 6 ++--- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 28 +++++++++++----------- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 26 ++++++++++---------- 7 files changed, 51 insertions(+), 51 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 79f9caab33..9cc6cd1132 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -681,14 +681,14 @@ struct SDelFWriter { typedef struct STFileSet STFileSet; typedef TARRAY2(STFileSet *) TFileSetArray; -typedef struct STSnapRange STSnapRange; -typedef TARRAY2(STSnapRange *) TSnapRangeArray; // disjoint snap ranges +typedef struct STFileSetRange STFileSetRange; +typedef TARRAY2(STFileSetRange *) TFileSetRangeArray; // disjoint ranges // util -int32_t tSerializeSnapRangeArray(void *buf, int32_t bufLen, TSnapRangeArray *pSnapR); -int32_t 
tDeserializeSnapRangeArray(void *buf, int32_t bufLen, TSnapRangeArray *pSnapR); -void tsdbSnapRangeArrayDestroy(TSnapRangeArray **ppSnap); -SHashObj *tsdbGetSnapRangeHash(TSnapRangeArray *pRanges); +int32_t tSerializeSnapRangeArray(void *buf, int32_t bufLen, TFileSetRangeArray *pSnapR); +int32_t tDeserializeSnapRangeArray(void *buf, int32_t bufLen, TFileSetRangeArray *pSnapR); +void tsdbFileSetRangeArrayDestroy(TFileSetRangeArray **ppSnap); +SHashObj *tsdbGetSnapRangeHash(TFileSetRangeArray *pRanges); // snap partition list typedef TARRAY2(SVersionRange) SVerRangeList; @@ -699,7 +699,7 @@ STsdbSnapPartList *tsdbSnapPartListCreate(); void tsdbSnapPartListDestroy(STsdbSnapPartList **ppList); int32_t tSerializeTsdbSnapPartList(void *buf, int32_t bufLen, STsdbSnapPartList *pList); int32_t tDeserializeTsdbSnapPartList(void *buf, int32_t bufLen, STsdbSnapPartList *pList); -int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList *pList, TSnapRangeArray **ppRanges); +int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList *pList, TFileSetRangeArray **ppRanges); enum { TSDB_SNAP_RANGE_TYP_HEAD = 0, diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 635c53bbed..df4df18dc3 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -1072,7 +1072,7 @@ int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr) { return 0; } -int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pRanges, TFileSetArray **fsetArr, +int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TFileSetRangeArray *pRanges, TFileSetArray **fsetArr, TFileOpArray *fopArr) { int32_t code = 0; STFileSet *fset; @@ -1096,7 +1096,7 @@ int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pRange int64_t ever = VERSION_MAX; if (pHash) { int32_t fid = fset->fid; - STSnapRange *u = taosHashGet(pHash, &fid, sizeof(fid)); + STFileSetRange *u = taosHashGet(pHash, &fid, sizeof(fid)); if (u) { ever = 
u->sver - 1; } @@ -1123,7 +1123,7 @@ _out: return code; } -SHashObj *tsdbGetSnapRangeHash(TSnapRangeArray *pRanges) { +SHashObj *tsdbGetSnapRangeHash(TFileSetRangeArray *pRanges) { int32_t capacity = TARRAY2_SIZE(pRanges) * 2; SHashObj *pHash = taosHashInit(capacity, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); if (pHash == NULL) { @@ -1132,7 +1132,7 @@ SHashObj *tsdbGetSnapRangeHash(TSnapRangeArray *pRanges) { } for (int32_t i = 0; i < TARRAY2_SIZE(pRanges); i++) { - STSnapRange *u = TARRAY2_GET(pRanges, i); + STFileSetRange *u = TARRAY2_GET(pRanges, i); int32_t fid = u->fid; int32_t code = taosHashPut(pHash, &fid, sizeof(fid), u, sizeof(*u)); ASSERT(code == 0); @@ -1141,11 +1141,11 @@ SHashObj *tsdbGetSnapRangeHash(TSnapRangeArray *pRanges) { return pHash; } -int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pRanges, - TSnapRangeArray **fsrArr) { +int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TFileSetRangeArray *pRanges, + TFileSetRangeArray **fsrArr) { int32_t code = 0; STFileSet *fset; - STSnapRange *fsr1 = NULL; + STFileSetRange *fsr1 = NULL; SHashObj *pHash = NULL; fsrArr[0] = taosMemoryCalloc(1, sizeof(*fsrArr[0])); @@ -1170,7 +1170,7 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev if (pHash) { int32_t fid = fset->fid; - STSnapRange *u = taosHashGet(pHash, &fid, sizeof(fid)); + STFileSetRange *u = taosHashGet(pHash, &fid, sizeof(fid)); if (u) { sver1 = u->sver; tsdbDebug("range hash get fid:%d, sver:%" PRId64 ", ever:%" PRId64, u->fid, u->sver, u->ever); diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.h b/source/dnode/vnode/src/tsdb/tsdbFS2.h index 74453126cf..8fdce9e690 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.h @@ -44,12 +44,12 @@ int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr); int32_t 
tsdbFSCreateRefSnapshotWithoutLock(STFileSystem *fs, TFileSetArray **fsetArr); int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr); -int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pExclude, TFileSetArray **fsetArr, +int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TFileSetRangeArray *pExclude, TFileSetArray **fsetArr, TFileOpArray *fopArr); int32_t tsdbFSDestroyCopyRangedSnapshot(TFileSetArray **fsetArr, TFileOpArray *fopArr); -int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pRanges, - TSnapRangeArray **fsrArr); -int32_t tsdbFSDestroyRefRangedSnapshot(TSnapRangeArray **fsrArr); +int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TFileSetRangeArray *pRanges, + TFileSetRangeArray **fsrArr); +int32_t tsdbFSDestroyRefRangedSnapshot(TFileSetRangeArray **fsrArr); // txn int64_t tsdbFSAllocEid(STFileSystem *fs); int32_t tsdbFSEditBegin(STFileSystem *fs, const TFileOpArray *opArray, EFEditT etype); diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.c b/source/dnode/vnode/src/tsdb/tsdbFSet2.c index 1bf886e3b0..7673299e4b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.c @@ -533,7 +533,7 @@ int32_t tsdbTFileSetFilteredInitDup(STsdb *pTsdb, const STFileSet *fset1, int64_ return 0; } -int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, int64_t sver, int64_t ever, STSnapRange **fsr) { +int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, int64_t sver, int64_t ever, STFileSetRange **fsr) { fsr[0] = taosMemoryCalloc(1, sizeof(*fsr[0])); if (fsr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; fsr[0]->fid = fset1->fid; @@ -575,7 +575,7 @@ int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fs return 0; } -int32_t tsdbTSnapRangeClear(STSnapRange **fsr) { +int32_t tsdbTSnapRangeClear(STFileSetRange **fsr) { if (!fsr[0]) return 0; 
tsdbTFileSetClear(&fsr[0]->fset); diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.h b/source/dnode/vnode/src/tsdb/tsdbFSet2.h index 83f5b1e83c..3a6427a42c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.h @@ -49,8 +49,8 @@ int32_t tsdbTFileSetRemove(STFileSet *fset); int32_t tsdbTFileSetFilteredInitDup(STsdb *pTsdb, const STFileSet *fset1, int64_t ever, STFileSet **fset, TFileOpArray *fopArr); -int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, int64_t sver, int64_t ever, STSnapRange **fsr); -int32_t tsdbTSnapRangeClear(STSnapRange **fsr); +int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, int64_t sver, int64_t ever, STFileSetRange **fsr); +int32_t tsdbTSnapRangeClear(STFileSetRange **fsr); // to/from json int32_t tsdbTFileSetToJson(const STFileSet *fset, cJSON *json); @@ -101,7 +101,7 @@ struct STFileSet { bool blockCommit; }; -struct STSnapRange { +struct STFileSetRange { int32_t fid; int64_t sver; int64_t ever; diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index e757daa0af..9d3efcfdae 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -32,12 +32,12 @@ struct STsdbSnapReader { uint8_t* aBuf[5]; SSkmInfo skmTb[1]; - TSnapRangeArray* fsrArr; + TFileSetRangeArray* fsrArr; // context struct { int32_t fsrArrIdx; - STSnapRange* fsr; + STFileSetRange* fsr; bool isDataDone; bool isTombDone; } ctx[1]; @@ -437,14 +437,14 @@ int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, reader[0]->ever = ever; reader[0]->type = type; - code = tsdbFSCreateRefRangedSnapshot(tsdb->pFS, sver, ever, (TSnapRangeArray*)pRanges, &reader[0]->fsrArr); + code = tsdbFSCreateRefRangedSnapshot(tsdb->pFS, sver, ever, (TFileSetRangeArray*)pRanges, &reader[0]->fsrArr); TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s, 
sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), __func__, lino, tstrerror(code), sver, ever, type); - tsdbSnapRangeArrayDestroy(&reader[0]->fsrArr); + tsdbFileSetRangeArrayDestroy(&reader[0]->fsrArr); taosMemoryFree(reader[0]); reader[0] = NULL; } else { @@ -472,7 +472,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** reader) { TARRAY2_DESTROY(reader[0]->sttReaderArr, tsdbSttFileReaderClose); tsdbDataFileReaderClose(&reader[0]->dataReader); - tsdbSnapRangeArrayDestroy(&reader[0]->fsrArr); + tsdbFileSetRangeArrayDestroy(&reader[0]->fsrArr); tDestroyTSchema(reader[0]->skmTb->pTSchema); for (int32_t i = 0; i < ARRAY_SIZE(reader[0]->aBuf); ++i) { @@ -1061,7 +1061,7 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, void* pRang writer[0]->compactVersion = INT64_MAX; writer[0]->now = taosGetTimestampMs(); - code = tsdbFSCreateCopyRangedSnapshot(pTsdb->pFS, (TSnapRangeArray*)pRanges, &writer[0]->fsetArr, writer[0]->fopArr); + code = tsdbFSCreateCopyRangedSnapshot(pTsdb->pFS, (TFileSetRangeArray*)pRanges, &writer[0]->fsetArr, writer[0]->fopArr); TSDB_CHECK_CODE(code, lino, _exit); _exit: @@ -1168,7 +1168,7 @@ _exit: return code; } -// snap part +// STsdbSnapPartition ===================================== static int32_t tsdbSnapPartCmprFn(STsdbSnapPartition* x, STsdbSnapPartition* y) { if (x->fid < y->fid) return -1; if (x->fid > y->fid) return 1; @@ -1183,7 +1183,7 @@ static int32_t tVersionRangeCmprFn(SVersionRange* x, SVersionRange* y) { return 0; } -static int32_t tsdbSnapRangeCmprFn(STSnapRange* x, STSnapRange* y) { +static int32_t tsdbFileSetRangeCmprFn(STFileSetRange* x, STFileSetRange* y) { if (x->fid < y->fid) return -1; if (x->fid > y->fid) return 1; return 0; @@ -1462,8 +1462,8 @@ _err: return -1; } -int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList* pList, TSnapRangeArray** ppRanges) { - TSnapRangeArray* pDiff = taosMemoryCalloc(1, sizeof(TSnapRangeArray)); +int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList* pList, 
TFileSetRangeArray** ppRanges) { + TFileSetRangeArray* pDiff = taosMemoryCalloc(1, sizeof(TFileSetRangeArray)); if (pDiff == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -1472,7 +1472,7 @@ int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList* pList, TSnapRangeArray** STsdbSnapPartition* part; TARRAY2_FOREACH(pList, part) { - STSnapRange* r = taosMemoryCalloc(1, sizeof(STSnapRange)); + STFileSetRange* r = taosMemoryCalloc(1, sizeof(STFileSetRange)); if (r == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -1493,7 +1493,7 @@ int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList* pList, TSnapRangeArray** r->sver = maxVerValid + 1; r->ever = VERSION_MAX; tsdbDebug("range diff fid:%" PRId64 ", sver:%" PRId64 ", ever:%" PRId64, part->fid, r->sver, r->ever); - int32_t code = TARRAY2_SORT_INSERT(pDiff, r, tsdbSnapRangeCmprFn); + int32_t code = TARRAY2_SORT_INSERT(pDiff, r, tsdbFileSetRangeCmprFn); ASSERT(code == 0); } ppRanges[0] = pDiff; @@ -1503,12 +1503,12 @@ int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList* pList, TSnapRangeArray** _err: if (pDiff) { - tsdbSnapRangeArrayDestroy(&pDiff); + tsdbFileSetRangeArrayDestroy(&pDiff); } return -1; } -void tsdbSnapRangeArrayDestroy(TSnapRangeArray** ppSnap) { +void tsdbFileSetRangeArrayDestroy(TFileSetRangeArray** ppSnap) { if (ppSnap && ppSnap[0]) { TARRAY2_DESTROY(ppSnap[0], tsdbTSnapRangeClear); taosMemoryFree(ppSnap[0]); diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 34b508388f..f65d9085fd 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -29,7 +29,7 @@ struct SVSnapReader { SMetaSnapReader *pMetaReader; // tsdb int8_t tsdbDone; - TSnapRangeArray *pRanges; + TFileSetRangeArray *pRanges; STsdbSnapReader *pTsdbReader; // tq int8_t tqHandleDone; @@ -45,11 +45,11 @@ struct SVSnapReader { SStreamStateReader *pStreamStateReader; // rsma int8_t rsmaDone; - TSnapRangeArray 
*pRsmaRanges[TSDB_RETENTION_L2]; + TFileSetRangeArray *pRsmaRanges[TSDB_RETENTION_L2]; SRSmaSnapReader *pRsmaReader; }; -static int32_t vnodeExtractSnapInfoDiff(void *buf, int32_t bufLen, TSnapRangeArray **ppRanges) { +static int32_t vnodeExtractSnapInfoDiff(void *buf, int32_t bufLen, TFileSetRangeArray **ppRanges) { int32_t code = -1; STsdbSnapPartList *pList = tsdbSnapPartListCreate(); if (pList == NULL) { @@ -69,7 +69,7 @@ _out: return code; } -static TSnapRangeArray **vnodeSnapReaderGetTsdbRanges(SVSnapReader *pReader, int32_t tsdbTyp) { +static TFileSetRangeArray **vnodeSnapReaderGetTsdbRanges(SVSnapReader *pReader, int32_t tsdbTyp) { ASSERTS(sizeof(pReader->pRsmaRanges) / sizeof(pReader->pRsmaRanges[0]) == 2, "Unexpected array size"); switch (tsdbTyp) { case SNAP_DATA_TSDB: @@ -94,7 +94,7 @@ static int32_t vnodeSnapReaderDoSnapInfo(SVSnapReader *pReader, SSnapshotParam * goto _out; } - TSnapRangeArray **ppRanges = NULL; + TFileSetRangeArray **ppRanges = NULL; int32_t offset = 0; while (offset + sizeof(SSyncTLV) < datHead->len) { @@ -152,9 +152,9 @@ _err: static void vnodeSnapReaderDestroyTsdbRanges(SVSnapReader *pReader) { int32_t tsdbTyps[TSDB_RETENTION_MAX] = {SNAP_DATA_TSDB, SNAP_DATA_RSMA1, SNAP_DATA_RSMA2}; for (int32_t j = 0; j < TSDB_RETENTION_MAX; ++j) { - TSnapRangeArray **ppRanges = vnodeSnapReaderGetTsdbRanges(pReader, tsdbTyps[j]); + TFileSetRangeArray **ppRanges = vnodeSnapReaderGetTsdbRanges(pReader, tsdbTyps[j]); if (ppRanges == NULL) continue; - tsdbSnapRangeArrayDestroy(ppRanges); + tsdbFileSetRangeArrayDestroy(ppRanges); } } @@ -455,7 +455,7 @@ struct SVSnapWriter { // meta SMetaSnapWriter *pMetaSnapWriter; // tsdb - TSnapRangeArray *pRanges; + TFileSetRangeArray *pRanges; STsdbSnapWriter *pTsdbSnapWriter; // tq STqSnapWriter *pTqSnapWriter; @@ -465,11 +465,11 @@ struct SVSnapWriter { SStreamTaskWriter *pStreamTaskWriter; SStreamStateWriter *pStreamStateWriter; // rsma - TSnapRangeArray *pRsmaRanges[TSDB_RETENTION_L2]; + TFileSetRangeArray 
*pRsmaRanges[TSDB_RETENTION_L2]; SRSmaSnapWriter *pRsmaSnapWriter; }; -TSnapRangeArray **vnodeSnapWriterGetTsdbRanges(SVSnapWriter *pWriter, int32_t tsdbTyp) { +TFileSetRangeArray **vnodeSnapWriterGetTsdbRanges(SVSnapWriter *pWriter, int32_t tsdbTyp) { ASSERTS(sizeof(pWriter->pRsmaRanges) / sizeof(pWriter->pRsmaRanges[0]) == 2, "Unexpected array size"); switch (tsdbTyp) { case SNAP_DATA_TSDB: @@ -494,7 +494,7 @@ static int32_t vnodeSnapWriterDoSnapInfo(SVSnapWriter *pWriter, SSnapshotParam * goto _out; } - TSnapRangeArray **ppRanges = NULL; + TFileSetRangeArray **ppRanges = NULL; int32_t offset = 0; while (offset + sizeof(SSyncTLV) < datHead->len) { @@ -576,9 +576,9 @@ _err: static void vnodeSnapWriterDestroyTsdbRanges(SVSnapWriter *pWriter) { int32_t tsdbTyps[TSDB_RETENTION_MAX] = {SNAP_DATA_TSDB, SNAP_DATA_RSMA1, SNAP_DATA_RSMA2}; for (int32_t j = 0; j < TSDB_RETENTION_MAX; ++j) { - TSnapRangeArray **ppRanges = vnodeSnapWriterGetTsdbRanges(pWriter, tsdbTyps[j]); + TFileSetRangeArray **ppRanges = vnodeSnapWriterGetTsdbRanges(pWriter, tsdbTyps[j]); if (ppRanges == NULL) continue; - tsdbSnapRangeArrayDestroy(ppRanges); + tsdbFileSetRangeArrayDestroy(ppRanges); } } From ddcabcfa4afc3f5a46b3ab6c1e40c5647ba17f51 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 13 Nov 2023 14:39:28 +0800 Subject: [PATCH 12/65] refact: remove unused fn tSerializeSnapRangeArray --- source/dnode/vnode/src/inc/tsdb.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 9cc6cd1132..99b92ace3a 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -685,8 +685,6 @@ typedef struct STFileSetRange STFileSetRange; typedef TARRAY2(STFileSetRange *) TFileSetRangeArray; // disjoint ranges // util -int32_t tSerializeSnapRangeArray(void *buf, int32_t bufLen, TFileSetRangeArray *pSnapR); -int32_t tDeserializeSnapRangeArray(void *buf, int32_t bufLen, TFileSetRangeArray *pSnapR); void 
tsdbFileSetRangeArrayDestroy(TFileSetRangeArray **ppSnap); SHashObj *tsdbGetSnapRangeHash(TFileSetRangeArray *pRanges); From 3ad68bdfea9c14acf1a886a85af65e25c2f42771 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 30 Nov 2023 17:35:28 +0800 Subject: [PATCH 13/65] enh: add tsdbSnapInfo.c --- source/dnode/vnode/src/tsdb/tsdbSnapInfo.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 source/dnode/vnode/src/tsdb/tsdbSnapInfo.c diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c new file mode 100644 index 0000000000..e69de29bb2 From b8b53a4b0e7f847dbb65c2e9f93aa4192b4047ef Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 30 Nov 2023 17:53:16 +0800 Subject: [PATCH 14/65] refact: put source code of snap info into tsdbSnapInfo.c --- source/dnode/vnode/src/tsdb/tsdbSnapInfo.c | 456 +++++++++++++++++++++ source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 436 -------------------- 2 files changed, 456 insertions(+), 436 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c index e69de29bb2..e70d0eaabc 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c @@ -0,0 +1,456 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include "tsdb.h" +#include "tsdbDataFileRW.h" +#include "tsdbFS2.h" +#include "tsdbFSetRW.h" +#include "tsdbIter.h" +#include "tsdbSttFileRW.h" + +// STsdbSnapPartition ===================================== +static int32_t tsdbSnapPartCmprFn(STsdbSnapPartition* x, STsdbSnapPartition* y) { + if (x->fid < y->fid) return -1; + if (x->fid > y->fid) return 1; + return 0; +} + +static int32_t tVersionRangeCmprFn(SVersionRange* x, SVersionRange* y) { + if (x->minVer < y->minVer) return -1; + if (x->minVer > y->minVer) return 1; + if (x->maxVer < y->maxVer) return -1; + if (x->maxVer > y->maxVer) return 1; + return 0; +} + +static int32_t tsdbFileSetRangeCmprFn(STFileSetRange* x, STFileSetRange* y) { + if (x->fid < y->fid) return -1; + if (x->fid > y->fid) return 1; + return 0; +} + +STsdbSnapPartition* tsdbSnapPartitionCreate() { + STsdbSnapPartition* pSP = taosMemoryCalloc(1, sizeof(STsdbSnapPartition)); + if (pSP == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + for (int32_t i = 0; i < TSDB_SNAP_RANGE_TYP_MAX; i++) { + TARRAY2_INIT(&pSP->verRanges[i]); + } + return pSP; +} + +void tsdbSnapPartitionClear(STsdbSnapPartition** ppSP) { + if (ppSP == NULL || ppSP[0] == NULL) { + return; + } + for (int32_t i = 0; i < TSDB_SNAP_RANGE_TYP_MAX; i++) { + TARRAY2_DESTROY(&ppSP[0]->verRanges[i], NULL); + } + taosMemoryFree(ppSP[0]); + ppSP[0] = NULL; +} + +static int32_t tsdbFTypeToSRangeTyp(tsdb_ftype_t ftype) { + switch (ftype) { + case TSDB_FTYPE_HEAD: + return TSDB_SNAP_RANGE_TYP_HEAD; + case TSDB_FTYPE_DATA: + return TSDB_SNAP_RANGE_TYP_DATA; + case TSDB_FTYPE_SMA: + return TSDB_SNAP_RANGE_TYP_SMA; + case TSDB_FTYPE_TOMB: + return TSDB_SNAP_RANGE_TYP_TOMB; + case TSDB_FTYPE_STT: + return TSDB_SNAP_RANGE_TYP_STT; + } + return TSDB_SNAP_RANGE_TYP_MAX; +} + +static int32_t tsdbTFileSetToSnapPart(STFileSet* fset, STsdbSnapPartition** ppSP) { + STsdbSnapPartition* p = tsdbSnapPartitionCreate(); + if (p == NULL) { + goto _err; + } + + p->fid = fset->fid; + 
+ int32_t code = 0; + int32_t typ = 0; + int32_t corrupt = false; + int32_t count = 0; + for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset->farr[ftype] == NULL) continue; + typ = tsdbFTypeToSRangeTyp(ftype); + ASSERT(typ < TSDB_SNAP_RANGE_TYP_MAX); + STFile* f = fset->farr[ftype]->f; + if (f->maxVer > fset->maxVerValid) { + corrupt = true; + tsdbError("skip incomplete data file: fid:%d, maxVerValid:%" PRId64 ", minVer:%" PRId64 ", maxVer:%" PRId64 + ", ftype: %d", + fset->fid, fset->maxVerValid, f->minVer, f->maxVer, ftype); + continue; + } + count++; + SVersionRange vr = {.minVer = f->minVer, .maxVer = f->maxVer}; + code = TARRAY2_SORT_INSERT(&p->verRanges[typ], vr, tVersionRangeCmprFn); + ASSERT(code == 0); + } + + typ = TSDB_SNAP_RANGE_TYP_STT; + const SSttLvl* lvl; + TARRAY2_FOREACH(fset->lvlArr, lvl) { + STFileObj* fobj; + TARRAY2_FOREACH(lvl->fobjArr, fobj) { + STFile* f = fobj->f; + if (f->maxVer > fset->maxVerValid) { + corrupt = true; + tsdbError("skip incomplete stt file.fid:%d, maxVerValid:%" PRId64 ", minVer:%" PRId64 ", maxVer:%" PRId64 + ", ftype: %d", + fset->fid, fset->maxVerValid, f->minVer, f->maxVer, typ); + continue; + } + count++; + SVersionRange vr = {.minVer = f->minVer, .maxVer = f->maxVer}; + code = TARRAY2_SORT_INSERT(&p->verRanges[typ], vr, tVersionRangeCmprFn); + ASSERT(code == 0); + } + } + if (corrupt && count == 0) { + SVersionRange vr = {.minVer = VERSION_MIN, .maxVer = fset->maxVerValid}; + code = TARRAY2_SORT_INSERT(&p->verRanges[typ], vr, tVersionRangeCmprFn); + ASSERT(code == 0); + } + ppSP[0] = p; + return 0; + +_err: + tsdbSnapPartitionClear(&p); + return -1; +} + +STsdbSnapPartList* tsdbSnapPartListCreate() { + STsdbSnapPartList* pList = taosMemoryCalloc(1, sizeof(STsdbSnapPartList)); + if (pList == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + TARRAY2_INIT(pList); + return pList; +} + +static STsdbSnapPartList* tsdbGetSnapPartList(STFileSystem* fs) { + STsdbSnapPartList* 
pList = tsdbSnapPartListCreate(); + if (pList == NULL) { + return NULL; + } + + int32_t code = 0; + taosThreadMutexLock(&fs->tsdb->mutex); + STFileSet* fset; + TARRAY2_FOREACH(fs->fSetArr, fset) { + STsdbSnapPartition* pItem = NULL; + if (tsdbTFileSetToSnapPart(fset, &pItem) < 0) { + code = -1; + break; + } + ASSERT(pItem != NULL); + code = TARRAY2_SORT_INSERT(pList, pItem, tsdbSnapPartCmprFn); + ASSERT(code == 0); + } + taosThreadMutexUnlock(&fs->tsdb->mutex); + + if (code) { + TARRAY2_DESTROY(pList, tsdbSnapPartitionClear); + taosMemoryFree(pList); + pList = NULL; + } + return pList; +} + +int32_t tTsdbSnapPartListDataLenCalc(STsdbSnapPartList* pList) { + int32_t hdrLen = sizeof(int32_t); + int32_t datLen = 0; + + int8_t msgVer = 1; + int32_t len = TARRAY2_SIZE(pList); + hdrLen += sizeof(msgVer); + hdrLen += sizeof(len); + datLen += hdrLen; + + for (int32_t u = 0; u < len; u++) { + STsdbSnapPartition* p = TARRAY2_GET(pList, u); + int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; + int32_t uItem = 0; + uItem += sizeof(STsdbSnapPartition); + uItem += sizeof(typMax); + + for (int32_t i = 0; i < typMax; i++) { + int32_t iLen = TARRAY2_SIZE(&p->verRanges[i]); + int32_t jItem = 0; + jItem += sizeof(SVersionRange); + jItem += sizeof(int64_t); + uItem += sizeof(iLen) + jItem * iLen; + } + datLen += uItem; + } + return datLen; +} + +int32_t tSerializeTsdbSnapPartList(void* buf, int32_t bufLen, STsdbSnapPartList* pList) { + SEncoder encoder = {0}; + tEncoderInit(&encoder, buf, bufLen); + + int8_t reserved8 = 0; + int16_t reserved16 = 0; + int64_t reserved64 = 0; + + int8_t msgVer = 1; + int32_t len = TARRAY2_SIZE(pList); + + if (tStartEncode(&encoder) < 0) goto _err; + if (tEncodeI8(&encoder, msgVer) < 0) goto _err; + if (tEncodeI32(&encoder, len) < 0) goto _err; + + for (int32_t u = 0; u < len; u++) { + STsdbSnapPartition* p = TARRAY2_GET(pList, u); + if (tEncodeI64(&encoder, p->fid) < 0) goto _err; + if (tEncodeI8(&encoder, p->stat) < 0) goto _err; + if (tEncodeI8(&encoder, 
reserved8) < 0) goto _err; + if (tEncodeI16(&encoder, reserved16) < 0) goto _err; + + int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; + if (tEncodeI32(&encoder, typMax) < 0) goto _err; + + for (int32_t i = 0; i < typMax; i++) { + SVerRangeList* iList = &p->verRanges[i]; + int32_t iLen = TARRAY2_SIZE(iList); + + if (tEncodeI32(&encoder, iLen) < 0) goto _err; + for (int32_t j = 0; j < iLen; j++) { + SVersionRange r = TARRAY2_GET(iList, j); + if (tEncodeI64(&encoder, r.minVer) < 0) goto _err; + if (tEncodeI64(&encoder, r.maxVer) < 0) goto _err; + if (tEncodeI64(&encoder, reserved64) < 0) goto _err; + } + } + } + + tEndEncode(&encoder); + int32_t tlen = encoder.pos; + tEncoderClear(&encoder); + return tlen; + +_err: + tEncoderClear(&encoder); + return -1; +} + +int32_t tDeserializeTsdbSnapPartList(void* buf, int32_t bufLen, STsdbSnapPartList* pList) { + SDecoder decoder = {0}; + tDecoderInit(&decoder, buf, bufLen); + + int8_t reserved8 = 0; + int16_t reserved16 = 0; + int64_t reserved64 = 0; + + STsdbSnapPartition* p = NULL; + + int8_t msgVer = 0; + int32_t len = 0; + if (tStartDecode(&decoder) < 0) goto _err; + if (tDecodeI8(&decoder, &msgVer) < 0) goto _err; + if (tDecodeI32(&decoder, &len) < 0) goto _err; + + for (int32_t u = 0; u < len; u++) { + p = tsdbSnapPartitionCreate(); + if (p == NULL) goto _err; + if (tDecodeI64(&decoder, &p->fid) < 0) goto _err; + if (tDecodeI8(&decoder, &p->stat) < 0) goto _err; + if (tDecodeI8(&decoder, &reserved8) < 0) goto _err; + if (tDecodeI16(&decoder, &reserved16) < 0) goto _err; + + int32_t typMax = 0; + if (tDecodeI32(&decoder, &typMax) < 0) goto _err; + + for (int32_t i = 0; i < typMax; i++) { + SVerRangeList* iList = &p->verRanges[i]; + int32_t iLen = 0; + if (tDecodeI32(&decoder, &iLen) < 0) goto _err; + for (int32_t j = 0; j < iLen; j++) { + SVersionRange r = {0}; + if (tDecodeI64(&decoder, &r.minVer) < 0) goto _err; + if (tDecodeI64(&decoder, &r.maxVer) < 0) goto _err; + if (tDecodeI64(&decoder, &reserved64) < 0) goto _err; + 
TARRAY2_APPEND(iList, r); + } + } + TARRAY2_APPEND(pList, p); + p = NULL; + } + + tEndDecode(&decoder); + tDecoderClear(&decoder); + return 0; + +_err: + if (p) { + tsdbSnapPartitionClear(&p); + } + tDecoderClear(&decoder); + return -1; +} + +int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList* pList, TFileSetRangeArray** ppRanges) { + TFileSetRangeArray* pDiff = taosMemoryCalloc(1, sizeof(TFileSetRangeArray)); + if (pDiff == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + TARRAY2_INIT(pDiff); + + STsdbSnapPartition* part; + TARRAY2_FOREACH(pList, part) { + STFileSetRange* r = taosMemoryCalloc(1, sizeof(STFileSetRange)); + if (r == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + int64_t maxVerValid = -1; + int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; + for (int32_t i = 0; i < typMax; i++) { + SVerRangeList* iList = &part->verRanges[i]; + SVersionRange vr = {0}; + TARRAY2_FOREACH(iList, vr) { + if (vr.maxVer < vr.minVer) { + continue; + } + maxVerValid = TMAX(maxVerValid, vr.maxVer); + } + } + r->fid = part->fid; + r->sver = maxVerValid + 1; + r->ever = VERSION_MAX; + tsdbDebug("range diff fid:%" PRId64 ", sver:%" PRId64 ", ever:%" PRId64, part->fid, r->sver, r->ever); + int32_t code = TARRAY2_SORT_INSERT(pDiff, r, tsdbFileSetRangeCmprFn); + ASSERT(code == 0); + } + ppRanges[0] = pDiff; + + tsdbInfo("pDiff size:%d", TARRAY2_SIZE(pDiff)); + return 0; + +_err: + if (pDiff) { + tsdbFileSetRangeArrayDestroy(&pDiff); + } + return -1; +} + +void tsdbFileSetRangeArrayDestroy(TFileSetRangeArray** ppSnap) { + if (ppSnap && ppSnap[0]) { + TARRAY2_DESTROY(ppSnap[0], tsdbTSnapRangeClear); + taosMemoryFree(ppSnap[0]); + ppSnap[0] = NULL; + } +} + +void tsdbSnapPartListDestroy(STsdbSnapPartList** ppList) { + if (ppList == NULL || ppList[0] == NULL) return; + + TARRAY2_DESTROY(ppList[0], tsdbSnapPartitionClear); + taosMemoryFree(ppList[0]); + ppList[0] = NULL; +} + +ETsdbFsState tsdbSnapGetFsState(SVnode* pVnode) { + if (!VND_IS_RSMA(pVnode)) { + 
return pVnode->pTsdb->pFS->fsstate; + } + for (int32_t lvl = 0; lvl < TSDB_RETENTION_MAX; ++lvl) { + STsdb* pTsdb = SMA_RSMA_GET_TSDB(pVnode, lvl); + if (pTsdb && pTsdb->pFS->fsstate != TSDB_FS_STATE_NORMAL) { + return TSDB_FS_STATE_INCOMPLETE; + } + } + return TSDB_FS_STATE_NORMAL; +} + +int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { + int code = -1; + int32_t tsdbMaxCnt = (!VND_IS_RSMA(pVnode) ? 1 : TSDB_RETENTION_MAX); + int32_t subTyps[TSDB_RETENTION_MAX] = {SNAP_DATA_TSDB, SNAP_DATA_RSMA1, SNAP_DATA_RSMA2}; + STsdbSnapPartList* pLists[TSDB_RETENTION_MAX] = {0}; + + // get part list + for (int32_t j = 0; j < tsdbMaxCnt; ++j) { + STsdb* pTsdb = SMA_RSMA_GET_TSDB(pVnode, j); + pLists[j] = tsdbGetSnapPartList(pTsdb->pFS); + if (pLists[j] == NULL) goto _out; + } + + // estimate bufLen and prepare + int32_t bufLen = sizeof(SSyncTLV); // typ: TDMT_SYNC_PREP_SNAPSHOT or TDMT_SYNC_PREP_SNAPSOT_REPLY + for (int32_t j = 0; j < tsdbMaxCnt; ++j) { + bufLen += sizeof(SSyncTLV); // subTyps[j] + bufLen += tTsdbSnapPartListDataLenCalc(pLists[j]); + } + + tsdbInfo("vgId:%d, allocate %d bytes for data of snapshot info.", TD_VID(pVnode), bufLen); + + void* data = taosMemoryRealloc(pSnap->data, bufLen); + if (data == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tsdbError("vgId:%d, failed to realloc memory for data of snapshot info. 
bytes:%d", TD_VID(pVnode), bufLen); + goto _out; + } + pSnap->data = data; + + // header + SSyncTLV* head = data; + head->len = 0; + head->typ = pSnap->type; + int32_t offset = sizeof(SSyncTLV); + int32_t tlen = 0; + + // fill snapshot info + for (int32_t j = 0; j < tsdbMaxCnt; ++j) { + // subHead + SSyncTLV* subHead = (void*)((char*)data + offset); + subHead->typ = subTyps[j]; + ASSERT(subHead->val == (char*)data + offset + sizeof(SSyncTLV)); + + if ((tlen = tSerializeTsdbSnapPartList(subHead->val, bufLen - offset - sizeof(SSyncTLV), pLists[j])) < 0) { + tsdbError("vgId:%d, failed to serialize snap partition list of tsdb %d since %s", TD_VID(pVnode), j, terrstr()); + goto _out; + } + subHead->len = tlen; + offset += sizeof(SSyncTLV) + tlen; + } + + // total length of subfields + head->len = offset - sizeof(SSyncTLV); + ASSERT(offset <= bufLen); + code = 0; + +_out: + for (int32_t j = 0; j < tsdbMaxCnt; ++j) { + if (pLists[j] == NULL) continue; + tsdbSnapPartListDestroy(&pLists[j]); + } + + return code; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 9d3efcfdae..48872404ed 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1167,439 +1167,3 @@ _exit: } return code; } - -// STsdbSnapPartition ===================================== -static int32_t tsdbSnapPartCmprFn(STsdbSnapPartition* x, STsdbSnapPartition* y) { - if (x->fid < y->fid) return -1; - if (x->fid > y->fid) return 1; - return 0; -} - -static int32_t tVersionRangeCmprFn(SVersionRange* x, SVersionRange* y) { - if (x->minVer < y->minVer) return -1; - if (x->minVer > y->minVer) return 1; - if (x->maxVer < y->maxVer) return -1; - if (x->maxVer > y->maxVer) return 1; - return 0; -} - -static int32_t tsdbFileSetRangeCmprFn(STFileSetRange* x, STFileSetRange* y) { - if (x->fid < y->fid) return -1; - if (x->fid > y->fid) return 1; - return 0; -} - -STsdbSnapPartition* tsdbSnapPartitionCreate() { - 
STsdbSnapPartition* pSP = taosMemoryCalloc(1, sizeof(STsdbSnapPartition)); - if (pSP == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - for (int32_t i = 0; i < TSDB_SNAP_RANGE_TYP_MAX; i++) { - TARRAY2_INIT(&pSP->verRanges[i]); - } - return pSP; -} - -void tsdbSnapPartitionClear(STsdbSnapPartition** ppSP) { - if (ppSP == NULL || ppSP[0] == NULL) { - return; - } - for (int32_t i = 0; i < TSDB_SNAP_RANGE_TYP_MAX; i++) { - TARRAY2_DESTROY(&ppSP[0]->verRanges[i], NULL); - } - taosMemoryFree(ppSP[0]); - ppSP[0] = NULL; -} - -static int32_t tsdbFTypeToSRangeTyp(tsdb_ftype_t ftype) { - switch (ftype) { - case TSDB_FTYPE_HEAD: - return TSDB_SNAP_RANGE_TYP_HEAD; - case TSDB_FTYPE_DATA: - return TSDB_SNAP_RANGE_TYP_DATA; - case TSDB_FTYPE_SMA: - return TSDB_SNAP_RANGE_TYP_SMA; - case TSDB_FTYPE_TOMB: - return TSDB_SNAP_RANGE_TYP_TOMB; - case TSDB_FTYPE_STT: - return TSDB_SNAP_RANGE_TYP_STT; - } - return TSDB_SNAP_RANGE_TYP_MAX; -} - -static int32_t tsdbTFileSetToSnapPart(STFileSet* fset, STsdbSnapPartition** ppSP) { - STsdbSnapPartition* p = tsdbSnapPartitionCreate(); - if (p == NULL) { - goto _err; - } - - p->fid = fset->fid; - - int32_t code = 0; - int32_t typ = 0; - int32_t corrupt = false; - int32_t count = 0; - for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { - if (fset->farr[ftype] == NULL) continue; - typ = tsdbFTypeToSRangeTyp(ftype); - ASSERT(typ < TSDB_SNAP_RANGE_TYP_MAX); - STFile* f = fset->farr[ftype]->f; - if (f->maxVer > fset->maxVerValid) { - corrupt = true; - tsdbError("skip incomplete data file: fid:%d, maxVerValid:%" PRId64 ", minVer:%" PRId64 ", maxVer:%" PRId64 - ", ftype: %d", - fset->fid, fset->maxVerValid, f->minVer, f->maxVer, ftype); - continue; - } - count++; - SVersionRange vr = {.minVer = f->minVer, .maxVer = f->maxVer}; - code = TARRAY2_SORT_INSERT(&p->verRanges[typ], vr, tVersionRangeCmprFn); - ASSERT(code == 0); - } - - typ = TSDB_SNAP_RANGE_TYP_STT; - const SSttLvl* lvl; - TARRAY2_FOREACH(fset->lvlArr, 
lvl) { - STFileObj* fobj; - TARRAY2_FOREACH(lvl->fobjArr, fobj) { - STFile* f = fobj->f; - if (f->maxVer > fset->maxVerValid) { - corrupt = true; - tsdbError("skip incomplete stt file.fid:%d, maxVerValid:%" PRId64 ", minVer:%" PRId64 ", maxVer:%" PRId64 - ", ftype: %d", - fset->fid, fset->maxVerValid, f->minVer, f->maxVer, typ); - continue; - } - count++; - SVersionRange vr = {.minVer = f->minVer, .maxVer = f->maxVer}; - code = TARRAY2_SORT_INSERT(&p->verRanges[typ], vr, tVersionRangeCmprFn); - ASSERT(code == 0); - } - } - if (corrupt && count == 0) { - SVersionRange vr = {.minVer = VERSION_MIN, .maxVer = fset->maxVerValid}; - code = TARRAY2_SORT_INSERT(&p->verRanges[typ], vr, tVersionRangeCmprFn); - ASSERT(code == 0); - } - ppSP[0] = p; - return 0; - -_err: - tsdbSnapPartitionClear(&p); - return -1; -} - -STsdbSnapPartList* tsdbSnapPartListCreate() { - STsdbSnapPartList* pList = taosMemoryCalloc(1, sizeof(STsdbSnapPartList)); - if (pList == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - TARRAY2_INIT(pList); - return pList; -} - -static STsdbSnapPartList* tsdbGetSnapPartList(STFileSystem* fs) { - STsdbSnapPartList* pList = tsdbSnapPartListCreate(); - if (pList == NULL) { - return NULL; - } - - int32_t code = 0; - taosThreadMutexLock(&fs->tsdb->mutex); - STFileSet* fset; - TARRAY2_FOREACH(fs->fSetArr, fset) { - STsdbSnapPartition* pItem = NULL; - if (tsdbTFileSetToSnapPart(fset, &pItem) < 0) { - code = -1; - break; - } - ASSERT(pItem != NULL); - code = TARRAY2_SORT_INSERT(pList, pItem, tsdbSnapPartCmprFn); - ASSERT(code == 0); - } - taosThreadMutexUnlock(&fs->tsdb->mutex); - - if (code) { - TARRAY2_DESTROY(pList, tsdbSnapPartitionClear); - taosMemoryFree(pList); - pList = NULL; - } - return pList; -} - -int32_t tTsdbSnapPartListDataLenCalc(STsdbSnapPartList* pList) { - int32_t hdrLen = sizeof(int32_t); - int32_t datLen = 0; - - int8_t msgVer = 1; - int32_t len = TARRAY2_SIZE(pList); - hdrLen += sizeof(msgVer); - hdrLen += sizeof(len); - datLen += 
hdrLen; - - for (int32_t u = 0; u < len; u++) { - STsdbSnapPartition* p = TARRAY2_GET(pList, u); - int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; - int32_t uItem = 0; - uItem += sizeof(STsdbSnapPartition); - uItem += sizeof(typMax); - - for (int32_t i = 0; i < typMax; i++) { - int32_t iLen = TARRAY2_SIZE(&p->verRanges[i]); - int32_t jItem = 0; - jItem += sizeof(SVersionRange); - jItem += sizeof(int64_t); - uItem += sizeof(iLen) + jItem * iLen; - } - datLen += uItem; - } - return datLen; -} - -int32_t tSerializeTsdbSnapPartList(void* buf, int32_t bufLen, STsdbSnapPartList* pList) { - SEncoder encoder = {0}; - tEncoderInit(&encoder, buf, bufLen); - - int8_t reserved8 = 0; - int16_t reserved16 = 0; - int64_t reserved64 = 0; - - int8_t msgVer = 1; - int32_t len = TARRAY2_SIZE(pList); - - if (tStartEncode(&encoder) < 0) goto _err; - if (tEncodeI8(&encoder, msgVer) < 0) goto _err; - if (tEncodeI32(&encoder, len) < 0) goto _err; - - for (int32_t u = 0; u < len; u++) { - STsdbSnapPartition* p = TARRAY2_GET(pList, u); - if (tEncodeI64(&encoder, p->fid) < 0) goto _err; - if (tEncodeI8(&encoder, p->stat) < 0) goto _err; - if (tEncodeI8(&encoder, reserved8) < 0) goto _err; - if (tEncodeI16(&encoder, reserved16) < 0) goto _err; - - int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; - if (tEncodeI32(&encoder, typMax) < 0) goto _err; - - for (int32_t i = 0; i < typMax; i++) { - SVerRangeList* iList = &p->verRanges[i]; - int32_t iLen = TARRAY2_SIZE(iList); - - if (tEncodeI32(&encoder, iLen) < 0) goto _err; - for (int32_t j = 0; j < iLen; j++) { - SVersionRange r = TARRAY2_GET(iList, j); - if (tEncodeI64(&encoder, r.minVer) < 0) goto _err; - if (tEncodeI64(&encoder, r.maxVer) < 0) goto _err; - if (tEncodeI64(&encoder, reserved64) < 0) goto _err; - } - } - } - - tEndEncode(&encoder); - int32_t tlen = encoder.pos; - tEncoderClear(&encoder); - return tlen; - -_err: - tEncoderClear(&encoder); - return -1; -} - -int32_t tDeserializeTsdbSnapPartList(void* buf, int32_t bufLen, STsdbSnapPartList* pList) 
{ - SDecoder decoder = {0}; - tDecoderInit(&decoder, buf, bufLen); - - int8_t reserved8 = 0; - int16_t reserved16 = 0; - int64_t reserved64 = 0; - - STsdbSnapPartition* p = NULL; - - int8_t msgVer = 0; - int32_t len = 0; - if (tStartDecode(&decoder) < 0) goto _err; - if (tDecodeI8(&decoder, &msgVer) < 0) goto _err; - if (tDecodeI32(&decoder, &len) < 0) goto _err; - - for (int32_t u = 0; u < len; u++) { - p = tsdbSnapPartitionCreate(); - if (p == NULL) goto _err; - if (tDecodeI64(&decoder, &p->fid) < 0) goto _err; - if (tDecodeI8(&decoder, &p->stat) < 0) goto _err; - if (tDecodeI8(&decoder, &reserved8) < 0) goto _err; - if (tDecodeI16(&decoder, &reserved16) < 0) goto _err; - - int32_t typMax = 0; - if (tDecodeI32(&decoder, &typMax) < 0) goto _err; - - for (int32_t i = 0; i < typMax; i++) { - SVerRangeList* iList = &p->verRanges[i]; - int32_t iLen = 0; - if (tDecodeI32(&decoder, &iLen) < 0) goto _err; - for (int32_t j = 0; j < iLen; j++) { - SVersionRange r = {0}; - if (tDecodeI64(&decoder, &r.minVer) < 0) goto _err; - if (tDecodeI64(&decoder, &r.maxVer) < 0) goto _err; - if (tDecodeI64(&decoder, &reserved64) < 0) goto _err; - TARRAY2_APPEND(iList, r); - } - } - TARRAY2_APPEND(pList, p); - p = NULL; - } - - tEndDecode(&decoder); - tDecoderClear(&decoder); - return 0; - -_err: - if (p) { - tsdbSnapPartitionClear(&p); - } - tDecoderClear(&decoder); - return -1; -} - -int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList* pList, TFileSetRangeArray** ppRanges) { - TFileSetRangeArray* pDiff = taosMemoryCalloc(1, sizeof(TFileSetRangeArray)); - if (pDiff == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - TARRAY2_INIT(pDiff); - - STsdbSnapPartition* part; - TARRAY2_FOREACH(pList, part) { - STFileSetRange* r = taosMemoryCalloc(1, sizeof(STFileSetRange)); - if (r == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - int64_t maxVerValid = -1; - int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; - for (int32_t i = 0; i < typMax; i++) { - SVerRangeList* iList = 
&part->verRanges[i]; - SVersionRange vr = {0}; - TARRAY2_FOREACH(iList, vr) { - if (vr.maxVer < vr.minVer) { - continue; - } - maxVerValid = TMAX(maxVerValid, vr.maxVer); - } - } - r->fid = part->fid; - r->sver = maxVerValid + 1; - r->ever = VERSION_MAX; - tsdbDebug("range diff fid:%" PRId64 ", sver:%" PRId64 ", ever:%" PRId64, part->fid, r->sver, r->ever); - int32_t code = TARRAY2_SORT_INSERT(pDiff, r, tsdbFileSetRangeCmprFn); - ASSERT(code == 0); - } - ppRanges[0] = pDiff; - - tsdbInfo("pDiff size:%d", TARRAY2_SIZE(pDiff)); - return 0; - -_err: - if (pDiff) { - tsdbFileSetRangeArrayDestroy(&pDiff); - } - return -1; -} - -void tsdbFileSetRangeArrayDestroy(TFileSetRangeArray** ppSnap) { - if (ppSnap && ppSnap[0]) { - TARRAY2_DESTROY(ppSnap[0], tsdbTSnapRangeClear); - taosMemoryFree(ppSnap[0]); - ppSnap[0] = NULL; - } -} - -void tsdbSnapPartListDestroy(STsdbSnapPartList** ppList) { - if (ppList == NULL || ppList[0] == NULL) return; - - TARRAY2_DESTROY(ppList[0], tsdbSnapPartitionClear); - taosMemoryFree(ppList[0]); - ppList[0] = NULL; -} - -ETsdbFsState tsdbSnapGetFsState(SVnode* pVnode) { - if (!VND_IS_RSMA(pVnode)) { - return pVnode->pTsdb->pFS->fsstate; - } - for (int32_t lvl = 0; lvl < TSDB_RETENTION_MAX; ++lvl) { - STsdb* pTsdb = SMA_RSMA_GET_TSDB(pVnode, lvl); - if (pTsdb && pTsdb->pFS->fsstate != TSDB_FS_STATE_NORMAL) { - return TSDB_FS_STATE_INCOMPLETE; - } - } - return TSDB_FS_STATE_NORMAL; -} - -int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { - int code = -1; - int32_t tsdbMaxCnt = (!VND_IS_RSMA(pVnode) ? 
1 : TSDB_RETENTION_MAX); - int32_t subTyps[TSDB_RETENTION_MAX] = {SNAP_DATA_TSDB, SNAP_DATA_RSMA1, SNAP_DATA_RSMA2}; - STsdbSnapPartList* pLists[TSDB_RETENTION_MAX] = {0}; - - for (int32_t j = 0; j < tsdbMaxCnt; ++j) { - STsdb* pTsdb = SMA_RSMA_GET_TSDB(pVnode, j); - pLists[j] = tsdbGetSnapPartList(pTsdb->pFS); - if (pLists[j] == NULL) goto _out; - } - - // estimate bufLen and prepare - int32_t bufLen = sizeof(SSyncTLV); // typ: TDMT_SYNC_PREP_SNAPSHOT or TDMT_SYNC_PREP_SNAPSOT_REPLY - for (int32_t j = 0; j < tsdbMaxCnt; ++j) { - bufLen += sizeof(SSyncTLV); // subTyps[j] - bufLen += tTsdbSnapPartListDataLenCalc(pLists[j]); - } - - tsdbInfo("vgId:%d, allocate %d bytes for data of snapshot info.", TD_VID(pVnode), bufLen); - - void* data = taosMemoryRealloc(pSnap->data, bufLen); - if (data == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - tsdbError("vgId:%d, failed to realloc memory for data of snapshot info. bytes:%d", TD_VID(pVnode), bufLen); - goto _out; - } - pSnap->data = data; - - // header - SSyncTLV* head = data; - head->len = 0; - head->typ = pSnap->type; - int32_t offset = sizeof(SSyncTLV); - int32_t tlen = 0; - - // fill snapshot info - for (int32_t j = 0; j < tsdbMaxCnt; ++j) { - if (pSnap->type == TDMT_SYNC_PREP_SNAPSHOT_REPLY) { - } - - // subHead - SSyncTLV* subHead = (void*)((char*)data + offset); - subHead->typ = subTyps[j]; - ASSERT(subHead->val == (char*)data + offset + sizeof(SSyncTLV)); - - if ((tlen = tSerializeTsdbSnapPartList(subHead->val, bufLen - offset - sizeof(SSyncTLV), pLists[j])) < 0) { - tsdbError("vgId:%d, failed to serialize snap partition list of tsdb %d since %s", TD_VID(pVnode), j, terrstr()); - goto _out; - } - subHead->len = tlen; - offset += sizeof(SSyncTLV) + tlen; - } - - head->len = offset - sizeof(SSyncTLV); - ASSERT(offset <= bufLen); - code = 0; - -_out: - for (int32_t j = 0; j < tsdbMaxCnt; ++j) { - if (pLists[j] == NULL) continue; - tsdbSnapPartListDestroy(&pLists[j]); - } - - return code; -} \ No newline at end of file 
From 4487eeacb58eebdb2cc897a32e9c7dafab653e6a Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 30 Nov 2023 19:09:32 +0800 Subject: [PATCH 15/65] refact: improve funcs dealing with SnapInfo for vnode snap reader and writer --- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 100 ++++++++++++--------- 1 file changed, 60 insertions(+), 40 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index f65d9085fd..941660f776 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -16,6 +16,26 @@ #include "tsdb.h" #include "vnd.h" +static int32_t vnodeExtractSnapInfoDiff(void *buf, int32_t bufLen, TFileSetRangeArray **ppRanges) { + int32_t code = -1; + STsdbSnapPartList *pList = tsdbSnapPartListCreate(); + if (pList == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _out; + } + if (tDeserializeTsdbSnapPartList(buf, bufLen, pList) < 0) { + terrno = TSDB_CODE_INVALID_DATA_FMT; + goto _out; + } + if (tsdbSnapPartListToRangeDiff(pList, ppRanges) < 0) { + goto _out; + } + code = 0; +_out: + tsdbSnapPartListDestroy(&pList); + return code; +} + // SVSnapReader ======================================================== struct SVSnapReader { SVnode *pVnode; @@ -49,26 +69,6 @@ struct SVSnapReader { SRSmaSnapReader *pRsmaReader; }; -static int32_t vnodeExtractSnapInfoDiff(void *buf, int32_t bufLen, TFileSetRangeArray **ppRanges) { - int32_t code = -1; - STsdbSnapPartList *pList = tsdbSnapPartListCreate(); - if (pList == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _out; - } - if (tDeserializeTsdbSnapPartList(buf, bufLen, pList) < 0) { - terrno = TSDB_CODE_INVALID_DATA_FMT; - goto _out; - } - if (tsdbSnapPartListToRangeDiff(pList, ppRanges) < 0) { - goto _out; - } - code = 0; -_out: - tsdbSnapPartListDestroy(&pList); - return code; -} - static TFileSetRangeArray **vnodeSnapReaderGetTsdbRanges(SVSnapReader *pReader, int32_t tsdbTyp) { 
ASSERTS(sizeof(pReader->pRsmaRanges) / sizeof(pReader->pRsmaRanges[0]) == 2, "Unexpected array size"); switch (tsdbTyp) { @@ -83,7 +83,7 @@ static TFileSetRangeArray **vnodeSnapReaderGetTsdbRanges(SVSnapReader *pReader, } } -static int32_t vnodeSnapReaderDoSnapInfo(SVSnapReader *pReader, SSnapshotParam *pParam) { +static int32_t vnodeSnapReaderDealWithSnapInfo(SVSnapReader *pReader, SSnapshotParam *pParam) { SVnode *pVnode = pReader->pVnode; int32_t code = -1; @@ -102,14 +102,24 @@ static int32_t vnodeSnapReaderDoSnapInfo(SVSnapReader *pReader, SSnapshotParam * offset += sizeof(SSyncTLV) + subField->len; void *buf = subField->val; int32_t bufLen = subField->len; - ppRanges = vnodeSnapReaderGetTsdbRanges(pReader, subField->typ); - if (ppRanges == NULL) { - vError("vgId:%d, unexpected subfield type in data of snapshot param. subtyp:%d", TD_VID(pVnode), subField->typ); - goto _out; - } - if (vnodeExtractSnapInfoDiff(buf, bufLen, ppRanges) < 0) { - vError("vgId:%d, failed to get range diff since %s", TD_VID(pVnode), terrstr()); - goto _out; + + switch (subField->typ) { + case SNAP_DATA_TSDB: + case SNAP_DATA_RSMA1: + case SNAP_DATA_RSMA2: { + ppRanges = vnodeSnapReaderGetTsdbRanges(pReader, subField->typ); + if (ppRanges == NULL) { + vError("vgId:%d, unexpected subfield type in snapshot param. subtyp:%d", TD_VID(pVnode), subField->typ); + goto _out; + } + if (vnodeExtractSnapInfoDiff(buf, bufLen, ppRanges) < 0) { + vError("vgId:%d, failed to get range diff since %s", TD_VID(pVnode), terrstr()); + goto _out; + } + } break; + default: + vError("vgId:%d, unexpected subfield type of snap info. 
typ:%d", TD_VID(pVnode), subField->typ); + goto _out; } } } @@ -135,7 +145,7 @@ int32_t vnodeSnapReaderOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapReader pReader->ever = ever; // snapshot info - if (vnodeSnapReaderDoSnapInfo(pReader, pParam) < 0) { + if (vnodeSnapReaderDealWithSnapInfo(pReader, pParam) < 0) { goto _err; } @@ -483,7 +493,7 @@ TFileSetRangeArray **vnodeSnapWriterGetTsdbRanges(SVSnapWriter *pWriter, int32_t } } -static int32_t vnodeSnapWriterDoSnapInfo(SVSnapWriter *pWriter, SSnapshotParam *pParam) { +static int32_t vnodeSnapWriterDealWithSnapInfo(SVSnapWriter *pWriter, SSnapshotParam *pParam) { SVnode *pVnode = pWriter->pVnode; int32_t code = -1; @@ -502,14 +512,24 @@ static int32_t vnodeSnapWriterDoSnapInfo(SVSnapWriter *pWriter, SSnapshotParam * offset += sizeof(SSyncTLV) + subField->len; void *buf = subField->val; int32_t bufLen = subField->len; - ppRanges = vnodeSnapWriterGetTsdbRanges(pWriter, subField->typ); - if (ppRanges == NULL) { - vError("vgId:%d, unexpected subfield type in data of snapshot param. subtyp:%d", TD_VID(pVnode), subField->typ); - goto _out; - } - if (vnodeExtractSnapInfoDiff(buf, bufLen, ppRanges) < 0) { - vError("vgId:%d, failed to get range diff since %s", TD_VID(pVnode), terrstr()); - goto _out; + + switch (subField->typ) { + case SNAP_DATA_TSDB: + case SNAP_DATA_RSMA1: + case SNAP_DATA_RSMA2: { + ppRanges = vnodeSnapWriterGetTsdbRanges(pWriter, subField->typ); + if (ppRanges == NULL) { + vError("vgId:%d, unexpected subfield type in snapshot param. subtyp:%d", TD_VID(pVnode), subField->typ); + goto _out; + } + if (vnodeExtractSnapInfoDiff(buf, bufLen, ppRanges) < 0) { + vError("vgId:%d, failed to get range diff since %s", TD_VID(pVnode), terrstr()); + goto _out; + } + } break; + default: + vError("vgId:%d, unexpected subfield type of snap info. 
typ:%d", TD_VID(pVnode), subField->typ); + goto _out; } } } @@ -558,7 +578,7 @@ int32_t vnodeSnapWriterOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapWriter pWriter->commitID = ++pVnode->state.commitID; // snapshot info - if (vnodeSnapWriterDoSnapInfo(pWriter, pParam) < 0) { + if (vnodeSnapWriterDealWithSnapInfo(pWriter, pParam) < 0) { goto _err; } From 20b5cf8d49e98ff15c69d849fdfca07c69f53595 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 30 Nov 2023 20:30:59 +0800 Subject: [PATCH 16/65] refact: remove redundent includes from tsdbSnapInfo.c --- source/dnode/vnode/src/tsdb/tsdbSnapInfo.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c index e70d0eaabc..5aaa88511a 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c @@ -14,11 +14,7 @@ */ #include "tsdb.h" -#include "tsdbDataFileRW.h" #include "tsdbFS2.h" -#include "tsdbFSetRW.h" -#include "tsdbIter.h" -#include "tsdbSttFileRW.h" // STsdbSnapPartition ===================================== static int32_t tsdbSnapPartCmprFn(STsdbSnapPartition* x, STsdbSnapPartition* y) { From ed916b1a80385223491f25e53a033e51bfd7e3fc Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 30 Nov 2023 21:02:26 +0800 Subject: [PATCH 17/65] refact: rename tsdb snap partition to fset partition --- source/dnode/vnode/src/inc/tsdb.h | 16 +++--- source/dnode/vnode/src/tsdb/tsdbSnapInfo.c | 65 +++++++++++----------- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 8 +-- 3 files changed, 45 insertions(+), 44 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 99b92ace3a..374753fcde 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -690,14 +690,14 @@ SHashObj *tsdbGetSnapRangeHash(TFileSetRangeArray *pRanges); // snap partition list typedef TARRAY2(SVersionRange) SVerRangeList; -typedef struct 
STsdbSnapPartition STsdbSnapPartition; -typedef TARRAY2(STsdbSnapPartition *) STsdbSnapPartList; +typedef struct STsdbFSetPartition STsdbFSetPartition; +typedef TARRAY2(STsdbFSetPartition *) STsdbFSetPartList; // util -STsdbSnapPartList *tsdbSnapPartListCreate(); -void tsdbSnapPartListDestroy(STsdbSnapPartList **ppList); -int32_t tSerializeTsdbSnapPartList(void *buf, int32_t bufLen, STsdbSnapPartList *pList); -int32_t tDeserializeTsdbSnapPartList(void *buf, int32_t bufLen, STsdbSnapPartList *pList); -int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList *pList, TFileSetRangeArray **ppRanges); +STsdbFSetPartList *tsdbFSetPartListCreate(); +void tsdbFSetPartListDestroy(STsdbFSetPartList **ppList); +int32_t tSerializeTsdbFSetPartList(void *buf, int32_t bufLen, STsdbFSetPartList *pList); +int32_t tDeserializeTsdbFSetPartList(void *buf, int32_t bufLen, STsdbFSetPartList *pList); +int32_t tsdbFSetPartListToRangeDiff(STsdbFSetPartList *pList, TFileSetRangeArray **ppRanges); enum { TSDB_SNAP_RANGE_TYP_HEAD = 0, @@ -708,7 +708,7 @@ enum { TSDB_SNAP_RANGE_TYP_MAX, }; -struct STsdbSnapPartition { +struct STsdbFSetPartition { int64_t fid; int8_t stat; SVerRangeList verRanges[TSDB_SNAP_RANGE_TYP_MAX]; diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c index 5aaa88511a..c73d75030d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c @@ -16,8 +16,8 @@ #include "tsdb.h" #include "tsdbFS2.h" -// STsdbSnapPartition ===================================== -static int32_t tsdbSnapPartCmprFn(STsdbSnapPartition* x, STsdbSnapPartition* y) { +// STsdbFSetPartition ===================================== +static int32_t tsdbFSetPartCmprFn(STsdbFSetPartition* x, STsdbFSetPartition* y) { if (x->fid < y->fid) return -1; if (x->fid > y->fid) return 1; return 0; @@ -37,8 +37,8 @@ static int32_t tsdbFileSetRangeCmprFn(STFileSetRange* x, STFileSetRange* y) { return 0; } -STsdbSnapPartition* 
tsdbSnapPartitionCreate() { - STsdbSnapPartition* pSP = taosMemoryCalloc(1, sizeof(STsdbSnapPartition)); +STsdbFSetPartition* tsdbFSetPartitionCreate() { + STsdbFSetPartition* pSP = taosMemoryCalloc(1, sizeof(STsdbFSetPartition)); if (pSP == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; @@ -49,7 +49,7 @@ STsdbSnapPartition* tsdbSnapPartitionCreate() { return pSP; } -void tsdbSnapPartitionClear(STsdbSnapPartition** ppSP) { +void tsdbFSetPartitionClear(STsdbFSetPartition** ppSP) { if (ppSP == NULL || ppSP[0] == NULL) { return; } @@ -76,8 +76,8 @@ static int32_t tsdbFTypeToSRangeTyp(tsdb_ftype_t ftype) { return TSDB_SNAP_RANGE_TYP_MAX; } -static int32_t tsdbTFileSetToSnapPart(STFileSet* fset, STsdbSnapPartition** ppSP) { - STsdbSnapPartition* p = tsdbSnapPartitionCreate(); +static int32_t tsdbTFileSetToSnapPart(STFileSet* fset, STsdbFSetPartition** ppSP) { + STsdbFSetPartition* p = tsdbFSetPartitionCreate(); if (p == NULL) { goto _err; } @@ -134,12 +134,12 @@ static int32_t tsdbTFileSetToSnapPart(STFileSet* fset, STsdbSnapPartition** ppSP return 0; _err: - tsdbSnapPartitionClear(&p); + tsdbFSetPartitionClear(&p); return -1; } -STsdbSnapPartList* tsdbSnapPartListCreate() { - STsdbSnapPartList* pList = taosMemoryCalloc(1, sizeof(STsdbSnapPartList)); +STsdbFSetPartList* tsdbFSetPartListCreate() { + STsdbFSetPartList* pList = taosMemoryCalloc(1, sizeof(STsdbFSetPartList)); if (pList == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; @@ -148,8 +148,8 @@ STsdbSnapPartList* tsdbSnapPartListCreate() { return pList; } -static STsdbSnapPartList* tsdbGetSnapPartList(STFileSystem* fs) { - STsdbSnapPartList* pList = tsdbSnapPartListCreate(); +static STsdbFSetPartList* tsdbGetSnapPartList(STFileSystem* fs) { + STsdbFSetPartList* pList = tsdbFSetPartListCreate(); if (pList == NULL) { return NULL; } @@ -158,26 +158,26 @@ static STsdbSnapPartList* tsdbGetSnapPartList(STFileSystem* fs) { taosThreadMutexLock(&fs->tsdb->mutex); STFileSet* fset; 
TARRAY2_FOREACH(fs->fSetArr, fset) { - STsdbSnapPartition* pItem = NULL; + STsdbFSetPartition* pItem = NULL; if (tsdbTFileSetToSnapPart(fset, &pItem) < 0) { code = -1; break; } ASSERT(pItem != NULL); - code = TARRAY2_SORT_INSERT(pList, pItem, tsdbSnapPartCmprFn); + code = TARRAY2_SORT_INSERT(pList, pItem, tsdbFSetPartCmprFn); ASSERT(code == 0); } taosThreadMutexUnlock(&fs->tsdb->mutex); if (code) { - TARRAY2_DESTROY(pList, tsdbSnapPartitionClear); + TARRAY2_DESTROY(pList, tsdbFSetPartitionClear); taosMemoryFree(pList); pList = NULL; } return pList; } -int32_t tTsdbSnapPartListDataLenCalc(STsdbSnapPartList* pList) { +int32_t tTsdbFSetPartListDataLenCalc(STsdbFSetPartList* pList) { int32_t hdrLen = sizeof(int32_t); int32_t datLen = 0; @@ -188,10 +188,10 @@ int32_t tTsdbSnapPartListDataLenCalc(STsdbSnapPartList* pList) { datLen += hdrLen; for (int32_t u = 0; u < len; u++) { - STsdbSnapPartition* p = TARRAY2_GET(pList, u); + STsdbFSetPartition* p = TARRAY2_GET(pList, u); int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; int32_t uItem = 0; - uItem += sizeof(STsdbSnapPartition); + uItem += sizeof(STsdbFSetPartition); uItem += sizeof(typMax); for (int32_t i = 0; i < typMax; i++) { @@ -206,7 +206,7 @@ int32_t tTsdbSnapPartListDataLenCalc(STsdbSnapPartList* pList) { return datLen; } -int32_t tSerializeTsdbSnapPartList(void* buf, int32_t bufLen, STsdbSnapPartList* pList) { +int32_t tSerializeTsdbFSetPartList(void* buf, int32_t bufLen, STsdbFSetPartList* pList) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -222,7 +222,7 @@ int32_t tSerializeTsdbSnapPartList(void* buf, int32_t bufLen, STsdbSnapPartList* if (tEncodeI32(&encoder, len) < 0) goto _err; for (int32_t u = 0; u < len; u++) { - STsdbSnapPartition* p = TARRAY2_GET(pList, u); + STsdbFSetPartition* p = TARRAY2_GET(pList, u); if (tEncodeI64(&encoder, p->fid) < 0) goto _err; if (tEncodeI8(&encoder, p->stat) < 0) goto _err; if (tEncodeI8(&encoder, reserved8) < 0) goto _err; @@ -255,7 +255,7 @@ _err: return -1; } 
-int32_t tDeserializeTsdbSnapPartList(void* buf, int32_t bufLen, STsdbSnapPartList* pList) { +int32_t tDeserializeTsdbFSetPartList(void* buf, int32_t bufLen, STsdbFSetPartList* pList) { SDecoder decoder = {0}; tDecoderInit(&decoder, buf, bufLen); @@ -263,7 +263,7 @@ int32_t tDeserializeTsdbSnapPartList(void* buf, int32_t bufLen, STsdbSnapPartLis int16_t reserved16 = 0; int64_t reserved64 = 0; - STsdbSnapPartition* p = NULL; + STsdbFSetPartition* p = NULL; int8_t msgVer = 0; int32_t len = 0; @@ -272,7 +272,7 @@ int32_t tDeserializeTsdbSnapPartList(void* buf, int32_t bufLen, STsdbSnapPartLis if (tDecodeI32(&decoder, &len) < 0) goto _err; for (int32_t u = 0; u < len; u++) { - p = tsdbSnapPartitionCreate(); + p = tsdbFSetPartitionCreate(); if (p == NULL) goto _err; if (tDecodeI64(&decoder, &p->fid) < 0) goto _err; if (tDecodeI8(&decoder, &p->stat) < 0) goto _err; @@ -304,13 +304,13 @@ int32_t tDeserializeTsdbSnapPartList(void* buf, int32_t bufLen, STsdbSnapPartLis _err: if (p) { - tsdbSnapPartitionClear(&p); + tsdbFSetPartitionClear(&p); } tDecoderClear(&decoder); return -1; } -int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList* pList, TFileSetRangeArray** ppRanges) { +int32_t tsdbFSetPartListToRangeDiff(STsdbFSetPartList* pList, TFileSetRangeArray** ppRanges) { TFileSetRangeArray* pDiff = taosMemoryCalloc(1, sizeof(TFileSetRangeArray)); if (pDiff == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -318,7 +318,7 @@ int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList* pList, TFileSetRangeArray } TARRAY2_INIT(pDiff); - STsdbSnapPartition* part; + STsdbFSetPartition* part; TARRAY2_FOREACH(pList, part) { STFileSetRange* r = taosMemoryCalloc(1, sizeof(STFileSetRange)); if (r == NULL) { @@ -364,10 +364,10 @@ void tsdbFileSetRangeArrayDestroy(TFileSetRangeArray** ppSnap) { } } -void tsdbSnapPartListDestroy(STsdbSnapPartList** ppList) { +void tsdbFSetPartListDestroy(STsdbFSetPartList** ppList) { if (ppList == NULL || ppList[0] == NULL) return; - TARRAY2_DESTROY(ppList[0], 
tsdbSnapPartitionClear); + TARRAY2_DESTROY(ppList[0], tsdbFSetPartitionClear); taosMemoryFree(ppList[0]); ppList[0] = NULL; } @@ -389,7 +389,7 @@ int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { int code = -1; int32_t tsdbMaxCnt = (!VND_IS_RSMA(pVnode) ? 1 : TSDB_RETENTION_MAX); int32_t subTyps[TSDB_RETENTION_MAX] = {SNAP_DATA_TSDB, SNAP_DATA_RSMA1, SNAP_DATA_RSMA2}; - STsdbSnapPartList* pLists[TSDB_RETENTION_MAX] = {0}; + STsdbFSetPartList* pLists[TSDB_RETENTION_MAX] = {0}; // get part list for (int32_t j = 0; j < tsdbMaxCnt; ++j) { @@ -402,7 +402,7 @@ int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { int32_t bufLen = sizeof(SSyncTLV); // typ: TDMT_SYNC_PREP_SNAPSHOT or TDMT_SYNC_PREP_SNAPSOT_REPLY for (int32_t j = 0; j < tsdbMaxCnt; ++j) { bufLen += sizeof(SSyncTLV); // subTyps[j] - bufLen += tTsdbSnapPartListDataLenCalc(pLists[j]); + bufLen += tTsdbFSetPartListDataLenCalc(pLists[j]); } tsdbInfo("vgId:%d, allocate %d bytes for data of snapshot info.", TD_VID(pVnode), bufLen); @@ -429,7 +429,7 @@ int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { subHead->typ = subTyps[j]; ASSERT(subHead->val == (char*)data + offset + sizeof(SSyncTLV)); - if ((tlen = tSerializeTsdbSnapPartList(subHead->val, bufLen - offset - sizeof(SSyncTLV), pLists[j])) < 0) { + if ((tlen = tSerializeTsdbFSetPartList(subHead->val, bufLen - offset - sizeof(SSyncTLV), pLists[j])) < 0) { tsdbError("vgId:%d, failed to serialize snap partition list of tsdb %d since %s", TD_VID(pVnode), j, terrstr()); goto _out; } @@ -445,8 +445,9 @@ int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { _out: for (int32_t j = 0; j < tsdbMaxCnt; ++j) { if (pLists[j] == NULL) continue; - tsdbSnapPartListDestroy(&pLists[j]); + tsdbFSetPartListDestroy(&pLists[j]); } return code; } + diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 941660f776..bb3bd59971 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ 
b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -18,21 +18,21 @@ static int32_t vnodeExtractSnapInfoDiff(void *buf, int32_t bufLen, TFileSetRangeArray **ppRanges) { int32_t code = -1; - STsdbSnapPartList *pList = tsdbSnapPartListCreate(); + STsdbFSetPartList *pList = tsdbFSetPartListCreate(); if (pList == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto _out; } - if (tDeserializeTsdbSnapPartList(buf, bufLen, pList) < 0) { + if (tDeserializeTsdbFSetPartList(buf, bufLen, pList) < 0) { terrno = TSDB_CODE_INVALID_DATA_FMT; goto _out; } - if (tsdbSnapPartListToRangeDiff(pList, ppRanges) < 0) { + if (tsdbFSetPartListToRangeDiff(pList, ppRanges) < 0) { goto _out; } code = 0; _out: - tsdbSnapPartListDestroy(&pList); + tsdbFSetPartListDestroy(&pList); return code; } From 962febef02bea1c4c0ed1635e3d0c6cdc1ade0a4 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 1 Dec 2023 16:05:14 +0800 Subject: [PATCH 18/65] refact: rename tsdb snap range to tsdb fset range --- source/dnode/vnode/src/inc/tsdb.h | 29 ++++++------- source/dnode/vnode/src/tsdb/tsdbFS2.c | 48 +++++++++++----------- source/dnode/vnode/src/tsdb/tsdbFS2.h | 2 +- source/dnode/vnode/src/tsdb/tsdbFSet2.c | 14 ++++++- source/dnode/vnode/src/tsdb/tsdbFSet2.h | 4 +- source/dnode/vnode/src/tsdb/tsdbSnapInfo.c | 13 ++---- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 4 +- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 4 +- 8 files changed, 61 insertions(+), 57 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 374753fcde..b0cf6ecf5b 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -681,24 +681,14 @@ struct SDelFWriter { typedef struct STFileSet STFileSet; typedef TARRAY2(STFileSet *) TFileSetArray; +// fset range typedef struct STFileSetRange STFileSetRange; typedef TARRAY2(STFileSetRange *) TFileSetRangeArray; // disjoint ranges -// util -void tsdbFileSetRangeArrayDestroy(TFileSetRangeArray **ppSnap); -SHashObj 
*tsdbGetSnapRangeHash(TFileSetRangeArray *pRanges); - -// snap partition list -typedef TARRAY2(SVersionRange) SVerRangeList; -typedef struct STsdbFSetPartition STsdbFSetPartition; -typedef TARRAY2(STsdbFSetPartition *) STsdbFSetPartList; -// util -STsdbFSetPartList *tsdbFSetPartListCreate(); -void tsdbFSetPartListDestroy(STsdbFSetPartList **ppList); -int32_t tSerializeTsdbFSetPartList(void *buf, int32_t bufLen, STsdbFSetPartList *pList); -int32_t tDeserializeTsdbFSetPartList(void *buf, int32_t bufLen, STsdbFSetPartList *pList); -int32_t tsdbFSetPartListToRangeDiff(STsdbFSetPartList *pList, TFileSetRangeArray **ppRanges); +int32_t tsdbTFileSetRangeClear(STFileSetRange **fsr); +int32_t tsdbTFileSetRangeArrayDestroy(TFileSetRangeArray **ppArr); +// fset partition enum { TSDB_SNAP_RANGE_TYP_HEAD = 0, TSDB_SNAP_RANGE_TYP_DATA, @@ -708,12 +698,23 @@ enum { TSDB_SNAP_RANGE_TYP_MAX, }; +typedef TARRAY2(SVersionRange) SVerRangeList; + struct STsdbFSetPartition { int64_t fid; int8_t stat; SVerRangeList verRanges[TSDB_SNAP_RANGE_TYP_MAX]; }; +typedef struct STsdbFSetPartition STsdbFSetPartition; +typedef TARRAY2(STsdbFSetPartition *) STsdbFSetPartList; + +STsdbFSetPartList *tsdbFSetPartListCreate(); +void tsdbFSetPartListDestroy(STsdbFSetPartList **ppList); +int32_t tSerializeTsdbFSetPartList(void *buf, int32_t bufLen, STsdbFSetPartList *pList); +int32_t tDeserializeTsdbFSetPartList(void *buf, int32_t bufLen, STsdbFSetPartList *pList); +int32_t tsdbFSetPartListToRangeDiff(STsdbFSetPartList *pList, TFileSetRangeArray **ppRanges); + // snap read struct STsdbReadSnap { SMemTable *pMem; diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index df4df18dc3..ab52f5799d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -1072,6 +1072,24 @@ int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr) { return 0; } +static SHashObj *tsdbFSetRangeArrayToHash(TFileSetRangeArray *pRanges) { + int32_t 
capacity = TARRAY2_SIZE(pRanges) * 2; + SHashObj *pHash = taosHashInit(capacity, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); + if (pHash == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + for (int32_t i = 0; i < TARRAY2_SIZE(pRanges); i++) { + STFileSetRange *u = TARRAY2_GET(pRanges, i); + int32_t fid = u->fid; + int32_t code = taosHashPut(pHash, &fid, sizeof(fid), u, sizeof(*u)); + ASSERT(code == 0); + tsdbDebug("range diff hash fid:%d, sver:%" PRId64 ", ever:%" PRId64, u->fid, u->sver, u->ever); + } + return pHash; +} + int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TFileSetRangeArray *pRanges, TFileSetArray **fsetArr, TFileOpArray *fopArr) { int32_t code = 0; @@ -1084,7 +1102,7 @@ int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TFileSetRangeArray *pRa TARRAY2_INIT(fsetArr[0]); if (pRanges) { - pHash = tsdbGetSnapRangeHash(pRanges); + pHash = tsdbFSetRangeArrayToHash(pRanges); if (pHash == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _out; @@ -1123,24 +1141,6 @@ _out: return code; } -SHashObj *tsdbGetSnapRangeHash(TFileSetRangeArray *pRanges) { - int32_t capacity = TARRAY2_SIZE(pRanges) * 2; - SHashObj *pHash = taosHashInit(capacity, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); - if (pHash == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - - for (int32_t i = 0; i < TARRAY2_SIZE(pRanges); i++) { - STFileSetRange *u = TARRAY2_GET(pRanges, i); - int32_t fid = u->fid; - int32_t code = taosHashPut(pHash, &fid, sizeof(fid), u, sizeof(*u)); - ASSERT(code == 0); - tsdbDebug("range diff hash fid:%d, sver:%" PRId64 ", ever:%" PRId64, u->fid, u->sver, u->ever); - } - return pHash; -} - int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TFileSetRangeArray *pRanges, TFileSetRangeArray **fsrArr) { int32_t code = 0; @@ -1156,7 +1156,7 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev tsdbInfo("pRanges 
size:%d", (pRanges == NULL ? 0 : TARRAY2_SIZE(pRanges))); if (pRanges) { - pHash = tsdbGetSnapRangeHash(pRanges); + pHash = tsdbFSetRangeArrayToHash(pRanges); if (pHash == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _out; @@ -1184,7 +1184,7 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev tsdbDebug("fsrArr:%p, fid:%d, sver:%" PRId64 ", ever:%" PRId64, fsrArr, fset->fid, sver1, ever1); - code = tsdbTSnapRangeInitRef(fs->tsdb, fset, sver1, ever1, &fsr1); + code = tsdbTFileSetRangeInitRef(fs->tsdb, fset, sver1, ever1, &fsr1); if (code) break; code = TARRAY2_APPEND(fsrArr[0], fsr1); @@ -1195,8 +1195,8 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev taosThreadMutexUnlock(&fs->tsdb->mutex); if (code) { - tsdbTSnapRangeClear(&fsr1); - TARRAY2_DESTROY(fsrArr[0], tsdbTSnapRangeClear); + tsdbTFileSetRangeClear(&fsr1); + TARRAY2_DESTROY(fsrArr[0], tsdbTFileSetRangeClear); fsrArr[0] = NULL; } @@ -1206,4 +1206,4 @@ _out: pHash = NULL; } return code; -} \ No newline at end of file +} diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.h b/source/dnode/vnode/src/tsdb/tsdbFS2.h index 8fdce9e690..3960ca908a 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.h @@ -50,7 +50,7 @@ int32_t tsdbFSDestroyCopyRangedSnapshot(TFileSetArray **fsetArr, TFileOpArray *f int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TFileSetRangeArray *pRanges, TFileSetRangeArray **fsrArr); int32_t tsdbFSDestroyRefRangedSnapshot(TFileSetRangeArray **fsrArr); -// txn +// txn int64_t tsdbFSAllocEid(STFileSystem *fs); int32_t tsdbFSEditBegin(STFileSystem *fs, const TFileOpArray *opArray, EFEditT etype); int32_t tsdbFSEditCommit(STFileSystem *fs); diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.c b/source/dnode/vnode/src/tsdb/tsdbFSet2.c index 7673299e4b..e088f54930 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.c @@ 
-533,7 +533,8 @@ int32_t tsdbTFileSetFilteredInitDup(STsdb *pTsdb, const STFileSet *fset1, int64_ return 0; } -int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, int64_t sver, int64_t ever, STFileSetRange **fsr) { +int32_t tsdbTFileSetRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, int64_t sver, int64_t ever, + STFileSetRange **fsr) { fsr[0] = taosMemoryCalloc(1, sizeof(*fsr[0])); if (fsr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; fsr[0]->fid = fset1->fid; @@ -575,7 +576,7 @@ int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fs return 0; } -int32_t tsdbTSnapRangeClear(STFileSetRange **fsr) { +int32_t tsdbTFileSetRangeClear(STFileSetRange **fsr) { if (!fsr[0]) return 0; tsdbTFileSetClear(&fsr[0]->fset); @@ -584,6 +585,15 @@ int32_t tsdbTSnapRangeClear(STFileSetRange **fsr) { return 0; } +int32_t tsdbTFileSetRangeArrayDestroy(TFileSetRangeArray** ppArr) { + if (ppArr && ppArr[0]) { + TARRAY2_DESTROY(ppArr[0], tsdbTFileSetRangeClear); + taosMemoryFree(ppArr[0]); + ppArr[0] = NULL; + } + return 0; +} + int32_t tsdbTFileSetClear(STFileSet **fset) { if (!fset[0]) return 0; diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.h b/source/dnode/vnode/src/tsdb/tsdbFSet2.h index 3a6427a42c..0951a28f4e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.h @@ -49,8 +49,8 @@ int32_t tsdbTFileSetRemove(STFileSet *fset); int32_t tsdbTFileSetFilteredInitDup(STsdb *pTsdb, const STFileSet *fset1, int64_t ever, STFileSet **fset, TFileOpArray *fopArr); -int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, int64_t sver, int64_t ever, STFileSetRange **fsr); -int32_t tsdbTSnapRangeClear(STFileSetRange **fsr); +int32_t tsdbTFileSetRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, int64_t sver, int64_t ever, + STFileSetRange **fsr); // to/from json int32_t tsdbTFileSetToJson(const STFileSet *fset, cJSON *json); diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c 
b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c index c73d75030d..9d741ab2d3 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c @@ -16,7 +16,7 @@ #include "tsdb.h" #include "tsdbFS2.h" -// STsdbFSetPartition ===================================== +// fset partition static int32_t tsdbFSetPartCmprFn(STsdbFSetPartition* x, STsdbFSetPartition* y) { if (x->fid < y->fid) return -1; if (x->fid > y->fid) return 1; @@ -138,6 +138,7 @@ _err: return -1; } +// fset partition list STsdbFSetPartList* tsdbFSetPartListCreate() { STsdbFSetPartList* pList = taosMemoryCalloc(1, sizeof(STsdbFSetPartList)); if (pList == NULL) { @@ -351,19 +352,11 @@ int32_t tsdbFSetPartListToRangeDiff(STsdbFSetPartList* pList, TFileSetRangeArray _err: if (pDiff) { - tsdbFileSetRangeArrayDestroy(&pDiff); + tsdbTFileSetRangeArrayDestroy(&pDiff); } return -1; } -void tsdbFileSetRangeArrayDestroy(TFileSetRangeArray** ppSnap) { - if (ppSnap && ppSnap[0]) { - TARRAY2_DESTROY(ppSnap[0], tsdbTSnapRangeClear); - taosMemoryFree(ppSnap[0]); - ppSnap[0] = NULL; - } -} - void tsdbFSetPartListDestroy(STsdbFSetPartList** ppList) { if (ppList == NULL || ppList[0] == NULL) return; diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 48872404ed..f890353320 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -444,7 +444,7 @@ _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), __func__, lino, tstrerror(code), sver, ever, type); - tsdbFileSetRangeArrayDestroy(&reader[0]->fsrArr); + tsdbTFileSetRangeArrayDestroy(&reader[0]->fsrArr); taosMemoryFree(reader[0]); reader[0] = NULL; } else { @@ -472,7 +472,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** reader) { TARRAY2_DESTROY(reader[0]->sttReaderArr, tsdbSttFileReaderClose); tsdbDataFileReaderClose(&reader[0]->dataReader); - 
tsdbFileSetRangeArrayDestroy(&reader[0]->fsrArr); + tsdbTFileSetRangeArrayDestroy(&reader[0]->fsrArr); tDestroyTSchema(reader[0]->skmTb->pTSchema); for (int32_t i = 0; i < ARRAY_SIZE(reader[0]->aBuf); ++i) { diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index bb3bd59971..21c858709b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -164,7 +164,7 @@ static void vnodeSnapReaderDestroyTsdbRanges(SVSnapReader *pReader) { for (int32_t j = 0; j < TSDB_RETENTION_MAX; ++j) { TFileSetRangeArray **ppRanges = vnodeSnapReaderGetTsdbRanges(pReader, tsdbTyps[j]); if (ppRanges == NULL) continue; - tsdbFileSetRangeArrayDestroy(ppRanges); + tsdbTFileSetRangeArrayDestroy(ppRanges); } } @@ -598,7 +598,7 @@ static void vnodeSnapWriterDestroyTsdbRanges(SVSnapWriter *pWriter) { for (int32_t j = 0; j < TSDB_RETENTION_MAX; ++j) { TFileSetRangeArray **ppRanges = vnodeSnapWriterGetTsdbRanges(pWriter, tsdbTyps[j]); if (ppRanges == NULL) continue; - tsdbFileSetRangeArrayDestroy(ppRanges); + tsdbTFileSetRangeArrayDestroy(ppRanges); } } From 3550347f0c9b1283701e7230c4d84c60a21097b3 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 1 Dec 2023 16:29:48 +0800 Subject: [PATCH 19/65] refact: use tsdbFSDestroyCopyRangedSnapshot and tsdbFSDestroyRefRangedSnapshot --- source/dnode/vnode/src/tsdb/tsdbFS2.c | 4 ++++ source/dnode/vnode/src/tsdb/tsdbFS2.h | 2 +- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index ab52f5799d..e933b3a7dc 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -1141,6 +1141,8 @@ _out: return code; } +int32_t tsdbFSDestroyCopyRangedSnapshot(TFileSetArray **fsetArr) { return tsdbFSDestroyCopySnapshot(fsetArr); } + int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t 
sver, int64_t ever, TFileSetRangeArray *pRanges, TFileSetRangeArray **fsrArr) { int32_t code = 0; @@ -1207,3 +1209,5 @@ _out: } return code; } + +int32_t tsdbFSDestroyRefRangedSnapshot(TFileSetRangeArray **fsrArr) { return tsdbTFileSetRangeArrayDestroy(fsrArr); } diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.h b/source/dnode/vnode/src/tsdb/tsdbFS2.h index 3960ca908a..714bf5bf16 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.h @@ -46,7 +46,7 @@ int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr); int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TFileSetRangeArray *pExclude, TFileSetArray **fsetArr, TFileOpArray *fopArr); -int32_t tsdbFSDestroyCopyRangedSnapshot(TFileSetArray **fsetArr, TFileOpArray *fopArr); +int32_t tsdbFSDestroyCopyRangedSnapshot(TFileSetArray **fsetArr); int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TFileSetRangeArray *pRanges, TFileSetRangeArray **fsrArr); int32_t tsdbFSDestroyRefRangedSnapshot(TFileSetRangeArray **fsrArr); diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index f890353320..8f5394a9bc 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -472,7 +472,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** reader) { TARRAY2_DESTROY(reader[0]->sttReaderArr, tsdbSttFileReaderClose); tsdbDataFileReaderClose(&reader[0]->dataReader); - tsdbTFileSetRangeArrayDestroy(&reader[0]->fsrArr); + tsdbFSDestroyRefRangedSnapshot(&reader[0]->fsrArr); tDestroyTSchema(reader[0]->skmTb->pTSchema); for (int32_t i = 0; i < ARRAY_SIZE(reader[0]->aBuf); ++i) { @@ -1125,7 +1125,7 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** writer, int8_t rollback) { tsdbDataFileReaderClose(&writer[0]->ctx->dataReader); TARRAY2_DESTROY(writer[0]->fopArr, NULL); - tsdbFSDestroyCopySnapshot(&writer[0]->fsetArr); + tsdbFSDestroyCopyRangedSnapshot(&writer[0]->fsetArr); for 
(int32_t i = 0; i < ARRAY_SIZE(writer[0]->aBuf); ++i) { tFree(writer[0]->aBuf[i]); From ed39c9a57253be3ee7e5ffb5f8278414feb8dc64 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 1 Dec 2023 17:21:27 +0800 Subject: [PATCH 20/65] refact: rename snap range to fset range in tsdbSnapInfo.c --- source/dnode/vnode/src/inc/tsdb.h | 14 +- source/dnode/vnode/src/tsdb/tsdbSnapInfo.c | 179 +++++++++++---------- 2 files changed, 98 insertions(+), 95 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index b0cf6ecf5b..a5203353db 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -690,12 +690,12 @@ int32_t tsdbTFileSetRangeArrayDestroy(TFileSetRangeArray **ppArr); // fset partition enum { - TSDB_SNAP_RANGE_TYP_HEAD = 0, - TSDB_SNAP_RANGE_TYP_DATA, - TSDB_SNAP_RANGE_TYP_SMA, - TSDB_SNAP_RANGE_TYP_TOMB, - TSDB_SNAP_RANGE_TYP_STT, - TSDB_SNAP_RANGE_TYP_MAX, + TSDB_FSET_RANGE_TYP_HEAD = 0, + TSDB_FSET_RANGE_TYP_DATA, + TSDB_FSET_RANGE_TYP_SMA, + TSDB_FSET_RANGE_TYP_TOMB, + TSDB_FSET_RANGE_TYP_STT, + TSDB_FSET_RANGE_TYP_MAX, }; typedef TARRAY2(SVersionRange) SVerRangeList; @@ -703,7 +703,7 @@ typedef TARRAY2(SVersionRange) SVerRangeList; struct STsdbFSetPartition { int64_t fid; int8_t stat; - SVerRangeList verRanges[TSDB_SNAP_RANGE_TYP_MAX]; + SVerRangeList verRanges[TSDB_FSET_RANGE_TYP_MAX]; }; typedef struct STsdbFSetPartition STsdbFSetPartition; diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c index 9d741ab2d3..8afbe187a4 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c @@ -31,7 +31,7 @@ static int32_t tVersionRangeCmprFn(SVersionRange* x, SVersionRange* y) { return 0; } -static int32_t tsdbFileSetRangeCmprFn(STFileSetRange* x, STFileSetRange* y) { +static int32_t tsdbTFileSetRangeCmprFn(STFileSetRange* x, STFileSetRange* y) { if (x->fid < y->fid) return -1; if (x->fid > y->fid) return 
1; return 0; @@ -43,7 +43,7 @@ STsdbFSetPartition* tsdbFSetPartitionCreate() { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } - for (int32_t i = 0; i < TSDB_SNAP_RANGE_TYP_MAX; i++) { + for (int32_t i = 0; i < TSDB_FSET_RANGE_TYP_MAX; i++) { TARRAY2_INIT(&pSP->verRanges[i]); } return pSP; @@ -53,30 +53,30 @@ void tsdbFSetPartitionClear(STsdbFSetPartition** ppSP) { if (ppSP == NULL || ppSP[0] == NULL) { return; } - for (int32_t i = 0; i < TSDB_SNAP_RANGE_TYP_MAX; i++) { + for (int32_t i = 0; i < TSDB_FSET_RANGE_TYP_MAX; i++) { TARRAY2_DESTROY(&ppSP[0]->verRanges[i], NULL); } taosMemoryFree(ppSP[0]); ppSP[0] = NULL; } -static int32_t tsdbFTypeToSRangeTyp(tsdb_ftype_t ftype) { +static int32_t tsdbFTypeToFRangeType(tsdb_ftype_t ftype) { switch (ftype) { case TSDB_FTYPE_HEAD: - return TSDB_SNAP_RANGE_TYP_HEAD; + return TSDB_FSET_RANGE_TYP_HEAD; case TSDB_FTYPE_DATA: - return TSDB_SNAP_RANGE_TYP_DATA; + return TSDB_FSET_RANGE_TYP_DATA; case TSDB_FTYPE_SMA: - return TSDB_SNAP_RANGE_TYP_SMA; + return TSDB_FSET_RANGE_TYP_SMA; case TSDB_FTYPE_TOMB: - return TSDB_SNAP_RANGE_TYP_TOMB; + return TSDB_FSET_RANGE_TYP_TOMB; case TSDB_FTYPE_STT: - return TSDB_SNAP_RANGE_TYP_STT; + return TSDB_FSET_RANGE_TYP_STT; } - return TSDB_SNAP_RANGE_TYP_MAX; + return TSDB_FSET_RANGE_TYP_MAX; } -static int32_t tsdbTFileSetToSnapPart(STFileSet* fset, STsdbFSetPartition** ppSP) { +static int32_t tsdbTFileSetToFSetPartition(STFileSet* fset, STsdbFSetPartition** ppSP) { STsdbFSetPartition* p = tsdbFSetPartitionCreate(); if (p == NULL) { goto _err; @@ -90,8 +90,8 @@ static int32_t tsdbTFileSetToSnapPart(STFileSet* fset, STsdbFSetPartition** ppSP int32_t count = 0; for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { if (fset->farr[ftype] == NULL) continue; - typ = tsdbFTypeToSRangeTyp(ftype); - ASSERT(typ < TSDB_SNAP_RANGE_TYP_MAX); + typ = tsdbFTypeToFRangeType(ftype); + ASSERT(typ < TSDB_FSET_RANGE_TYP_MAX); STFile* f = fset->farr[ftype]->f; if (f->maxVer > fset->maxVerValid) 
{ corrupt = true; @@ -106,7 +106,7 @@ static int32_t tsdbTFileSetToSnapPart(STFileSet* fset, STsdbFSetPartition** ppSP ASSERT(code == 0); } - typ = TSDB_SNAP_RANGE_TYP_STT; + typ = TSDB_FSET_RANGE_TYP_STT; const SSttLvl* lvl; TARRAY2_FOREACH(fset->lvlArr, lvl) { STFileObj* fobj; @@ -149,35 +149,61 @@ STsdbFSetPartList* tsdbFSetPartListCreate() { return pList; } -static STsdbFSetPartList* tsdbGetSnapPartList(STFileSystem* fs) { - STsdbFSetPartList* pList = tsdbFSetPartListCreate(); - if (pList == NULL) { - return NULL; - } +void tsdbFSetPartListDestroy(STsdbFSetPartList** ppList) { + if (ppList == NULL || ppList[0] == NULL) return; - int32_t code = 0; - taosThreadMutexLock(&fs->tsdb->mutex); - STFileSet* fset; - TARRAY2_FOREACH(fs->fSetArr, fset) { - STsdbFSetPartition* pItem = NULL; - if (tsdbTFileSetToSnapPart(fset, &pItem) < 0) { - code = -1; - break; - } - ASSERT(pItem != NULL); - code = TARRAY2_SORT_INSERT(pList, pItem, tsdbFSetPartCmprFn); - ASSERT(code == 0); - } - taosThreadMutexUnlock(&fs->tsdb->mutex); - - if (code) { - TARRAY2_DESTROY(pList, tsdbFSetPartitionClear); - taosMemoryFree(pList); - pList = NULL; - } - return pList; + TARRAY2_DESTROY(ppList[0], tsdbFSetPartitionClear); + taosMemoryFree(ppList[0]); + ppList[0] = NULL; } +int32_t tsdbFSetPartListToRangeDiff(STsdbFSetPartList* pList, TFileSetRangeArray** ppRanges) { + TFileSetRangeArray* pDiff = taosMemoryCalloc(1, sizeof(TFileSetRangeArray)); + if (pDiff == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + TARRAY2_INIT(pDiff); + + STsdbFSetPartition* part; + TARRAY2_FOREACH(pList, part) { + STFileSetRange* r = taosMemoryCalloc(1, sizeof(STFileSetRange)); + if (r == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + int64_t maxVerValid = -1; + int32_t typMax = TSDB_FSET_RANGE_TYP_MAX; + for (int32_t i = 0; i < typMax; i++) { + SVerRangeList* iList = &part->verRanges[i]; + SVersionRange vr = {0}; + TARRAY2_FOREACH(iList, vr) { + if (vr.maxVer < vr.minVer) { + continue; + } + 
maxVerValid = TMAX(maxVerValid, vr.maxVer); + } + } + r->fid = part->fid; + r->sver = maxVerValid + 1; + r->ever = VERSION_MAX; + tsdbDebug("range diff fid:%" PRId64 ", sver:%" PRId64 ", ever:%" PRId64, part->fid, r->sver, r->ever); + int32_t code = TARRAY2_SORT_INSERT(pDiff, r, tsdbTFileSetRangeCmprFn); + ASSERT(code == 0); + } + ppRanges[0] = pDiff; + + tsdbInfo("pDiff size:%d", TARRAY2_SIZE(pDiff)); + return 0; + +_err: + if (pDiff) { + tsdbTFileSetRangeArrayDestroy(&pDiff); + } + return -1; +} + +// serialization int32_t tTsdbFSetPartListDataLenCalc(STsdbFSetPartList* pList) { int32_t hdrLen = sizeof(int32_t); int32_t datLen = 0; @@ -190,7 +216,7 @@ int32_t tTsdbFSetPartListDataLenCalc(STsdbFSetPartList* pList) { for (int32_t u = 0; u < len; u++) { STsdbFSetPartition* p = TARRAY2_GET(pList, u); - int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; + int32_t typMax = TSDB_FSET_RANGE_TYP_MAX; int32_t uItem = 0; uItem += sizeof(STsdbFSetPartition); uItem += sizeof(typMax); @@ -229,7 +255,7 @@ int32_t tSerializeTsdbFSetPartList(void* buf, int32_t bufLen, STsdbFSetPartList* if (tEncodeI8(&encoder, reserved8) < 0) goto _err; if (tEncodeI16(&encoder, reserved16) < 0) goto _err; - int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; + int32_t typMax = TSDB_FSET_RANGE_TYP_MAX; if (tEncodeI32(&encoder, typMax) < 0) goto _err; for (int32_t i = 0; i < typMax; i++) { @@ -311,58 +337,34 @@ _err: return -1; } -int32_t tsdbFSetPartListToRangeDiff(STsdbFSetPartList* pList, TFileSetRangeArray** ppRanges) { - TFileSetRangeArray* pDiff = taosMemoryCalloc(1, sizeof(TFileSetRangeArray)); - if (pDiff == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _err; +// fs state +static STsdbFSetPartList* tsdbSnapGetFSetPartList(STFileSystem* fs) { + STsdbFSetPartList* pList = tsdbFSetPartListCreate(); + if (pList == NULL) { + return NULL; } - TARRAY2_INIT(pDiff); - STsdbFSetPartition* part; - TARRAY2_FOREACH(pList, part) { - STFileSetRange* r = taosMemoryCalloc(1, sizeof(STFileSetRange)); - if (r == NULL) { - 
terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _err; + int32_t code = 0; + taosThreadMutexLock(&fs->tsdb->mutex); + STFileSet* fset; + TARRAY2_FOREACH(fs->fSetArr, fset) { + STsdbFSetPartition* pItem = NULL; + if (tsdbTFileSetToFSetPartition(fset, &pItem) < 0) { + code = -1; + break; } - int64_t maxVerValid = -1; - int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; - for (int32_t i = 0; i < typMax; i++) { - SVerRangeList* iList = &part->verRanges[i]; - SVersionRange vr = {0}; - TARRAY2_FOREACH(iList, vr) { - if (vr.maxVer < vr.minVer) { - continue; - } - maxVerValid = TMAX(maxVerValid, vr.maxVer); - } - } - r->fid = part->fid; - r->sver = maxVerValid + 1; - r->ever = VERSION_MAX; - tsdbDebug("range diff fid:%" PRId64 ", sver:%" PRId64 ", ever:%" PRId64, part->fid, r->sver, r->ever); - int32_t code = TARRAY2_SORT_INSERT(pDiff, r, tsdbFileSetRangeCmprFn); + ASSERT(pItem != NULL); + code = TARRAY2_SORT_INSERT(pList, pItem, tsdbFSetPartCmprFn); ASSERT(code == 0); } - ppRanges[0] = pDiff; + taosThreadMutexUnlock(&fs->tsdb->mutex); - tsdbInfo("pDiff size:%d", TARRAY2_SIZE(pDiff)); - return 0; - -_err: - if (pDiff) { - tsdbTFileSetRangeArrayDestroy(&pDiff); + if (code) { + TARRAY2_DESTROY(pList, tsdbFSetPartitionClear); + taosMemoryFree(pList); + pList = NULL; } - return -1; -} - -void tsdbFSetPartListDestroy(STsdbFSetPartList** ppList) { - if (ppList == NULL || ppList[0] == NULL) return; - - TARRAY2_DESTROY(ppList[0], tsdbFSetPartitionClear); - taosMemoryFree(ppList[0]); - ppList[0] = NULL; + return pList; } ETsdbFsState tsdbSnapGetFsState(SVnode* pVnode) { @@ -378,6 +380,7 @@ ETsdbFsState tsdbSnapGetFsState(SVnode* pVnode) { return TSDB_FS_STATE_NORMAL; } +// description int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { int code = -1; int32_t tsdbMaxCnt = (!VND_IS_RSMA(pVnode) ? 
1 : TSDB_RETENTION_MAX); @@ -387,7 +390,7 @@ int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { // get part list for (int32_t j = 0; j < tsdbMaxCnt; ++j) { STsdb* pTsdb = SMA_RSMA_GET_TSDB(pVnode, j); - pLists[j] = tsdbGetSnapPartList(pTsdb->pFS); + pLists[j] = tsdbSnapGetFSetPartList(pTsdb->pFS); if (pLists[j] == NULL) goto _out; } From f68804322c2e6ea54d7843102169e5d30153e0c0 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 4 Dec 2023 16:56:20 +0800 Subject: [PATCH 21/65] refact: improve code of tsdbSnapGetDetails as tsdbSnapPrepDescription --- source/dnode/vnode/src/inc/tsdb.h | 2 +- source/dnode/vnode/src/tsdb/tsdbSnapInfo.c | 133 ++++++++++++++------- source/dnode/vnode/src/vnd/vnodeSync.c | 2 +- 3 files changed, 89 insertions(+), 48 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index a5203353db..dc3aa418b4 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -1041,7 +1041,7 @@ typedef enum { // utils ETsdbFsState tsdbSnapGetFsState(SVnode *pVnode); -int32_t tsdbSnapGetDetails(SVnode *pVnode, SSnapshot *pSnap); +int32_t tsdbSnapPrepDescription(SVnode *pVnode, SSnapshot *pSnap); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c index 8afbe187a4..573ba48774 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c @@ -381,69 +381,110 @@ ETsdbFsState tsdbSnapGetFsState(SVnode* pVnode) { } // description -int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { - int code = -1; - int32_t tsdbMaxCnt = (!VND_IS_RSMA(pVnode) ? 
1 : TSDB_RETENTION_MAX); - int32_t subTyps[TSDB_RETENTION_MAX] = {SNAP_DATA_TSDB, SNAP_DATA_RSMA1, SNAP_DATA_RSMA2}; - STsdbFSetPartList* pLists[TSDB_RETENTION_MAX] = {0}; +typedef struct STsdbPartitionInfo { + int32_t vgId; + int32_t tsdbMaxCnt; + int32_t subTyps[TSDB_RETENTION_MAX]; + STsdbFSetPartList* pLists[TSDB_RETENTION_MAX]; +} STsdbPartitionInfo; - // get part list - for (int32_t j = 0; j < tsdbMaxCnt; ++j) { +static int32_t tsdbPartitionInfoInit(SVnode* pVnode, STsdbPartitionInfo* pInfo) { + int32_t subTyps[TSDB_RETENTION_MAX] = {SNAP_DATA_TSDB, SNAP_DATA_RSMA1, SNAP_DATA_RSMA2}; + pInfo->vgId = TD_VID(pVnode); + pInfo->tsdbMaxCnt = (!VND_IS_RSMA(pVnode) ? 1 : TSDB_RETENTION_MAX); + + ASSERT(sizeof(pInfo->subTyps) == sizeof(subTyps)); + memcpy(pInfo->subTyps, (char*)subTyps, sizeof(subTyps)); + + // fset partition list + memset(pInfo->pLists, 0, sizeof(pInfo->pLists[0]) * TSDB_RETENTION_MAX); + for (int32_t j = 0; j < pInfo->tsdbMaxCnt; ++j) { STsdb* pTsdb = SMA_RSMA_GET_TSDB(pVnode, j); - pLists[j] = tsdbSnapGetFSetPartList(pTsdb->pFS); - if (pLists[j] == NULL) goto _out; + pInfo->pLists[j] = tsdbSnapGetFSetPartList(pTsdb->pFS); + if (pInfo->pLists[j] == NULL) return -1; } + return 0; +} - // estimate bufLen and prepare - int32_t bufLen = sizeof(SSyncTLV); // typ: TDMT_SYNC_PREP_SNAPSHOT or TDMT_SYNC_PREP_SNAPSOT_REPLY - for (int32_t j = 0; j < tsdbMaxCnt; ++j) { - bufLen += sizeof(SSyncTLV); // subTyps[j] - bufLen += tTsdbFSetPartListDataLenCalc(pLists[j]); +static void tsdbPartitionInfoClear(STsdbPartitionInfo* pInfo) { + for (int32_t j = 0; j < pInfo->tsdbMaxCnt; ++j) { + if (pInfo->pLists[j] == NULL) continue; + tsdbFSetPartListDestroy(&pInfo->pLists[j]); } +} - tsdbInfo("vgId:%d, allocate %d bytes for data of snapshot info.", TD_VID(pVnode), bufLen); +static int32_t tsdbPartitionInfoEstSize(STsdbPartitionInfo* pInfo) { + int32_t dataLen = 0; + for (int32_t j = 0; j < pInfo->tsdbMaxCnt; ++j) { + dataLen += sizeof(SSyncTLV); // subTyps[j] + dataLen += 
tTsdbFSetPartListDataLenCalc(pInfo->pLists[j]); + } + return dataLen; +} - void* data = taosMemoryRealloc(pSnap->data, bufLen); +static int32_t tsdbPartitionInfoSerialize(STsdbPartitionInfo* pInfo, uint8_t* buf, int32_t bufLen, int32_t* offset) { + int32_t tlen = 0; + for (int32_t j = 0; j < pInfo->tsdbMaxCnt; ++j) { + SSyncTLV* pSubHead = (void*)((char*)buf + offset[0]); + int32_t valOffset = offset[0] + sizeof(*pSubHead); + ASSERT(pSubHead->val == (char*)buf + valOffset); + if ((tlen = tSerializeTsdbFSetPartList(pSubHead->val, bufLen - valOffset, pInfo->pLists[j])) < 0) { + tsdbError("vgId:%d, failed to serialize fset partition list of tsdb %d since %s", pInfo->vgId, j, terrstr()); + return -1; + } + pSubHead->typ = pInfo->subTyps[j]; + pSubHead->len = tlen; + offset[0] += sizeof(*pSubHead) + tlen; + } + return 0; +} + +int32_t syncSnapInfoDataRealloc(SSnapshot* pSnap, int32_t size) { + void* data = taosMemoryRealloc(pSnap->data, size); if (data == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + pSnap->data = data; + return 0; +} + +int32_t tsdbSnapPrepDescription(SVnode* pVnode, SSnapshot* pSnap) { + ASSERT(pSnap->type == TDMT_SYNC_PREP_SNAPSHOT || pSnap->type == TDMT_SYNC_PREP_SNAPSHOT_REPLY); + STsdbPartitionInfo partitionInfo = {0}; + int code = -1; + STsdbPartitionInfo* pInfo = &partitionInfo; + + if (tsdbPartitionInfoInit(pVnode, pInfo) != 0) { + goto _out; + } + + // info data realloc + int32_t bufLen = sizeof(SSyncTLV); + bufLen += tsdbPartitionInfoEstSize(pInfo); + if (syncSnapInfoDataRealloc(pSnap, bufLen) != 0) { tsdbError("vgId:%d, failed to realloc memory for data of snapshot info. 
bytes:%d", TD_VID(pVnode), bufLen); goto _out; } - pSnap->data = data; - // header - SSyncTLV* head = data; - head->len = 0; - head->typ = pSnap->type; - int32_t offset = sizeof(SSyncTLV); - int32_t tlen = 0; + // serialization + SSyncTLV* pHead = pSnap->data; + pHead->typ = pSnap->type; - // fill snapshot info - for (int32_t j = 0; j < tsdbMaxCnt; ++j) { - // subHead - SSyncTLV* subHead = (void*)((char*)data + offset); - subHead->typ = subTyps[j]; - ASSERT(subHead->val == (char*)data + offset + sizeof(SSyncTLV)); - - if ((tlen = tSerializeTsdbFSetPartList(subHead->val, bufLen - offset - sizeof(SSyncTLV), pLists[j])) < 0) { - tsdbError("vgId:%d, failed to serialize snap partition list of tsdb %d since %s", TD_VID(pVnode), j, terrstr()); - goto _out; - } - subHead->len = tlen; - offset += sizeof(SSyncTLV) + tlen; + int32_t offset = 0; + if (tsdbPartitionInfoSerialize(pInfo, pHead->val, bufLen - sizeof(*pHead), &offset) != 0) { + tsdbError("vgId:%d, failed to serialize tsdb partition info since %s", TD_VID(pVnode), terrstr()); + goto _out; } - // total length of subfields - head->len = offset - sizeof(SSyncTLV); - ASSERT(offset <= bufLen); + // set header of info data + ASSERT(sizeof(*pHead) + offset <= bufLen); + pHead->len = offset; + + tsdbInfo("vgId:%d, tsdb snap info prepared. 
type:%s, val length:%d", TD_VID(pVnode), TMSG_INFO(pHead->typ), + pHead->len); code = 0; - _out: - for (int32_t j = 0; j < tsdbMaxCnt; ++j) { - if (pLists[j] == NULL) continue; - tsdbFSetPartListDestroy(&pLists[j]); - } - + tsdbPartitionInfoClear(pInfo); return code; } - diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 817d5124a2..5871a60c9e 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -804,7 +804,7 @@ int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnap) { } if (pSnap->type == TDMT_SYNC_PREP_SNAPSHOT || pSnap->type == TDMT_SYNC_PREP_SNAPSHOT_REPLY) { - code = tsdbSnapGetDetails(pVnode, pSnap); + code = tsdbSnapPrepDescription(pVnode, pSnap); } return code; } From 1a9b08fa0874c39ddadb4285e1f54b32a53c2400 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 4 Dec 2023 17:05:25 +0800 Subject: [PATCH 22/65] refact: relocate func syncSnapInfoDataRealloc --- include/libs/sync/sync.h | 3 +++ source/dnode/vnode/src/tsdb/tsdbSnapInfo.c | 10 ---------- source/libs/sync/src/syncUtil.c | 10 ++++++++++ 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index ece1e40585..a428a9ae6a 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -289,6 +289,9 @@ const char* syncStr(ESyncState state); int32_t syncNodeGetConfig(int64_t rid, SSyncCfg *cfg); +// util +int32_t syncSnapInfoDataRealloc(SSnapshot* pSnap, int32_t size); + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c index 573ba48774..1662567247 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c @@ -439,16 +439,6 @@ static int32_t tsdbPartitionInfoSerialize(STsdbPartitionInfo* pInfo, uint8_t* bu return 0; } -int32_t syncSnapInfoDataRealloc(SSnapshot* pSnap, int32_t size) { - void* data = 
taosMemoryRealloc(pSnap->data, size); - if (data == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - pSnap->data = data; - return 0; -} - int32_t tsdbSnapPrepDescription(SVnode* pVnode, SSnapshot* pSnap) { ASSERT(pSnap->type == TDMT_SYNC_PREP_SNAPSHOT || pSnap->type == TDMT_SYNC_PREP_SNAPSHOT_REPLY); STsdbPartitionInfo partitionInfo = {0}; diff --git a/source/libs/sync/src/syncUtil.c b/source/libs/sync/src/syncUtil.c index 06847c081c..2ce56af946 100644 --- a/source/libs/sync/src/syncUtil.c +++ b/source/libs/sync/src/syncUtil.c @@ -487,3 +487,13 @@ void syncLogSendRequestVoteReply(SSyncNode* pSyncNode, const SyncRequestVoteRepl sNInfo(pSyncNode, "send sync-request-vote-reply to dnode:%d {term:%" PRId64 ", grant:%d}, %s", DID(&pMsg->destId), pMsg->term, pMsg->voteGranted, s); } + +int32_t syncSnapInfoDataRealloc(SSnapshot* pSnap, int32_t size) { + void* data = taosMemoryRealloc(pSnap->data, size); + if (data == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + pSnap->data = data; + return 0; +} From de9c9d4f205f8f30e29c1d47b4be5f6aa4474772 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 4 Dec 2023 21:05:18 +0800 Subject: [PATCH 23/65] refact: improve code of tsdbPartitionInfoSerialize --- source/dnode/vnode/src/tsdb/tsdbSnapInfo.c | 36 +++++++++++++--------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c index 1662567247..65ee1a7db3 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c @@ -16,6 +16,8 @@ #include "tsdb.h" #include "tsdbFS2.h" +#define TSDB_SNAP_MSG_VER 1 + // fset partition static int32_t tsdbFSetPartCmprFn(STsdbFSetPartition* x, STsdbFSetPartition* y) { if (x->fid < y->fid) return -1; @@ -241,7 +243,7 @@ int32_t tSerializeTsdbFSetPartList(void* buf, int32_t bufLen, STsdbFSetPartList* int16_t reserved16 = 0; int64_t reserved64 = 0; - int8_t msgVer = 1; + int8_t 
msgVer = TSDB_SNAP_MSG_VER; int32_t len = TARRAY2_SIZE(pList); if (tStartEncode(&encoder) < 0) goto _err; @@ -296,6 +298,7 @@ int32_t tDeserializeTsdbFSetPartList(void* buf, int32_t bufLen, STsdbFSetPartLis int32_t len = 0; if (tStartDecode(&decoder) < 0) goto _err; if (tDecodeI8(&decoder, &msgVer) < 0) goto _err; + if (msgVer != TSDB_SNAP_MSG_VER) goto _err; if (tDecodeI32(&decoder, &len) < 0) goto _err; for (int32_t u = 0; u < len; u++) { @@ -422,11 +425,12 @@ static int32_t tsdbPartitionInfoEstSize(STsdbPartitionInfo* pInfo) { return dataLen; } -static int32_t tsdbPartitionInfoSerialize(STsdbPartitionInfo* pInfo, uint8_t* buf, int32_t bufLen, int32_t* offset) { +static int32_t tsdbPartitionInfoSerialize(STsdbPartitionInfo* pInfo, uint8_t* buf, int32_t bufLen) { int32_t tlen = 0; + int32_t offset = 0; for (int32_t j = 0; j < pInfo->tsdbMaxCnt; ++j) { - SSyncTLV* pSubHead = (void*)((char*)buf + offset[0]); - int32_t valOffset = offset[0] + sizeof(*pSubHead); + SSyncTLV* pSubHead = (void*)((char*)buf + offset); + int32_t valOffset = offset + sizeof(*pSubHead); ASSERT(pSubHead->val == (char*)buf + valOffset); if ((tlen = tSerializeTsdbFSetPartList(pSubHead->val, bufLen - valOffset, pInfo->pLists[j])) < 0) { tsdbError("vgId:%d, failed to serialize fset partition list of tsdb %d since %s", pInfo->vgId, j, terrstr()); @@ -434,9 +438,9 @@ static int32_t tsdbPartitionInfoSerialize(STsdbPartitionInfo* pInfo, uint8_t* bu } pSubHead->typ = pInfo->subTyps[j]; pSubHead->len = tlen; - offset[0] += sizeof(*pSubHead) + tlen; + offset += sizeof(*pSubHead) + tlen; } - return 0; + return offset; } int32_t tsdbSnapPrepDescription(SVnode* pVnode, SSnapshot* pSnap) { @@ -450,26 +454,30 @@ int32_t tsdbSnapPrepDescription(SVnode* pVnode, SSnapshot* pSnap) { } // info data realloc - int32_t bufLen = sizeof(SSyncTLV); + const int32_t headLen = sizeof(SSyncTLV); + int32_t bufLen = headLen; bufLen += tsdbPartitionInfoEstSize(pInfo); if (syncSnapInfoDataRealloc(pSnap, bufLen) != 0) { - 
tsdbError("vgId:%d, failed to realloc memory for data of snapshot info. bytes:%d", TD_VID(pVnode), bufLen); + tsdbError("vgId:%d, failed to realloc memory for data of snap info. bytes:%d", TD_VID(pVnode), bufLen); goto _out; } // serialization - SSyncTLV* pHead = pSnap->data; - pHead->typ = pSnap->type; + char* buf = (void*)pSnap->data; + int32_t offset = headLen; + int32_t tlen = 0; - int32_t offset = 0; - if (tsdbPartitionInfoSerialize(pInfo, pHead->val, bufLen - sizeof(*pHead), &offset) != 0) { + if ((tlen = tsdbPartitionInfoSerialize(pInfo, buf + offset, bufLen - offset)) < 0) { tsdbError("vgId:%d, failed to serialize tsdb partition info since %s", TD_VID(pVnode), terrstr()); goto _out; } + offset += tlen; + ASSERT(offset <= bufLen); // set header of info data - ASSERT(sizeof(*pHead) + offset <= bufLen); - pHead->len = offset; + SSyncTLV* pHead = pSnap->data; + pHead->typ = pSnap->type; + pHead->len = offset - headLen; tsdbInfo("vgId:%d, tsdb snap info prepared. type:%s, val length:%d", TD_VID(pVnode), TMSG_INFO(pHead->typ), pHead->len); From d4add073cc676d9ab52de5d24422d7e88a2f9704 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 3 Nov 2023 16:42:41 +0800 Subject: [PATCH 24/65] refact: improve code with syncSnapSendRsp --- source/libs/sync/inc/syncReplication.h | 4 ++ source/libs/sync/src/syncSnapshot.c | 70 ++++++++++---------------- 2 files changed, 31 insertions(+), 43 deletions(-) diff --git a/source/libs/sync/inc/syncReplication.h b/source/libs/sync/inc/syncReplication.h index 04456b2454..ecd2b5163e 100644 --- a/source/libs/sync/inc/syncReplication.h +++ b/source/libs/sync/inc/syncReplication.h @@ -56,6 +56,10 @@ int32_t syncNodeReplicateWithoutLock(SSyncNode* pNode); int32_t syncNodeSendAppendEntries(SSyncNode* pNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg); +int32_t syncSnapSendMsg(SSyncSnapshotSender* pSender, int32_t seq, void* pBlock, int32_t len, int32_t typ); +int32_t syncSnapSendRsp(SSyncSnapshotReceiver* pReceiver, SyncSnapshotSend* 
pMsg, void* pBlock, int32_t len, + int32_t typ, int32_t code); + #ifdef __cplusplus } #endif diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 95952c960e..e1471ad3d9 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -23,8 +23,6 @@ #include "syncReplication.h" #include "syncUtil.h" -int32_t syncSnapSendMsg(SSyncSnapshotSender *pSender, int32_t seq, void *pBlock, int32_t len, int32_t typ); - static void syncSnapBufferReset(SSyncSnapBuffer *pBuf) { taosThreadMutexLock(&pBuf->mutex); for (int64_t i = pBuf->start; i < pBuf->end; ++i) { @@ -153,7 +151,7 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { pSender->lastSendTime = taosGetTimestampMs(); pSender->finish = false; - // Get full snapshot info + // Get snapshot info SSyncNode *pSyncNode = pSender->pSyncNode; SSnapshot snapInfo = {.type = TDMT_SYNC_PREP_SNAPSHOT}; if (pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapInfo) != 0) { @@ -161,11 +159,10 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { goto _out; } - int dataLen = 0; void *pData = snapInfo.data; - int32_t type = 0; + int32_t type = (pData) ? snapInfo.type : 0; + int32_t dataLen = 0; if (pData) { - type = snapInfo.type; SSyncTLV *datHead = pData; if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT) { sSError(pSender, "unexpected data typ in data of snapshot info. 
typ: %d", datHead->typ); @@ -688,24 +685,23 @@ _START_RECEIVER: snapshotReceiverStart(pReceiver, pMsg); // set start-time same with sender -_SEND_REPLY: - // build msg - ; // make complier happy +_SEND_REPLY:; SSnapshot snapInfo = {.type = TDMT_SYNC_PREP_SNAPSHOT_REPLY}; int32_t dataLen = 0; - if (pMsg->dataLen > 0) { + if (pMsg->payloadType == TDMT_SYNC_PREP_SNAPSHOT) { void *data = taosMemoryCalloc(1, pMsg->dataLen); if (data == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; code = terrno; goto _out; } - memcpy(data, pMsg->data, pMsg->dataLen); snapInfo.data = data; data = NULL; - pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapInfo); + memcpy(snapInfo.data, pMsg->data, pMsg->dataLen); + // exchange snap info + pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapInfo); SSyncTLV *datHead = snapInfo.data; if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { sRError(pReceiver, "unexpected data typ in data of snapshot info. typ: %d", datHead->typ); @@ -715,29 +711,16 @@ _SEND_REPLY: dataLen = sizeof(SSyncTLV) + datHead->len; } - SRpcMsg rpcMsg = {0}; - if (syncBuildSnapshotSendRsp(&rpcMsg, dataLen, pSyncNode->vgId) != 0) { - sRError(pReceiver, "snapshot receiver failed to build resp since %s", terrstr()); + // send response + void *pData = snapInfo.data; + int32_t type = (pData) ? 
snapInfo.type : 0; + + if (syncSnapSendRsp(pReceiver, pMsg, pData, dataLen, type, code) != 0) { code = terrno; goto _out; } - SyncSnapshotRsp *pRspMsg = rpcMsg.pCont; - pRspMsg->srcId = pSyncNode->myRaftId; - pRspMsg->destId = pMsg->srcId; - pRspMsg->term = raftStoreGetTerm(pSyncNode); - pRspMsg->lastIndex = pMsg->lastIndex; - pRspMsg->lastTerm = pMsg->lastTerm; - pRspMsg->startTime = pMsg->startTime; - pRspMsg->ack = pMsg->seq; // receiver maybe already closed - pRspMsg->code = code; - pRspMsg->snapBeginIndex = syncNodeGetSnapBeginIndex(pSyncNode); - - if (snapInfo.data) { - pRspMsg->payloadType = snapInfo.type; - memcpy(pRspMsg->data, snapInfo.data, dataLen); - - // save snapshot info + if (pData) { SSnapshotParam *pParam = &pReceiver->snapshotParam; void *data = taosMemoryRealloc(pParam->data, dataLen); if (data == NULL) { @@ -748,15 +731,10 @@ _SEND_REPLY: goto _out; } pParam->data = data; + data = NULL; memcpy(pParam->data, snapInfo.data, dataLen); } - // send msg - if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) { - sRError(pReceiver, "failed to send resp since %s", terrstr()); - code = terrno; - } - _out: if (snapInfo.data) { taosMemoryFree(snapInfo.data); @@ -820,11 +798,12 @@ _SEND_REPLY: return code; } -static int32_t syncSnapSendRsp(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg, int32_t code) { +int32_t syncSnapSendRsp(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg, void *pBlock, int32_t blockLen, + int32_t type, int32_t code) { SSyncNode *pSyncNode = pReceiver->pSyncNode; // build msg SRpcMsg rpcMsg = {0}; - if (syncBuildSnapshotSendRsp(&rpcMsg, 0, pSyncNode->vgId)) { + if (syncBuildSnapshotSendRsp(&rpcMsg, blockLen, pSyncNode->vgId)) { sRError(pReceiver, "failed to build snapshot receiver resp since %s", terrstr()); return -1; } @@ -832,13 +811,18 @@ static int32_t syncSnapSendRsp(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSen SyncSnapshotRsp *pRspMsg = rpcMsg.pCont; pRspMsg->srcId = pSyncNode->myRaftId; 
pRspMsg->destId = pMsg->srcId; - pRspMsg->term = raftStoreGetTerm(pSyncNode); + pRspMsg->term = pMsg->term; pRspMsg->lastIndex = pMsg->lastIndex; pRspMsg->lastTerm = pMsg->lastTerm; pRspMsg->startTime = pMsg->startTime; pRspMsg->ack = pMsg->seq; pRspMsg->code = code; pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; + pRspMsg->payloadType = type; + + if (pBlock != NULL && blockLen > 0) { + memcpy(pRspMsg->data, pBlock, blockLen); + } // send msg if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) { @@ -872,7 +856,7 @@ static int32_t syncSnapBufferRecv(SSyncSnapshotReceiver *pReceiver, SyncSnapshot ppMsg[0] = NULL; pRcvBuf->end = TMAX(pMsg->seq + 1, pRcvBuf->end); } else if (pMsg->seq < pRcvBuf->start) { - syncSnapSendRsp(pReceiver, pMsg, code); + syncSnapSendRsp(pReceiver, pMsg, NULL, 0, 0, code); goto _out; } @@ -892,7 +876,7 @@ static int32_t syncSnapBufferRecv(SSyncSnapshotReceiver *pReceiver, SyncSnapshot } } pRcvBuf->start = seq + 1; - syncSnapSendRsp(pReceiver, pRcvBuf->entries[seq % pRcvBuf->size], code); + syncSnapSendRsp(pReceiver, pRcvBuf->entries[seq % pRcvBuf->size], NULL, 0, 0, code); pRcvBuf->entryDeleteCb(pRcvBuf->entries[seq % pRcvBuf->size]); pRcvBuf->entries[seq % pRcvBuf->size] = NULL; if (code) goto _out; @@ -915,7 +899,7 @@ static int32_t syncNodeOnSnapshotReceive(SSyncNode *pSyncNode, SyncSnapshotSend if (snapshotReceiverSignatureCmp(pReceiver, pMsg) != 0) { terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; sRError(pReceiver, "failed to receive snapshot data since %s.", terrstr()); - return syncSnapSendRsp(pReceiver, pMsg, terrno); + return syncSnapSendRsp(pReceiver, pMsg, NULL, 0, 0, terrno); } return syncSnapBufferRecv(pReceiver, ppMsg); From 41fe39de3edbbda0eb3c276e176dd447c817e579 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 3 Nov 2023 17:19:17 +0800 Subject: [PATCH 25/65] refact: improve code with syncNodeExchangeSnapInfo --- source/libs/sync/src/syncSnapshot.c | 83 +++++++++++++++++------------ 1 file 
changed, 48 insertions(+), 35 deletions(-) diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index e1471ad3d9..a315b91791 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -641,6 +641,50 @@ SyncIndex syncNodeGetSnapBeginIndex(SSyncNode *ths) { return snapStart; } +static int32_t syncNodeExchangeSnapInfo(SSyncNode *pSyncNode, SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg, + SSnapshot *pInfo) { + ASSERT(pMsg->payloadType == TDMT_SYNC_PREP_SNAPSHOT); + int32_t code = 0; + + // copy snap info from leader + void *data = taosMemoryCalloc(1, pMsg->dataLen); + if (data == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + code = terrno; + goto _out; + } + pInfo->data = data; + data = NULL; + memcpy(pInfo->data, pMsg->data, pMsg->dataLen); + + // exchange snap info + pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, pInfo); + SSyncTLV *datHead = pInfo->data; + if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { + sRError(pReceiver, "unexpected data typ in data of snapshot info. typ: %d", datHead->typ); + code = TSDB_CODE_INVALID_DATA_FMT; + goto _out; + } + int32_t dataLen = sizeof(SSyncTLV) + datHead->len; + + // save exchanged snap info + SSnapshotParam *pParam = &pReceiver->snapshotParam; + data = taosMemoryRealloc(pParam->data, dataLen); + if (data == NULL) { + sError("vgId:%d, failed to realloc memory for snapshot prep due to %s. 
dataLen:%d", pSyncNode->vgId, + strerror(errno), dataLen); + terrno = TSDB_CODE_OUT_OF_MEMORY; + code = terrno; + goto _out; + } + pParam->data = data; + data = NULL; + memcpy(pParam->data, pInfo->data, dataLen); + +_out: + return code; +} + static int32_t syncNodeOnSnapshotPrep(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; int64_t timeNow = taosGetTimestampMs(); @@ -683,58 +727,27 @@ _START_RECEIVER: snapshotReceiverStop(pReceiver); } - snapshotReceiverStart(pReceiver, pMsg); // set start-time same with sender + snapshotReceiverStart(pReceiver, pMsg); _SEND_REPLY:; SSnapshot snapInfo = {.type = TDMT_SYNC_PREP_SNAPSHOT_REPLY}; int32_t dataLen = 0; if (pMsg->payloadType == TDMT_SYNC_PREP_SNAPSHOT) { - void *data = taosMemoryCalloc(1, pMsg->dataLen); - if (data == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = terrno; + if (syncNodeExchangeSnapInfo(pSyncNode, pReceiver, pMsg, &snapInfo) != 0) { goto _out; } - snapInfo.data = data; - data = NULL; - memcpy(snapInfo.data, pMsg->data, pMsg->dataLen); - - // exchange snap info - pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapInfo); SSyncTLV *datHead = snapInfo.data; - if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { - sRError(pReceiver, "unexpected data typ in data of snapshot info. typ: %d", datHead->typ); - code = TSDB_CODE_INVALID_DATA_FMT; - goto _out; - } dataLen = sizeof(SSyncTLV) + datHead->len; } // send response - void *pData = snapInfo.data; - int32_t type = (pData) ? snapInfo.type : 0; - - if (syncSnapSendRsp(pReceiver, pMsg, pData, dataLen, type, code) != 0) { + int32_t type = (snapInfo.data) ? 
snapInfo.type : 0; + if (syncSnapSendRsp(pReceiver, pMsg, snapInfo.data, dataLen, type, code) != 0) { code = terrno; goto _out; } - if (pData) { - SSnapshotParam *pParam = &pReceiver->snapshotParam; - void *data = taosMemoryRealloc(pParam->data, dataLen); - if (data == NULL) { - sError("vgId:%d, failed to realloc memory for snapshot prep due to %s. dataLen:%d", pSyncNode->vgId, - strerror(errno), dataLen); - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = terrno; - goto _out; - } - pParam->data = data; - data = NULL; - memcpy(pParam->data, snapInfo.data, dataLen); - } - _out: if (snapInfo.data) { taosMemoryFree(snapInfo.data); From 030f3db4d604ae89a151ba5e270a4d38c806bd67 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 3 Nov 2023 17:38:33 +0800 Subject: [PATCH 26/65] refact: improve code of syncNodeOnSnapshotBegin with syncSnapSendRsp --- source/libs/sync/src/syncSnapshot.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index a315b91791..25b620854e 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -784,27 +784,8 @@ _SEND_REPLY: code = terrno; } - // build msg - SRpcMsg rpcMsg = {0}; - if (syncBuildSnapshotSendRsp(&rpcMsg, 0, pSyncNode->vgId) != 0) { - sRError(pReceiver, "failed to build snapshot receiver resp since %s", terrstr()); - return -1; - } - - SyncSnapshotRsp *pRspMsg = rpcMsg.pCont; - pRspMsg->srcId = pSyncNode->myRaftId; - pRspMsg->destId = pMsg->srcId; - pRspMsg->term = raftStoreGetTerm(pSyncNode); - pRspMsg->lastIndex = pMsg->lastIndex; - pRspMsg->lastTerm = pMsg->lastTerm; - pRspMsg->startTime = pMsg->startTime; - pRspMsg->ack = pReceiver->ack; // receiver maybe already closed - pRspMsg->code = code; - pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; - - // send msg - if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) { - sRError(pReceiver, "failed to send 
snapshot receiver resp since %s", terrstr()); + // send response + if (syncSnapSendRsp(pReceiver, pMsg, NULL, 0, 0, code) != 0) { return -1; } From 081c83710ebde03067d5dd496b90a21fe299086d Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 9 Nov 2023 18:21:15 +0800 Subject: [PATCH 27/65] enh: save a copy of snapshot info in syncNodeOnSnapshotPrepRsp --- source/libs/sync/src/syncSnapshot.c | 79 +++++++++++++---------------- 1 file changed, 36 insertions(+), 43 deletions(-) diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 25b620854e..1e3614857e 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -121,6 +121,11 @@ void snapshotSenderDestroy(SSyncSnapshotSender *pSender) { if (pSender->pSndBuf) { syncSnapBufferDestroy(&pSender->pSndBuf); } + + if (pSender->snapshotParam.data) { + taosMemoryFree(pSender->snapshotParam.data); + pSender->snapshotParam.data = NULL; + } // free sender taosMemoryFree(pSender); } @@ -344,9 +349,6 @@ _out:; return code; } -// return 0, start ok -// return 1, last snapshot finish ok -// return -1, error int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) { SSyncSnapshotSender *pSender = syncNodeGetSnapshotSender(pSyncNode, pDestId); if (pSender == NULL) { @@ -377,6 +379,7 @@ int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) { return 0; } +// receiver SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId fromId) { bool condition = (pSyncNode->pFsm->FpSnapshotStartWrite != NULL) && (pSyncNode->pFsm->FpSnapshotStopWrite != NULL) && (pSyncNode->pFsm->FpSnapshotDoWrite != NULL); @@ -506,8 +509,6 @@ void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *p sRInfo(pReceiver, "snapshot receiver start, from dnode:%d.", DID(&pReceiver->fromId)); } -// just set start = false -// FpSnapshotStopWrite should not be called void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) { 
sRDebug(pReceiver, "snapshot receiver stop, not apply, writer:%p", pReceiver->pWriter); @@ -528,7 +529,6 @@ void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) { syncSnapBufferReset(pReceiver->pRcvBuf); } -// when recv last snapshot block, apply data into snapshot static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg) { int32_t code = 0; if (pReceiver->pWriter != NULL) { @@ -587,8 +587,6 @@ static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnap return 0; } -// apply data block -// update progress static int32_t snapshotReceiverGotData(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg) { if (pMsg->seq != pReceiver->ack + 1) { sRError(pReceiver, "snapshot receiver invalid seq, ack:%d seq:%d", pReceiver->ack, pMsg->seq); @@ -641,8 +639,8 @@ SyncIndex syncNodeGetSnapBeginIndex(SSyncNode *ths) { return snapStart; } -static int32_t syncNodeExchangeSnapInfo(SSyncNode *pSyncNode, SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg, - SSnapshot *pInfo) { +static int32_t syncSnapReceiverExchgSnapInfo(SSyncNode *pSyncNode, SSyncSnapshotReceiver *pReceiver, + SyncSnapshotSend *pMsg, SSnapshot *pInfo) { ASSERT(pMsg->payloadType == TDMT_SYNC_PREP_SNAPSHOT); int32_t code = 0; @@ -734,7 +732,7 @@ _SEND_REPLY:; SSnapshot snapInfo = {.type = TDMT_SYNC_PREP_SNAPSHOT_REPLY}; int32_t dataLen = 0; if (pMsg->payloadType == TDMT_SYNC_PREP_SNAPSHOT) { - if (syncNodeExchangeSnapInfo(pSyncNode, pReceiver, pMsg, &snapInfo) != 0) { + if (syncSnapReceiverExchgSnapInfo(pSyncNode, pReceiver, pMsg, &snapInfo) != 0) { goto _out; } SSyncTLV *datHead = snapInfo.data; @@ -949,26 +947,6 @@ _SEND_REPLY:; return code; } -// receiver on message -// -// condition 1, recv SYNC_SNAPSHOT_SEQ_PREP -// if receiver already start -// if sender.start-time > receiver.start-time, restart receiver(reply snapshot start) -// if sender.start-time = receiver.start-time, maybe duplicate msg -// if sender.start-time < 
receiver.start-time, ignore -// else -// waiting for clock match -// start receiver(reply snapshot start) -// -// condition 2, recv SYNC_SNAPSHOT_SEQ_BEGIN -// a. create writer with -// -// condition 3, recv SYNC_SNAPSHOT_SEQ_END, finish receiver(apply snapshot data, update commit index, maybe reconfig) -// -// condition 4, recv SYNC_SNAPSHOT_SEQ_FORCE_CLOSE, force close -// -// condition 5, got data, update ack -// int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, SRpcMsg *pRpcMsg) { SyncSnapshotSend **ppMsg = (SyncSnapshotSend **)&pRpcMsg->pCont; SyncSnapshotSend *pMsg = ppMsg[0]; @@ -1052,6 +1030,32 @@ _out:; return code; } +static int32_t syncSnapSenderExchgSnapInfo(SSyncNode *pSyncNode, SSyncSnapshotSender *pSender, SyncSnapshotRsp *pMsg) { + ASSERT(pMsg->payloadType == TDMT_SYNC_PREP_SNAPSHOT_REPLY); + + SSyncTLV *datHead = (void *)pMsg->data; + if (datHead->typ != pMsg->payloadType) { + sSError(pSender, "unexpected data type in data of SyncSnapshotRsp. typ: %d", datHead->typ); + terrno = TSDB_CODE_INVALID_DATA_FMT; + return -1; + } + int32_t dataLen = sizeof(SSyncTLV) + datHead->len; + + SSnapshotParam *pParam = &pSender->snapshotParam; + void *data = taosMemoryRealloc(pParam->data, dataLen); + if (data == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + memcpy(data, pMsg->data, dataLen); + + pParam->data = data; + data = NULL; + sSInfo(pSender, "data of snapshot param. 
len: %d", datHead->len); + return 0; +} + +// sender static int32_t syncNodeOnSnapshotPrepRsp(SSyncNode *pSyncNode, SSyncSnapshotSender *pSender, SyncSnapshotRsp *pMsg) { SSnapshot snapshot = {0}; pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); @@ -1068,14 +1072,9 @@ static int32_t syncNodeOnSnapshotPrepRsp(SSyncNode *pSyncNode, SSyncSnapshotSend // start reader if (pMsg->payloadType == TDMT_SYNC_PREP_SNAPSHOT_REPLY) { - SSyncTLV *datHead = (void *)pMsg->data; - if (datHead->typ != pMsg->payloadType) { - sSError(pSender, "unexpected data type in data of SyncSnapshotRsp. typ: %d", datHead->typ); - terrno = TSDB_CODE_INVALID_DATA_FMT; + if (syncSnapSenderExchgSnapInfo(pSyncNode, pSender, pMsg) != 0) { return -1; } - pSender->snapshotParam.data = (void *)pMsg->data; - sSInfo(pSender, "data of snapshot param. len: %d", datHead->len); } int32_t code = pSyncNode->pFsm->FpSnapshotStartRead(pSyncNode->pFsm, &pSender->snapshotParam, &pSender->pReader); @@ -1160,12 +1159,6 @@ _out: return code; } -// sender on message -// -// condition 1 sender receives SYNC_SNAPSHOT_SEQ_END, close sender -// condition 2 sender receives ack, set seq = ack + 1, send msg from seq -// condition 3 sender receives error msg, just print error log -// int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, SRpcMsg *pRpcMsg) { SyncSnapshotRsp **ppMsg = (SyncSnapshotRsp **)&pRpcMsg->pCont; SyncSnapshotRsp *pMsg = ppMsg[0]; From fe258e226d67f6c65cda967ead94cd530fd3aadf Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 9 Nov 2023 18:27:07 +0800 Subject: [PATCH 28/65] feat: add data type of SNAP_DATA_RAW for snap replication --- source/dnode/vnode/src/inc/vnodeInt.h | 1 + 1 file changed, 1 insertion(+) diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 7ed0b5103f..2c051ea642 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -531,6 +531,7 @@ enum { SNAP_DATA_STREAM_STATE = 11, 
SNAP_DATA_STREAM_STATE_BACKEND = 12, SNAP_DATA_TQ_CHECKINFO = 13, + SNAP_DATA_RAW = 14, }; struct SSnapDataHdr { From e05914119ed8515df37af53eca1d6a2a1747310f Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 10 Nov 2023 15:27:06 +0800 Subject: [PATCH 29/65] feat: add file tsdbSnapshotRAW.c --- source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c new file mode 100644 index 0000000000..e69de29bb2 From 61fbf089cc6f8a2929ed31c1decddb172e740679 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 10 Nov 2023 15:28:30 +0800 Subject: [PATCH 30/65] enh: add cscope.files into .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 704b2e7415..08e3d57717 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ CMakeSettings.json cmake-build-debug/ cmake-build-release/ cscope.out +cscope.files .DS_Store debug/ release/ From da646b5a5b158839d9f445eb6cc12d457d8c252c Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 13 Nov 2023 16:00:37 +0800 Subject: [PATCH 31/65] feat: add file tsdbDataFileRAW.c --- source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c | 0 source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c create mode 100644 source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c new file mode 100644 index 0000000000..e69de29bb2 diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h new file mode 100644 index 0000000000..e69de29bb2 From a4c504169d1a64589ae6b29a8df79592911038d4 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 15 Nov 2023 
15:49:11 +0800 Subject: [PATCH 32/65] feat: add file tsdbFSetRAW.c --- source/dnode/vnode/src/tsdb/tsdbFSetRAW.c | 0 source/dnode/vnode/src/tsdb/tsdbFSetRAW.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 source/dnode/vnode/src/tsdb/tsdbFSetRAW.c create mode 100644 source/dnode/vnode/src/tsdb/tsdbFSetRAW.h diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRAW.c b/source/dnode/vnode/src/tsdb/tsdbFSetRAW.c new file mode 100644 index 0000000000..e69de29bb2 diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRAW.h b/source/dnode/vnode/src/tsdb/tsdbFSetRAW.h new file mode 100644 index 0000000000..e69de29bb2 From 6c419423de9c2b7f3319da17e8d70994e851fbd2 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 20 Nov 2023 20:19:46 +0800 Subject: [PATCH 33/65] feat: impl tsdb snapshot reader and writer for raw files --- source/dnode/vnode/src/inc/vnodeInt.h | 11 + source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c | 212 +++++++ source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h | 127 ++++ source/dnode/vnode/src/tsdb/tsdbFSetRAW.c | 173 ++++++ source/dnode/vnode/src/tsdb/tsdbFSetRAW.h | 45 ++ source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c | 586 ++++++++++++++++++ source/dnode/vnode/src/vnd/vnodeSnapshot.c | 17 + 7 files changed, 1171 insertions(+) diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 2c051ea642..50a28357e5 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -65,6 +65,8 @@ typedef struct SMetaSnapReader SMetaSnapReader; typedef struct SMetaSnapWriter SMetaSnapWriter; typedef struct STsdbSnapReader STsdbSnapReader; typedef struct STsdbSnapWriter STsdbSnapWriter; +typedef struct STsdbSnapRAWReader STsdbSnapRAWReader; +typedef struct STsdbSnapRAWWriter STsdbSnapRAWWriter; typedef struct STqSnapReader STqSnapReader; typedef struct STqSnapWriter STqSnapWriter; typedef struct STqOffsetReader STqOffsetReader; @@ -313,6 +315,15 @@ int32_t tsdbSnapWriterOpen(STsdb* 
pTsdb, int64_t sver, int64_t ever, void* pRang int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr); int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter); int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback); +// STsdbSnapRAWReader ======================================== +int32_t tsdbSnapRAWReaderOpen(STsdb* pTsdb, int64_t ever, int8_t type, STsdbSnapRAWReader** ppReader); +int32_t tsdbSnapRAWReaderClose(STsdbSnapRAWReader** ppReader); +int32_t tsdbSnapRAWRead(STsdbSnapRAWReader* pReader, uint8_t** ppData); +// STsdbSnapRAWWriter ======================================== +int32_t tsdbSnapRAWWriterOpen(STsdb* pTsdb, int64_t ever, STsdbSnapRAWWriter** ppWriter); +int32_t tsdbSnapRAWWrite(STsdbSnapRAWWriter* pWriter, SSnapDataHdr* pHdr); +int32_t tsdbSnapRAWWriterPrepareClose(STsdbSnapRAWWriter* pWriter); +int32_t tsdbSnapRAWWriterClose(STsdbSnapRAWWriter** ppWriter, int8_t rollback); // STqSnapshotReader == int32_t tqSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapReader** ppReader); int32_t tqSnapReaderClose(STqSnapReader** ppReader); diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c index e69de29bb2..0c3d890966 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbDataFileRAW.h" + +// SDataFileRAWReader ============================================= +int32_t tsdbDataFileRAWReaderOpen(const char *fname, const SDataFileRAWReaderConfig *config, + SDataFileRAWReader **reader) { + int32_t code = 0; + int32_t lino = 0; + + reader[0] = taosMemoryCalloc(1, sizeof(SDataFileRAWReader)); + if (reader[0] == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + reader[0]->config[0] = config[0]; + + if (fname) { + if (fname) { + code = tsdbOpenFile(fname, config->tsdb, TD_FILE_READ, &reader[0]->fd); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + char fname1[TSDB_FILENAME_LEN]; + tsdbTFileName(config->tsdb, &config->file, fname1); + code = tsdbOpenFile(fname1, config->tsdb, TD_FILE_READ, &reader[0]->fd); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileRAWReaderClose(SDataFileRAWReader **reader) { + if (reader[0] == NULL) return 0; + + if (reader[0]->fd) { + tsdbCloseFile(&reader[0]->fd); + } + + taosMemoryFree(reader[0]); + reader[0] = NULL; + return 0; +} + +int32_t tsdbDataFileRAWReadBlockData(SDataFileRAWReader *reader, STsdbDataRAWBlockHeader *bHdr) { + int32_t code = 0; + int32_t lino = 0; + + bHdr->file.type = reader->config->file.type; + bHdr->file.fid = reader->config->file.fid; + bHdr->file.cid = reader->config->file.cid; + bHdr->file.size = reader->config->file.size; + bHdr->file.minVer = reader->config->file.minVer; + bHdr->file.maxVer = reader->config->file.maxVer; + bHdr->file.stt->level = reader->config->file.stt->level; + + int64_t size = TMIN(bHdr->dataLength, reader->config->file.size - reader->ctx->offset); + ASSERT(size > 0); + bHdr->dataLength = 0; + bHdr->offset = reader->ctx->offset; + + code = tsdbReadFile(reader->fd, bHdr->offset, bHdr->data, size); + TSDB_CHECK_CODE(code, lino, _exit); + + bHdr->dataLength = size; +_exit: + if 
(code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +// SDataFileRAWWriter ============================================= +int32_t tsdbDataFileRAWWriterOpen(const SDataFileRAWWriterConfig *config, SDataFileRAWWriter **writer) { + writer[0] = taosMemoryCalloc(1, sizeof(*writer[0])); + if (!writer[0]) return TSDB_CODE_OUT_OF_MEMORY; + + writer[0]->config[0] = config[0]; + return 0; +} + +static int32_t tsdbDataFileRAWWriterCloseAbort(SDataFileRAWWriter *writer) { + ASSERT(0); + return 0; +} + +static int32_t tsdbDataFileRAWWriterDoClose(SDataFileRAWWriter *writer) { return 0; } + +int32_t tsdbDataFileRAWWriterDoOpen(SDataFileRAWWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + writer->file = writer->config->file; + writer->ctx->offset = 0; + + writer->ctx->opened = true; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileRAWWriterCloseCommit(SDataFileRAWWriter *writer, TFileOpArray *opArr) { + int32_t code = 0; + int32_t lino = 0; + STFileOp op; + + op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = writer->config->fid, + .nf = writer->file, + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + + if (writer->fd) { + code = tsdbFsyncFile(writer->fd); + TSDB_CHECK_CODE(code, lino, _exit); + tsdbCloseFile(&writer->fd); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileRAWWriterOpenDataFD(SDataFileRAWWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + char fname[TSDB_FILENAME_LEN]; + int32_t flag = TD_FILE_READ | TD_FILE_WRITE; + + if (writer->file.size == 0) { + flag |= (TD_FILE_CREATE | TD_FILE_TRUNC); + } + + tsdbTFileName(writer->config->tsdb, &writer->file, fname); + code = tsdbOpenFile(fname, writer->config->tsdb, flag, &writer->fd); + TSDB_CHECK_CODE(code, lino, _exit); + 
+_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileRAWWriterClose(SDataFileRAWWriter **writer, bool abort, TFileOpArray *opArr) { + if (writer[0] == NULL) return 0; + + int32_t code = 0; + int32_t lino = 0; + + if (writer[0]->ctx->opened) { + if (abort) { + code = tsdbDataFileRAWWriterCloseAbort(writer[0]); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = tsdbDataFileRAWWriterCloseCommit(writer[0], opArr); + TSDB_CHECK_CODE(code, lino, _exit); + } + tsdbDataFileRAWWriterDoClose(writer[0]); + } + taosMemoryFree(writer[0]); + writer[0] = NULL; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer[0]->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileRAWWriteBlockData(SDataFileRAWWriter *writer, const STsdbDataRAWBlockHeader *pDataBlock) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbWriteFile(writer->fd, writer->ctx->offset, (const uint8_t *)pDataBlock->data, pDataBlock->dataLength); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->file.size += pDataBlock->dataLength; + writer->ctx->offset += pDataBlock->dataLength; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h index e69de29bb2..49f80b0be5 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tarray2.h" +#include "tsdbDef.h" +#include "tsdbFSet2.h" +#include "tsdbFile2.h" +#include "tsdbUtil2.h" + +#ifndef _TSDB_DATA_FILE_RAW_H +#define _TSDB_DATA_FILE_RAW_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define TSDB_SNAP_RAW_PAYLOAD_SIZE (4096 * 1024) +#if 0 +struct SDataRAWBlock { + int8_t *data; + int64_t size; +}; + +int32_t tsdbDataRAWBlockReset(SDataRAWBlock *pBlock); +int32_t tsdbDataRAWBlockAlloc(SDataRawBlock *pBlock); +void tsdbDataRAWBlockFree(SDataRAWBlock *pBlock); +#endif + +// STsdbDataRAWBlockHeader ======================================= +typedef struct STsdbDataRAWBlockHeader { + struct { + int32_t type; + int64_t fid; + int64_t cid; + int64_t size; + int64_t minVer; + int64_t maxVer; + union { + struct { + int32_t level; + } stt[1]; + }; + } file; + + int64_t offset; + int64_t dataLength; + uint8_t data[0]; +} STsdbDataRAWBlockHeader; + +// SDataFileRAWReader ============================================= +typedef struct SDataFileRAWReaderConfig { + STsdb *tsdb; + int32_t szPage; + + STFile file; +} SDataFileRAWReaderConfig; + +typedef struct SDataFileRAWReader { + SDataFileRAWReaderConfig config[1]; + + struct { + bool opened; + int64_t offset; + } ctx[1]; + + STsdbFD *fd; +} SDataFileRAWReader; + +typedef TARRAY2(SDataFileRAWReader *) SDataFileRAWReaderArray; + +int32_t tsdbDataFileRAWReaderOpen(const char *fname, const SDataFileRAWReaderConfig *config, + SDataFileRAWReader **reader); +int32_t tsdbDataFileRAWReaderClose(SDataFileRAWReader **reader); + +int32_t tsdbDataFileRAWReadBlockData(SDataFileRAWReader *reader, STsdbDataRAWBlockHeader *bHdr); + +// SDataFileRAWWriter 
============================================= +typedef struct SDataFileRAWWriterConfig { + STsdb *tsdb; + int32_t szPage; + + SDiskID did; + int64_t fid; + int64_t cid; + int32_t level; + + STFile file; +} SDataFileRAWWriterConfig; + +typedef struct SDataFileRAWWriter { + SDataFileRAWWriterConfig config[1]; + + struct { + bool opened; + int64_t offset; + } ctx[1]; + + STFile file; + STsdbFD *fd; +} SDataFileRAWWriter; + +typedef struct SDataFileRAWWriter SDataFileRAWWriter; + +int32_t tsdbDataFileRAWWriterOpen(const SDataFileRAWWriterConfig *config, SDataFileRAWWriter **writer); +int32_t tsdbDataFileRAWWriterClose(SDataFileRAWWriter **writer, bool abort, TFileOpArray *opArr); + +int32_t tsdbDataFileRAWWriterDoOpen(SDataFileRAWWriter *writer); +int32_t tsdbDataFileRAWWriteBlockData(SDataFileRAWWriter *writer, const STsdbDataRAWBlockHeader *bHdr); +int32_t tsdbDataFileRAWFlush(SDataFileRAWWriter *writer); + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_DATA_FILE_RAW_H*/ diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRAW.c b/source/dnode/vnode/src/tsdb/tsdbFSetRAW.c index e69de29bb2..d9cd419ef9 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSetRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSetRAW.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbFSetRAW.h" + +// SFSetRAWWriter ================================================== +typedef struct SFSetRAWWriter { + SFSetRAWWriterConfig config[1]; + + struct { + TFileOpArray fopArr[1]; + STFile file; + int64_t offset; + } ctx[1]; + + // writer + SDataFileRAWWriter *dataWriter; +} SFSetRAWWriter; + +int32_t tsdbFSetRAWWriterOpen(SFSetRAWWriterConfig *config, SFSetRAWWriter **writer) { + int32_t code = 0; + int32_t lino = 0; + + writer[0] = taosMemoryCalloc(1, sizeof(SFSetRAWWriter)); + if (writer[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + writer[0]->config[0] = config[0]; + + TARRAY2_INIT(writer[0]->ctx->fopArr); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbFSetRAWWriterFinish(SFSetRAWWriter *writer, TFileOpArray *fopArr) { + int32_t code = 0; + int32_t lino = 0; + + STsdb *tsdb = writer->config->tsdb; + + STFileOp op; + TARRAY2_FOREACH(writer->ctx->fopArr, op) { + code = TARRAY2_APPEND(fopArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } + + TARRAY2_CLEAR(writer->ctx->fopArr, NULL); +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbFSetRAWWriteFileDataBegin(SFSetRAWWriter *writer, STsdbDataRAWBlockHeader *bHdr) { + int32_t code = 0; + int32_t lino = 0; + + SDataFileRAWWriterConfig config = { + .tsdb = writer->config->tsdb, + .szPage = writer->config->szPage, + .did = writer->config->did, + .cid = writer->config->cid, + .level = writer->config->level, + + .file = + { + .type = bHdr->file.type, + .did = writer->config->did, + .cid = writer->config->cid, + .size = bHdr->file.size, + .minVer = bHdr->file.minVer, + .maxVer = bHdr->file.maxVer, + .stt = {{ + .level = bHdr->file.stt->level, + }}, + }, + }; + + code = tsdbDataFileRAWWriterOpen(&config, &writer->dataWriter); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), 
lino, code); + } + return code; +} + +static int32_t tsdbFSetRAWWriteFileDataEnd(SFSetRAWWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbDataFileRAWWriterClose(&writer->dataWriter, false, writer->ctx->fopArr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFSetRAWWriterClose(SFSetRAWWriter **writer, bool abort, TFileOpArray *fopArr) { + if (writer[0] == NULL) return 0; + + int32_t code = 0; + int32_t lino = 0; + + STsdb *tsdb = writer[0]->config->tsdb; + + // end + code = tsdbFSetRAWWriteFileDataEnd(writer[0]); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileRAWWriterClose(&writer[0]->dataWriter, abort, writer[0]->ctx->fopArr); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFSetRAWWriterFinish(writer[0], fopArr); + TSDB_CHECK_CODE(code, lino, _exit); + // free + TARRAY2_DESTROY(writer[0]->ctx->fopArr, NULL); + taosMemoryFree(writer[0]); + writer[0] = NULL; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFSetRAWWriteBlockData(SFSetRAWWriter *writer, STsdbDataRAWBlockHeader *bHdr) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(writer->ctx->offset >= 0 && writer->ctx->offset <= writer->ctx->file.size); + + if (writer->ctx->offset == writer->ctx->file.size) { + code = tsdbFSetRAWWriteFileDataEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFSetRAWWriteFileDataBegin(writer, bHdr); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbDataFileRAWWriteBlockData(writer->dataWriter, bHdr); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->offset += bHdr->dataLength; + ASSERT(writer->ctx->offset == writer->dataWriter->ctx->offset); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRAW.h 
b/source/dnode/vnode/src/tsdb/tsdbFSetRAW.h index e69de29bb2..205c785e99 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSetRAW.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSetRAW.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbDataFileRAW.h" + +#ifndef _TSDB_FSET_RAW_H +#define _TSDB_FSET_RAW_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct SFSetRAWWriterConfig { + STsdb *tsdb; + int32_t szPage; + + SDiskID did; + int64_t fid; + int64_t cid; + int32_t level; +} SFSetRAWWriterConfig; + +typedef struct SFSetRAWWriter SFSetRAWWriter; + +int32_t tsdbFSetRAWWriterOpen(SFSetRAWWriterConfig *config, SFSetRAWWriter **writer); +int32_t tsdbFSetRAWWriterClose(SFSetRAWWriter **writer, bool abort, TFileOpArray *fopArr); +int32_t tsdbFSetRAWWriteBlockData(SFSetRAWWriter *writer, STsdbDataRAWBlockHeader *bHdr); + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_FSET_RAW_H*/ diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c index e69de29bb2..2bcd00bfec 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdb.h" +#include "tsdbDataFileRAW.h" +#include "tsdbFS2.h" +#include "tsdbFSetRAW.h" + +// reader + +typedef struct SDataFileRAWReaderIter { + int32_t count; + int32_t idx; + int64_t offset; + int64_t size; +} SDataFileRAWReaderIter; + +typedef struct STsdbSnapRAWReader { + STsdb* tsdb; + int64_t ever; + int8_t type; + + TFileSetArray* fsetArr; + + // context + struct { + int32_t fsetArrIdx; + STFileSet* fset; + bool isDataDone; + } ctx[1]; + + // reader + SDataFileRAWReaderArray dataReaderArr[1]; + + // iter + SDataFileRAWReaderIter dataIter[1]; +} STsdbSnapRAWReader; + +int32_t tsdbSnapRAWReaderOpen(STsdb* tsdb, int64_t ever, int8_t type, STsdbSnapRAWReader** reader) { + int32_t code = 0; + int32_t lino = 0; + + reader[0] = taosMemoryCalloc(1, sizeof(STsdbSnapRAWReader)); + if (reader[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + reader[0]->tsdb = tsdb; + reader[0]->ever = ever; + reader[0]->type = type; + + code = tsdbFSCreateRefSnapshot(tsdb->pFS, &reader[0]->fsetArr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), + __func__, lino, tstrerror(code), 0, ever, type); + tsdbFSDestroyRefSnapshot(&reader[0]->fsetArr); + taosMemoryFree(reader[0]); + reader[0] = NULL; + } else { + tsdbInfo("vgId:%d tsdb snapshot reader opened. 
sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), 0, + ever, type); + } + return code; +} + +int32_t tsdbSnapRAWReaderClose(STsdbSnapRAWReader** reader) { + if (reader[0] == NULL) return 0; + + int32_t code = 0; + int32_t lino = 0; + + STsdb* tsdb = reader[0]->tsdb; + + tsdbFSDestroyRefSnapshot(&reader[0]->fsetArr); + taosMemoryFree(reader[0]); + reader[0] = NULL; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__); + } + return code; +} + +static int32_t tsdbSnapRAWReadFileSetOpenReader(STsdbSnapRAWReader* reader) { + int32_t code = 0; + int32_t lino = 0; + + // data + for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ftype++) { + if (reader->ctx->fset->farr[ftype] == NULL) { + continue; + } + STFileObj* fobj = reader->ctx->fset->farr[ftype]; + SDataFileRAWReader* dataReader; + SDataFileRAWReaderConfig config = { + .tsdb = reader->tsdb, + .szPage = reader->tsdb->pVnode->config.tsdbPageSize, + .file = fobj->f[0], + }; + code = tsdbDataFileRAWReaderOpen(NULL, &config, &dataReader); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(reader->dataReaderArr, dataReader); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // stt + SSttLvl* lvl; + TARRAY2_FOREACH(reader->ctx->fset->lvlArr, lvl) { + STFileObj* fobj; + TARRAY2_FOREACH(lvl->fobjArr, fobj) { + SDataFileRAWReader* dataReader; + SDataFileRAWReaderConfig config = { + .tsdb = reader->tsdb, + .szPage = reader->tsdb->pVnode->config.tsdbPageSize, + .file = fobj->f[0], + }; + code = tsdbDataFileRAWReaderOpen(NULL, &config, &dataReader); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(reader->dataReaderArr, dataReader); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); + } + return code; +} + +static int32_t tsdbSnapRAWReadFileSetCloseReader(STsdbSnapRAWReader* reader) { + int32_t code = 0; + int32_t lino 
= 0; + + TARRAY2_CLEAR(reader->dataReaderArr, tsdbDataFileRAWReaderClose); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); + } + return code; +} + +static int32_t tsdbSnapRAWReadFileSetOpenIter(STsdbSnapRAWReader* reader) { + int32_t code = 0; + int32_t lino = 0; + + reader->dataIter->count = TARRAY2_SIZE(reader->dataReaderArr); + reader->dataIter->idx = -1; + reader->dataIter->offset = 0; + reader->dataIter->size = 0; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); + } + return code; +} + +static int32_t tsdbSnapRAWReadFileSetCloseIter(STsdbSnapRAWReader* reader) { + reader->dataIter->count = 0; + reader->dataIter->idx = 0; + reader->dataIter->offset = 0; + reader->dataIter->size = 0; + return 0; +} + +static int32_t tsdbSnapRAWReadNext(STsdbSnapRAWReader* reader, STsdbDataRAWBlockHeader* bHdr) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(reader->dataIter->offset <= reader->dataIter->size); + ASSERT(reader->dataIter->idx <= reader->dataIter->count); + + if (reader->dataIter->offset == reader->dataIter->size && reader->dataIter->idx < reader->dataIter->count) { + reader->dataIter->idx++; + } + if (reader->dataIter->idx == reader->dataIter->count) { + return 0; + } + + SDataFileRAWReader* dataReader = TARRAY2_GET(reader->dataReaderArr, reader->dataIter->idx); + code = tsdbDataFileRAWReadBlockData(dataReader, bHdr); + TSDB_CHECK_CODE(code, lino, _exit); + + reader->dataIter->offset += bHdr->dataLength; +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); + } + return code; +} + +static int32_t tsdbSnapRAWReadData(STsdbSnapRAWReader* reader, SSnapDataHdr** data) { + int32_t code = 0; + int32_t lino = 0; + + void* pBuf = taosMemoryCalloc(1, sizeof(SSnapDataHdr) + sizeof(STsdbDataRAWBlockHeader) + TSDB_SNAP_RAW_PAYLOAD_SIZE); + if (pBuf == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + SSnapDataHdr* pHdr = pBuf; + 
pHdr->type = reader->type; + STsdbDataRAWBlockHeader* pData = (void*)pHdr->data; + pData->dataLength = TSDB_SNAP_RAW_PAYLOAD_SIZE; + + code = tsdbSnapRAWReadNext(reader, pData); + TSDB_CHECK_CODE(code, lino, _exit); + + ASSERT(pData->dataLength > 0 && pData->dataLength <= TSDB_SNAP_RAW_PAYLOAD_SIZE); + pHdr->size = sizeof(STsdbDataRAWBlockHeader) + pData->dataLength; + +_exit: + if (code) { + taosMemoryFree(pBuf); + pBuf = NULL; + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); + } + data[0] = pBuf; + return code; +} + +static int32_t tsdbSnapRAWReadBegin(STsdbSnapRAWReader* reader) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(reader->ctx->fset == NULL); + + if (reader->ctx->fsetArrIdx < TARRAY2_SIZE(reader->fsetArr)) { + reader->ctx->fset = TARRAY2_GET(reader->fsetArr, reader->ctx->fsetArrIdx++); + reader->ctx->isDataDone = false; + + code = tsdbSnapRAWReadFileSetOpenReader(reader); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSnapRAWReadFileSetOpenIter(reader); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); + } + return code; +} + +static int32_t tsdbSnapRAWReadEnd(STsdbSnapRAWReader* reader) { + tsdbSnapRAWReadFileSetCloseIter(reader); + tsdbSnapRAWReadFileSetCloseReader(reader); + reader->ctx->fset = NULL; + return 0; +} + +int32_t tsdbSnapRAWRead(STsdbSnapRAWReader* reader, uint8_t** data) { + int32_t code = 0; + int32_t lino = 0; + + data[0] = NULL; + + for (;;) { + if (reader->ctx->fset == NULL) { + code = tsdbSnapRAWReadBegin(reader); + TSDB_CHECK_CODE(code, lino, _exit); + + if (reader->ctx->fset == NULL) { + break; + } + } + + if (!reader->ctx->isDataDone) { + code = tsdbSnapRAWReadData(reader, (SSnapDataHdr**)data); + TSDB_CHECK_CODE(code, lino, _exit); + if (data[0]) { + goto _exit; + } else { + reader->ctx->isDataDone = true; + } + } + + code = tsdbSnapRAWReadEnd(reader); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + 
TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(reader->tsdb->pVnode), __func__); + } + return code; +} + +// writer +struct STsdbSnapRAWWriter { + STsdb* tsdb; + int64_t sver; + int64_t ever; + int32_t minutes; + int8_t precision; + int32_t minRow; + int32_t maxRow; + int8_t cmprAlg; + int64_t commitID; + int32_t szPage; + int64_t compactVersion; + int64_t now; + + TFileSetArray* fsetArr; + TFileOpArray fopArr[1]; + + struct { + bool fsetWriteBegin; + int32_t fid; + STFileSet* fset; + SDiskID did; + int64_t cid; + int64_t level; + + // writer + SFSetRAWWriter* fsetWriter; + } ctx[1]; +}; + +int32_t tsdbSnapRAWWriterOpen(STsdb* pTsdb, int64_t ever, STsdbSnapRAWWriter** writer) { + int32_t code = 0; + int32_t lino = 0; + + // disable background tasks + tsdbFSDisableBgTask(pTsdb->pFS); + + // start to write + writer[0] = taosMemoryCalloc(1, sizeof(*writer[0])); + if (writer[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + writer[0]->tsdb = pTsdb; + writer[0]->ever = ever; + writer[0]->minutes = pTsdb->keepCfg.days; + writer[0]->precision = pTsdb->keepCfg.precision; + writer[0]->minRow = pTsdb->pVnode->config.tsdbCfg.minRows; + writer[0]->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows; + writer[0]->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; + writer[0]->commitID = tsdbFSAllocEid(pTsdb->pFS); + writer[0]->szPage = pTsdb->pVnode->config.tsdbPageSize; + writer[0]->compactVersion = INT64_MAX; + writer[0]->now = taosGetTimestampMs(); + + code = tsdbFSCreateCopySnapshot(pTsdb->pFS, &writer[0]->fsetArr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64, TD_VID(pTsdb->pVnode), __func__, 0, ever); + } + return code; +} + +static int32_t tsdbSnapRAWWriteFileSetOpenIter(STsdbSnapRAWWriter* writer) { + int32_t code = 0; + 
int32_t lino = 0; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbSnapRAWWriteFileSetCloseIter(STsdbSnapRAWWriter* writer) { return 0; } + +static int32_t tsdbSnapRAWWriteFileSetOpenWriter(STsdbSnapRAWWriter* writer) { + int32_t code = 0; + int32_t lino = 0; + + SFSetRAWWriterConfig config = { + .tsdb = writer->tsdb, + .szPage = writer->szPage, + .fid = writer->ctx->fid, + .cid = writer->commitID, + .did = writer->ctx->did, + .level = writer->ctx->level, + }; + + code = tsdbFSetRAWWriterOpen(&config, &writer->ctx->fsetWriter); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbSnapRAWWriteFileSetCloseWriter(STsdbSnapRAWWriter* writer) { + return tsdbFSetRAWWriterClose(&writer->ctx->fsetWriter, 0, writer->fopArr); +} + +static int32_t tsdbSnapRAWWriteFileSetBegin(STsdbSnapRAWWriter* writer, int32_t fid) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(writer->ctx->fsetWriteBegin == false); + + STFileSet* fset = &(STFileSet){.fid = fid}; + + writer->ctx->fid = fid; + STFileSet** fsetPtr = TARRAY2_SEARCH(writer->fsetArr, &fset, tsdbTFileSetCmprFn, TD_EQ); + writer->ctx->fset = (fsetPtr == NULL) ? 
NULL : *fsetPtr; + + int32_t level = tsdbFidLevel(fid, &writer->tsdb->keepCfg, taosGetTimestampSec()); + if (tfsAllocDisk(writer->tsdb->pVnode->pTfs, level, &writer->ctx->did)) { + code = TSDB_CODE_NO_AVAIL_DISK; + TSDB_CHECK_CODE(code, lino, _exit); + } + tfsMkdirRecurAt(writer->tsdb->pVnode->pTfs, writer->tsdb->path, writer->ctx->did); + + code = tsdbSnapRAWWriteFileSetOpenWriter(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->level = level; + writer->ctx->fsetWriteBegin = true; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbSnapRAWWriteFileSetEnd(STsdbSnapRAWWriter* writer) { + if (!writer->ctx->fsetWriteBegin) return 0; + + int32_t code = 0; + int32_t lino = 0; + + // close write + code = tsdbSnapRAWWriteFileSetCloseWriter(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->fsetWriteBegin = false; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSnapRAWWriterPrepareClose(STsdbSnapRAWWriter* writer) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbSnapRAWWriteFileSetEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFSEditBegin(writer->tsdb->pFS, writer->fopArr, TSDB_FEDIT_COMMIT); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(writer->tsdb->pVnode), __func__); + } + return code; +} + +int32_t tsdbSnapRAWWriterClose(STsdbSnapRAWWriter** writer, int8_t rollback) { + if (writer[0] == NULL) return 0; + + int32_t code = 0; + int32_t lino = 0; + + STsdb* tsdb = writer[0]->tsdb; + + if (rollback) { + code = tsdbFSEditAbort(writer[0]->tsdb->pFS); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + taosThreadMutexLock(&writer[0]->tsdb->mutex); + + code = tsdbFSEditCommit(writer[0]->tsdb->pFS); + if (code) { + 
taosThreadMutexUnlock(&writer[0]->tsdb->mutex); + TSDB_CHECK_CODE(code, lino, _exit); + } + + writer[0]->tsdb->pFS->fsstate = TSDB_FS_STATE_NORMAL; + + taosThreadMutexUnlock(&writer[0]->tsdb->mutex); + } + tsdbFSEnableBgTask(tsdb->pFS); + + TARRAY2_DESTROY(writer[0]->fopArr, NULL); + tsdbFSDestroyCopySnapshot(&writer[0]->fsetArr); + + taosMemoryFree(writer[0]); + writer[0] = NULL; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } else { + tsdbInfo("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__); + } + return code; +} + +static int32_t tsdbSnapRAWWriteTimeSeriesData(STsdbSnapRAWWriter* writer, STsdbDataRAWBlockHeader* bHdr) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFSetRAWWriteBlockData(writer->ctx->fsetWriter, bHdr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbSnapRAWWriteData(STsdbSnapRAWWriter* writer, SSnapDataHdr* hdr) { + int32_t code = 0; + int32_t lino = 0; + + STsdbDataRAWBlockHeader* bHdr = (void*)hdr->data; + int32_t fid = bHdr->file.fid; + if (!writer->ctx->fsetWriteBegin || fid != writer->ctx->fid) { + code = tsdbSnapRAWWriteFileSetEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSnapRAWWriteFileSetBegin(writer, fid); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbSnapRAWWriteTimeSeriesData(writer, bHdr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSnapRAWWrite(STsdbSnapRAWWriter* writer, SSnapDataHdr* hdr) { + ASSERT(hdr->type == SNAP_DATA_RAW); + + int32_t code = 0; + int32_t lino = 0; + + code = tsdbSnapRAWWriteData(writer, hdr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s, type:%d index:%" PRId64 " size:%" PRId64, + TD_VID(writer->tsdb->pVnode), __func__, lino, 
tstrerror(code), hdr->type, hdr->index, hdr->size); + } else { + tsdbDebug("vgId:%d %s done, type:%d index:%" PRId64 " size:%" PRId64, TD_VID(writer->tsdb->pVnode), __func__, + hdr->type, hdr->index, hdr->size); + } + return code; +} diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 21c858709b..1a6b1a7a6c 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -51,6 +51,10 @@ struct SVSnapReader { int8_t tsdbDone; TFileSetRangeArray *pRanges; STsdbSnapReader *pTsdbReader; + // tsdb raw + int8_t tsdbRawDone; + STsdbSnapRAWReader *pTsdbRawReader; + // tq int8_t tqHandleDone; STqSnapReader *pTqSnapReader; @@ -467,6 +471,8 @@ struct SVSnapWriter { // tsdb TFileSetRangeArray *pRanges; STsdbSnapWriter *pTsdbSnapWriter; + // tsdb raw + STsdbSnapRAWWriter *pTsdbSnapRAWWriter; // tq STqSnapWriter *pTqSnapWriter; STqOffsetWriter *pTqOffsetWriter; @@ -772,6 +778,17 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { code = tsdbSnapWrite(pWriter->pTsdbSnapWriter, pHdr); if (code) goto _err; } break; + case SNAP_DATA_RAW: { + // tsdb + if (pWriter->pTsdbSnapRAWWriter == NULL) { + ASSERT(pWriter->sver == 0); + code = tsdbSnapRAWWriterOpen(pVnode->pTsdb, pWriter->ever, &pWriter->pTsdbSnapRAWWriter); + if (code) goto _err; + } + + code = tsdbSnapRAWWrite(pWriter->pTsdbSnapRAWWriter, pHdr); + if (code) goto _err; + } break; case SNAP_DATA_TQ_HANDLE: { // tq handle if (pWriter->pTqSnapWriter == NULL) { From a504007ade887f559c3ad2486d7a82de09651b5f Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 24 Nov 2023 16:13:53 +0800 Subject: [PATCH 34/65] refact: alloc just enough memory for snap data in tsdbSnapRAWReadData --- source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c | 24 +++---- source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c | 65 ++++++++++++------- 2 files changed, 49 insertions(+), 40 deletions(-) diff --git 
a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c index 0c3d890966..aaaaa56b0e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c @@ -60,27 +60,21 @@ int32_t tsdbDataFileRAWReaderClose(SDataFileRAWReader **reader) { return 0; } -int32_t tsdbDataFileRAWReadBlockData(SDataFileRAWReader *reader, STsdbDataRAWBlockHeader *bHdr) { +int32_t tsdbDataFileRAWReadBlockData(SDataFileRAWReader *reader, STsdbDataRAWBlockHeader *pBlock) { int32_t code = 0; int32_t lino = 0; - bHdr->file.type = reader->config->file.type; - bHdr->file.fid = reader->config->file.fid; - bHdr->file.cid = reader->config->file.cid; - bHdr->file.size = reader->config->file.size; - bHdr->file.minVer = reader->config->file.minVer; - bHdr->file.maxVer = reader->config->file.maxVer; - bHdr->file.stt->level = reader->config->file.stt->level; + pBlock->file.type = reader->config->file.type; + pBlock->file.fid = reader->config->file.fid; + pBlock->file.cid = reader->config->file.cid; + pBlock->file.size = reader->config->file.size; + pBlock->file.minVer = reader->config->file.minVer; + pBlock->file.maxVer = reader->config->file.maxVer; + pBlock->file.stt->level = reader->config->file.stt->level; - int64_t size = TMIN(bHdr->dataLength, reader->config->file.size - reader->ctx->offset); - ASSERT(size > 0); - bHdr->dataLength = 0; - bHdr->offset = reader->ctx->offset; - - code = tsdbReadFile(reader->fd, bHdr->offset, bHdr->data, size); + code = tsdbReadFile(reader->fd, pBlock->offset, pBlock->data, pBlock->dataLength); TSDB_CHECK_CODE(code, lino, _exit); - bHdr->dataLength = size; _exit: if (code) { TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c index 2bcd00bfec..14c52c0bfb 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c @@ 
-183,52 +183,54 @@ static int32_t tsdbSnapRAWReadFileSetCloseIter(STsdbSnapRAWReader* reader) { return 0; } -static int32_t tsdbSnapRAWReadNext(STsdbSnapRAWReader* reader, STsdbDataRAWBlockHeader* bHdr) { +static int64_t tsdbSnapRAWReadPeek(SDataFileRAWReader* reader) { + int64_t size = TMIN(reader->config->file.size - reader->ctx->offset, TSDB_SNAP_RAW_PAYLOAD_SIZE); + return size; +} + +static int32_t tsdbSnapRAWReadNext(STsdbSnapRAWReader* reader, SSnapDataHdr** ppData) { int32_t code = 0; int32_t lino = 0; + ppData[0] = NULL; ASSERT(reader->dataIter->offset <= reader->dataIter->size); ASSERT(reader->dataIter->idx <= reader->dataIter->count); + // dataReader if (reader->dataIter->offset == reader->dataIter->size && reader->dataIter->idx < reader->dataIter->count) { reader->dataIter->idx++; } if (reader->dataIter->idx == reader->dataIter->count) { return 0; } - + int8_t type = reader->type; SDataFileRAWReader* dataReader = TARRAY2_GET(reader->dataReaderArr, reader->dataIter->idx); - code = tsdbDataFileRAWReadBlockData(dataReader, bHdr); - TSDB_CHECK_CODE(code, lino, _exit); - reader->dataIter->offset += bHdr->dataLength; -_exit: - if (code) { - TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); - } - return code; -} + // prepare + int64_t dataLength = tsdbSnapRAWReadPeek(dataReader); + ASSERT(dataLength > 0); -static int32_t tsdbSnapRAWReadData(STsdbSnapRAWReader* reader, SSnapDataHdr** data) { - int32_t code = 0; - int32_t lino = 0; - - void* pBuf = taosMemoryCalloc(1, sizeof(SSnapDataHdr) + sizeof(STsdbDataRAWBlockHeader) + TSDB_SNAP_RAW_PAYLOAD_SIZE); + void* pBuf = taosMemoryCalloc(1, sizeof(SSnapDataHdr) + sizeof(STsdbDataRAWBlockHeader) + dataLength); if (pBuf == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); } - SSnapDataHdr* pHdr = pBuf; - pHdr->type = reader->type; - STsdbDataRAWBlockHeader* pData = (void*)pHdr->data; - pData->dataLength = TSDB_SNAP_RAW_PAYLOAD_SIZE; + pHdr->type = type; + pHdr->size = 
sizeof(STsdbDataRAWBlockHeader) + dataLength; - code = tsdbSnapRAWReadNext(reader, pData); + STsdbDataRAWBlockHeader* pBlock = (void*)pHdr->data; + pBlock->offset = dataReader->ctx->offset; + pBlock->dataLength = dataLength; + + // read + code = tsdbDataFileRAWReadBlockData(dataReader, pBlock); TSDB_CHECK_CODE(code, lino, _exit); - ASSERT(pData->dataLength > 0 && pData->dataLength <= TSDB_SNAP_RAW_PAYLOAD_SIZE); - pHdr->size = sizeof(STsdbDataRAWBlockHeader) + pData->dataLength; + // finish + reader->dataIter->offset += pBlock->dataLength; + ppData[0] = pBuf; + ASSERT(reader->dataIter->offset <= reader->dataIter->size); _exit: if (code) { @@ -236,7 +238,20 @@ _exit: pBuf = NULL; TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); } - data[0] = pBuf; + return code; +} + +static int32_t tsdbSnapRAWReadData(STsdbSnapRAWReader* reader, uint8_t** ppData) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbSnapRAWReadNext(reader, (SSnapDataHdr**)ppData); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); + } return code; } @@ -288,7 +303,7 @@ int32_t tsdbSnapRAWRead(STsdbSnapRAWReader* reader, uint8_t** data) { } if (!reader->ctx->isDataDone) { - code = tsdbSnapRAWReadData(reader, (SSnapDataHdr**)data); + code = tsdbSnapRAWReadData(reader, data); TSDB_CHECK_CODE(code, lino, _exit); if (data[0]) { goto _exit; From ae60e1f810e411273b59a0c8b0fad324cc37404a Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 24 Nov 2023 16:15:33 +0800 Subject: [PATCH 35/65] refact: add tsdbSnapRAWReaderIterNext --- source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c index 14c52c0bfb..7010e6ad27 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c @@ -23,8 +23,6 @@ typedef 
struct SDataFileRAWReaderIter { int32_t count; int32_t idx; - int64_t offset; - int64_t size; } SDataFileRAWReaderIter; typedef struct STsdbSnapRAWReader { @@ -164,9 +162,7 @@ static int32_t tsdbSnapRAWReadFileSetOpenIter(STsdbSnapRAWReader* reader) { int32_t lino = 0; reader->dataIter->count = TARRAY2_SIZE(reader->dataReaderArr); - reader->dataIter->idx = -1; - reader->dataIter->offset = 0; - reader->dataIter->size = 0; + reader->dataIter->idx = 0; _exit: if (code) { @@ -178,8 +174,6 @@ _exit: static int32_t tsdbSnapRAWReadFileSetCloseIter(STsdbSnapRAWReader* reader) { reader->dataIter->count = 0; reader->dataIter->idx = 0; - reader->dataIter->offset = 0; - reader->dataIter->size = 0; return 0; } @@ -188,23 +182,30 @@ static int64_t tsdbSnapRAWReadPeek(SDataFileRAWReader* reader) { return size; } +static SDataFileRAWReader* tsdbSnapRAWReaderIterNext(STsdbSnapRAWReader* reader) { + ASSERT(reader->dataIter->idx <= reader->dataIter->count); + + while (reader->dataIter->idx < reader->dataIter->count) { + SDataFileRAWReader* dataReader = TARRAY2_GET(reader->dataReaderArr, reader->dataIter->idx); + ASSERT(dataReader); + if (dataReader->ctx->offset < dataReader->config->file.size) { + return dataReader; + } + reader->dataIter->idx++; + } + return NULL; +} + static int32_t tsdbSnapRAWReadNext(STsdbSnapRAWReader* reader, SSnapDataHdr** ppData) { int32_t code = 0; int32_t lino = 0; + int8_t type = reader->type; ppData[0] = NULL; - ASSERT(reader->dataIter->offset <= reader->dataIter->size); - ASSERT(reader->dataIter->idx <= reader->dataIter->count); - - // dataReader - if (reader->dataIter->offset == reader->dataIter->size && reader->dataIter->idx < reader->dataIter->count) { - reader->dataIter->idx++; - } - if (reader->dataIter->idx == reader->dataIter->count) { + SDataFileRAWReader* dataReader = tsdbSnapRAWReaderIterNext(reader); + if (dataReader == NULL) { return 0; } - int8_t type = reader->type; - SDataFileRAWReader* dataReader = TARRAY2_GET(reader->dataReaderArr, 
reader->dataIter->idx); // prepare int64_t dataLength = tsdbSnapRAWReadPeek(dataReader); @@ -219,18 +220,18 @@ static int32_t tsdbSnapRAWReadNext(STsdbSnapRAWReader* reader, SSnapDataHdr** pp pHdr->type = type; pHdr->size = sizeof(STsdbDataRAWBlockHeader) + dataLength; + // read STsdbDataRAWBlockHeader* pBlock = (void*)pHdr->data; pBlock->offset = dataReader->ctx->offset; pBlock->dataLength = dataLength; - // read code = tsdbDataFileRAWReadBlockData(dataReader, pBlock); TSDB_CHECK_CODE(code, lino, _exit); // finish - reader->dataIter->offset += pBlock->dataLength; + dataReader->ctx->offset += pBlock->dataLength; + ASSERT(dataReader->ctx->offset <= dataReader->config->file.size); ppData[0] = pBuf; - ASSERT(reader->dataIter->offset <= reader->dataIter->size); _exit: if (code) { From 1dc9019baa66e30d88497ab993c7948a0b1e70b0 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 24 Nov 2023 16:28:39 +0800 Subject: [PATCH 36/65] feat: add tsdbSnapRAWRead into vnodeSnapRead --- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 26 ++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 1a6b1a7a6c..cb3d346df5 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -52,8 +52,8 @@ struct SVSnapReader { TFileSetRangeArray *pRanges; STsdbSnapReader *pTsdbReader; // tsdb raw - int8_t tsdbRawDone; - STsdbSnapRAWReader *pTsdbRawReader; + int8_t tsdbRAWDone; + STsdbSnapRAWReader *pTsdbRAWReader; // tq int8_t tqHandleDone; @@ -299,6 +299,28 @@ int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData) } } + if (!pReader->tsdbRAWDone) { + // open if not + if (pReader->pTsdbRAWReader == NULL) { + ASSERT(pReader->sver == 0); + code = tsdbSnapRAWReaderOpen(pReader->pVnode->pTsdb, pReader->ever, SNAP_DATA_RAW, &pReader->pTsdbRAWReader); + if (code) goto _err; + } + + code = 
tsdbSnapRAWRead(pReader->pTsdbRAWReader, ppData); + if (code) { + goto _err; + } else { + if (*ppData) { + goto _exit; + } else { + pReader->tsdbRAWDone = 1; + code = tsdbSnapRAWReaderClose(&pReader->pTsdbRAWReader); + if (code) goto _err; + } + } + } + // TQ ================ vInfo("vgId:%d tq transform start", vgId); if (!pReader->tqHandleDone) { From f1362669722690e5a70eaf3672b44f06f896c85b Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 24 Nov 2023 20:04:39 +0800 Subject: [PATCH 37/65] feat: support snap replication by file blocks --- source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c | 66 ++++++++++++------- source/dnode/vnode/src/tsdb/tsdbFSetRAW.c | 5 ++ source/dnode/vnode/src/vnd/vnodeSnapshot.c | 11 ++++ 3 files changed, 57 insertions(+), 25 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c index aaaaa56b0e..4a72fcfda8 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c @@ -83,12 +83,26 @@ _exit: } // SDataFileRAWWriter ============================================= -int32_t tsdbDataFileRAWWriterOpen(const SDataFileRAWWriterConfig *config, SDataFileRAWWriter **writer) { - writer[0] = taosMemoryCalloc(1, sizeof(*writer[0])); - if (!writer[0]) return TSDB_CODE_OUT_OF_MEMORY; +int32_t tsdbDataFileRAWWriterOpen(const SDataFileRAWWriterConfig *config, SDataFileRAWWriter **ppWriter) { + int32_t code = 0; + int32_t lino = 0; - writer[0]->config[0] = config[0]; - return 0; + SDataFileRAWWriter *writer = taosMemoryCalloc(1, sizeof(SDataFileRAWWriter)); + if (!writer) return TSDB_CODE_OUT_OF_MEMORY; + + writer->config[0] = config[0]; + + code = tsdbDataFileRAWWriterDoOpen(writer); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + taosMemoryFree(writer); + writer = NULL; + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + ppWriter[0] = writer; + return code; } static int32_t 
tsdbDataFileRAWWriterCloseAbort(SDataFileRAWWriter *writer) { @@ -98,28 +112,13 @@ static int32_t tsdbDataFileRAWWriterCloseAbort(SDataFileRAWWriter *writer) { static int32_t tsdbDataFileRAWWriterDoClose(SDataFileRAWWriter *writer) { return 0; } -int32_t tsdbDataFileRAWWriterDoOpen(SDataFileRAWWriter *writer) { - int32_t code = 0; - int32_t lino = 0; - - writer->file = writer->config->file; - writer->ctx->offset = 0; - - writer->ctx->opened = true; - -_exit: - if (code) { - TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); - } - return code; -} - static int32_t tsdbDataFileRAWWriterCloseCommit(SDataFileRAWWriter *writer, TFileOpArray *opArr) { int32_t code = 0; int32_t lino = 0; - STFileOp op; + ASSERT(writer->ctx->offset == writer->file.size); + ASSERT(writer->config->fid == writer->file.fid); - op = (STFileOp){ + STFileOp op = (STFileOp){ .optype = TSDB_FOP_CREATE, .fid = writer->config->fid, .nf = writer->file, @@ -147,7 +146,7 @@ static int32_t tsdbDataFileRAWWriterOpenDataFD(SDataFileRAWWriter *writer) { char fname[TSDB_FILENAME_LEN]; int32_t flag = TD_FILE_READ | TD_FILE_WRITE; - if (writer->file.size == 0) { + if (writer->ctx->offset == 0) { flag |= (TD_FILE_CREATE | TD_FILE_TRUNC); } @@ -162,6 +161,24 @@ _exit: return code; } +int32_t tsdbDataFileRAWWriterDoOpen(SDataFileRAWWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + writer->file = writer->config->file; + writer->ctx->offset = 0; + + code = tsdbDataFileRAWWriterOpenDataFD(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->opened = true; +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + int32_t tsdbDataFileRAWWriterClose(SDataFileRAWWriter **writer, bool abort, TFileOpArray *opArr) { if (writer[0] == NULL) return 0; @@ -195,7 +212,6 @@ int32_t tsdbDataFileRAWWriteBlockData(SDataFileRAWWriter *writer, const STsdbDat code = tsdbWriteFile(writer->fd, writer->ctx->offset, (const uint8_t 
*)pDataBlock->data, pDataBlock->dataLength); TSDB_CHECK_CODE(code, lino, _exit); - writer->file.size += pDataBlock->dataLength; writer->ctx->offset += pDataBlock->dataLength; _exit: diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRAW.c b/source/dnode/vnode/src/tsdb/tsdbFSetRAW.c index d9cd419ef9..03c12502d5 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSetRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSetRAW.c @@ -74,6 +74,7 @@ static int32_t tsdbFSetRAWWriteFileDataBegin(SFSetRAWWriter *writer, STsdbDataRA SDataFileRAWWriterConfig config = { .tsdb = writer->config->tsdb, .szPage = writer->config->szPage, + .fid = bHdr->file.fid, .did = writer->config->did, .cid = writer->config->cid, .level = writer->config->level, @@ -81,6 +82,7 @@ static int32_t tsdbFSetRAWWriteFileDataBegin(SFSetRAWWriter *writer, STsdbDataRA .file = { .type = bHdr->file.type, + .fid = bHdr->file.fid, .did = writer->config->did, .cid = writer->config->cid, .size = bHdr->file.size, @@ -92,6 +94,9 @@ static int32_t tsdbFSetRAWWriteFileDataBegin(SFSetRAWWriter *writer, STsdbDataRA }, }; + writer->ctx->offset = 0; + writer->ctx->file = config.file; + code = tsdbDataFileRAWWriterOpen(&config, &writer->dataWriter); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index cb3d346df5..2a8484bcd2 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -277,6 +277,8 @@ int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData) } // TSDB ============== + pReader->tsdbDone = true; + if (!pReader->tsdbDone) { // open if not if (pReader->pTsdbReader == NULL) { @@ -641,6 +643,10 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot * tsdbSnapWriterPrepareClose(pWriter->pTsdbSnapWriter); } + if (pWriter->pTsdbSnapRAWWriter) { + tsdbSnapRAWWriterPrepareClose(pWriter->pTsdbSnapRAWWriter); + } + if (pWriter->pRsmaSnapWriter) { 
rsmaSnapWriterPrepareClose(pWriter->pRsmaSnapWriter); } @@ -677,6 +683,11 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot * if (code) goto _exit; } + if (pWriter->pTsdbSnapRAWWriter) { + code = tsdbSnapRAWWriterClose(&pWriter->pTsdbSnapRAWWriter, rollback); + if (code) goto _exit; + } + if (pWriter->pTqSnapWriter) { code = tqSnapWriterClose(&pWriter->pTqSnapWriter, rollback); if (code) goto _exit; From 6c4c0242fd4c7a9565ecf5d4dbf29a3cc96746e4 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 27 Nov 2023 11:16:09 +0800 Subject: [PATCH 38/65] refact: adjust logging format in tsdbSnapRAWReaderOpen --- source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c index 7010e6ad27..ff121a3d30 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c @@ -62,14 +62,13 @@ int32_t tsdbSnapRAWReaderOpen(STsdb* tsdb, int64_t ever, int8_t type, STsdbSnapR _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), - __func__, lino, tstrerror(code), 0, ever, type); + tsdbError("vgId:%d %s failed at line %d since %s, sver:0, ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), __func__, + lino, tstrerror(code), ever, type); tsdbFSDestroyRefSnapshot(&reader[0]->fsetArr); taosMemoryFree(reader[0]); reader[0] = NULL; } else { - tsdbInfo("vgId:%d tsdb snapshot reader opened. sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), 0, - ever, type); + tsdbInfo("vgId:%d tsdb snapshot reader opened. 
sver:0, ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), ever, type); } return code; } @@ -387,7 +386,7 @@ _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); } else { - tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64, TD_VID(pTsdb->pVnode), __func__, 0, ever); + tsdbInfo("vgId:%d %s done, sver:0, ever:%" PRId64, TD_VID(pTsdb->pVnode), __func__, ever); } return code; } From ef34176e37f76202931a918cc4c1de7d95a3011f Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 30 Nov 2023 16:54:17 +0800 Subject: [PATCH 39/65] fix: close data file readers in tsbSnapRAWReaderClose --- source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c index ff121a3d30..c3503a0cd6 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c @@ -18,6 +18,8 @@ #include "tsdbFS2.h" #include "tsdbFSetRAW.h" +static int32_t tsdbSnapRAWReadFileSetCloseReader(STsdbSnapRAWReader* reader); + // reader typedef struct SDataFileRAWReaderIter { @@ -81,6 +83,7 @@ int32_t tsdbSnapRAWReaderClose(STsdbSnapRAWReader** reader) { STsdb* tsdb = reader[0]->tsdb; + tsdbSnapRAWReadFileSetCloseReader(reader[0]); tsdbFSDestroyRefSnapshot(&reader[0]->fsetArr); taosMemoryFree(reader[0]); reader[0] = NULL; From 52672657c17f56c90d0e1fccbf35347baea1c16c Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 5 Dec 2023 11:36:58 +0800 Subject: [PATCH 40/65] feat: toggle tsdb snap replication mode by snap info handshake ahead of time --- source/dnode/vnode/src/inc/tsdb.h | 14 +++ source/dnode/vnode/src/tsdb/tsdbSnapInfo.c | 139 +++++++++++++++++++++ source/dnode/vnode/src/vnd/vnodeSnapshot.c | 34 ++++- 3 files changed, 183 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 
dc3aa418b4..95982abfbe 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -715,6 +715,20 @@ int32_t tSerializeTsdbFSetPartList(void *buf, int32_t bufLen, STsdbFS int32_t tDeserializeTsdbFSetPartList(void *buf, int32_t bufLen, STsdbFSetPartList *pList); int32_t tsdbFSetPartListToRangeDiff(STsdbFSetPartList *pList, TFileSetRangeArray **ppRanges); +// snap rep format +typedef enum ETsdbRepFmt { + TSDB_SNAP_REP_FMT_DEFAULT = 0, + TSDB_SNAP_REP_FMT_RAW, + TSDB_SNAP_REP_FMT_HYBRID, +} ETsdbRepFmt; + +typedef struct STsdbRepOpts { + ETsdbRepFmt format; +} STsdbRepOpts; + +int32_t tSerializeTsdbRepOpts(void *buf, int32_t bufLen, STsdbRepOpts *pInfo); +int32_t tDeserializeTsdbRepOpts(void *buf, int32_t bufLen, STsdbRepOpts *pInfo); + // snap read struct STsdbReadSnap { SMemTable *pMem; diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c index 65ee1a7db3..9dae9bdd36 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapInfo.c @@ -443,6 +443,126 @@ static int32_t tsdbPartitionInfoSerialize(STsdbPartitionInfo* pInfo, uint8_t* bu return offset; } +// tsdb replication opts +static int32_t tTsdbRepOptsDataLenCalc(STsdbRepOpts* pInfo) { + int32_t hdrLen = sizeof(int32_t); + int32_t datLen = 0; + + int8_t msgVer = 0; + int64_t reserved64 = 0; + int16_t format = 0; + hdrLen += sizeof(msgVer); + datLen += hdrLen; + datLen += sizeof(format); + datLen += sizeof(reserved64); + datLen += sizeof(*pInfo); + return datLen; +} + +int32_t tSerializeTsdbRepOpts(void* buf, int32_t bufLen, STsdbRepOpts* pOpts) { + SEncoder encoder = {0}; + tEncoderInit(&encoder, buf, bufLen); + + int64_t reserved64 = 0; + int8_t msgVer = TSDB_SNAP_MSG_VER; + + if (tStartEncode(&encoder) < 0) goto _err; + if (tEncodeI8(&encoder, msgVer) < 0) goto _err; + int16_t format = pOpts->format; + if (tEncodeI16(&encoder, format) < 0) goto _err; + if (tEncodeI64(&encoder, reserved64) < 0) goto 
_err; + + tEndEncode(&encoder); + int32_t tlen = encoder.pos; + tEncoderClear(&encoder); + return tlen; + +_err: + tEncoderClear(&encoder); + return -1; +} + +int32_t tDeserializeTsdbRepOpts(void* buf, int32_t bufLen, STsdbRepOpts* pOpts) { + SDecoder decoder = {0}; + tDecoderInit(&decoder, buf, bufLen); + + int64_t reserved64 = 0; + int8_t msgVer = 0; + + if (tStartDecode(&decoder) < 0) goto _err; + if (tDecodeI8(&decoder, &msgVer) < 0) goto _err; + if (msgVer != TSDB_SNAP_MSG_VER) goto _err; + int16_t format = 0; + if (tDecodeI16(&decoder, &format) < 0) goto _err; + pOpts->format = format; + if (tDecodeI64(&decoder, &reserved64) < 0) goto _err; + + tEndDecode(&decoder); + tDecoderClear(&decoder); + return 0; + +_err: + tDecoderClear(&decoder); + return -1; +} + +static int32_t tsdbRepOptsEstSize(STsdbRepOpts* pOpts) { + int32_t dataLen = 0; + dataLen += sizeof(SSyncTLV); + dataLen += tTsdbRepOptsDataLenCalc(pOpts); + return dataLen; +} + +static int32_t tsdbRepOptsSerialize(STsdbRepOpts* pOpts, void* buf, int32_t bufLen) { + SSyncTLV* pSubHead = buf; + int32_t offset = 0; + int32_t tlen = 0; + if ((tlen = tSerializeTsdbRepOpts(pSubHead->val, bufLen, pOpts)) < 0) { + return -1; + } + pSubHead->typ = SNAP_DATA_RAW; + pSubHead->len = tlen; + offset += sizeof(*pSubHead) + tlen; + return offset; +} + +// snap info +static int32_t tsdbSnapPrepDealWithSnapInfo(SVnode* pVnode, SSnapshot* pSnap, STsdbRepOpts* pInfo) { + if (!pSnap->data) return 0; + int32_t code = -1; + + SSyncTLV* pHead = (void*)pSnap->data; + int32_t offset = 0; + + while (offset + sizeof(*pHead) < pHead->len) { + SSyncTLV* pField = (void*)(pHead->val + offset); + offset += sizeof(*pField) + pField->len; + void* buf = pField->val; + int32_t bufLen = pField->len; + + switch (pField->typ) { + case SNAP_DATA_TSDB: + case SNAP_DATA_RSMA1: + case SNAP_DATA_RSMA2: { + } break; + case SNAP_DATA_RAW: { + if (tDeserializeTsdbRepOpts(buf, bufLen, pInfo) < 0) { + terrno = TSDB_CODE_INVALID_DATA_FMT; + 
tsdbError("vgId:%d, failed to deserialize tsdb rep opts since %s", TD_VID(pVnode), terrstr()); + goto _out; + } + } break; + default: + tsdbError("vgId:%d, unexpected subfield type of snap info. typ:%d", TD_VID(pVnode), pField->typ); + goto _out; + } + } + + code = 0; +_out: + return code; +} + int32_t tsdbSnapPrepDescription(SVnode* pVnode, SSnapshot* pSnap) { ASSERT(pSnap->type == TDMT_SYNC_PREP_SNAPSHOT || pSnap->type == TDMT_SYNC_PREP_SNAPSHOT_REPLY); STsdbPartitionInfo partitionInfo = {0}; @@ -453,10 +573,22 @@ int32_t tsdbSnapPrepDescription(SVnode* pVnode, SSnapshot* pSnap) { goto _out; } + // deal with snap info for reply + STsdbRepOpts opts = {.format = TSDB_SNAP_REP_FMT_RAW}; + if (pSnap->type == TDMT_SYNC_PREP_SNAPSHOT_REPLY) { + STsdbRepOpts leaderOpts = {0}; + if (tsdbSnapPrepDealWithSnapInfo(pVnode, pSnap, &leaderOpts) < 0) { + tsdbError("vgId:%d, failed to deal with snap info for reply since %s", TD_VID(pVnode), terrstr()); + goto _out; + } + opts.format = TMIN(opts.format, leaderOpts.format); + } + // info data realloc const int32_t headLen = sizeof(SSyncTLV); int32_t bufLen = headLen; bufLen += tsdbPartitionInfoEstSize(pInfo); + bufLen += tsdbRepOptsEstSize(&opts); if (syncSnapInfoDataRealloc(pSnap, bufLen) != 0) { tsdbError("vgId:%d, failed to realloc memory for data of snap info. 
bytes:%d", TD_VID(pVnode), bufLen); goto _out; @@ -474,6 +606,13 @@ int32_t tsdbSnapPrepDescription(SVnode* pVnode, SSnapshot* pSnap) { offset += tlen; ASSERT(offset <= bufLen); + if ((tlen = tsdbRepOptsSerialize(&opts, buf + offset, bufLen - offset)) < 0) { + tsdbError("vgId:%d, failed to serialize tsdb rep opts since %s", TD_VID(pVnode), terrstr()); + goto _out; + } + offset += tlen; + ASSERT(offset <= bufLen); + // set header of info data SSyncTLV* pHead = pSnap->data; pHead->typ = pSnap->type; diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 2a8484bcd2..438fa35713 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -92,14 +92,16 @@ static int32_t vnodeSnapReaderDealWithSnapInfo(SVSnapReader *pReader, SSnapshotP int32_t code = -1; if (pParam->data) { + // decode SSyncTLV *datHead = (void *)pParam->data; if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { terrno = TSDB_CODE_INVALID_DATA_FMT; goto _out; } + STsdbRepOpts tsdbOpts = {0}; TFileSetRangeArray **ppRanges = NULL; - int32_t offset = 0; + int32_t offset = 0; while (offset + sizeof(SSyncTLV) < datHead->len) { SSyncTLV *subField = (void *)(datHead->val + offset); @@ -121,13 +123,30 @@ static int32_t vnodeSnapReaderDealWithSnapInfo(SVSnapReader *pReader, SSnapshotP goto _out; } } break; + case SNAP_DATA_RAW: { + if (tDeserializeTsdbRepOpts(buf, bufLen, &tsdbOpts) < 0) { + vError("vgId:%d, failed to deserialize tsdb rep opts since %s", TD_VID(pVnode), terrstr()); + goto _out; + } + } break; default: vError("vgId:%d, unexpected subfield type of snap info. 
typ:%d", TD_VID(pVnode), subField->typ); goto _out; } } - } + // toggle snap replication mode + vInfo("vgId:%d, vnode snap reader supported tsdb rep of format:%d", TD_VID(pVnode), tsdbOpts.format); + if (pReader->sver == 0 && tsdbOpts.format == TSDB_SNAP_REP_FMT_RAW) { + pReader->tsdbDone = true; + } else { + pReader->tsdbRAWDone = true; + } + + ASSERT(pReader->tsdbDone != pReader->tsdbRAWDone); + vInfo("vgId:%d, vnode snap writer enabled replication mode: %s", TD_VID(pVnode), + (pReader->tsdbDone ? "raw" : "normal")); + } code = 0; _out: return code; @@ -277,8 +296,6 @@ int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData) } // TSDB ============== - pReader->tsdbDone = true; - if (!pReader->tsdbDone) { // open if not if (pReader->pTsdbReader == NULL) { @@ -534,6 +551,7 @@ static int32_t vnodeSnapWriterDealWithSnapInfo(SVSnapWriter *pWriter, SSnapshotP goto _out; } + STsdbRepOpts tsdbOpts = {0}; TFileSetRangeArray **ppRanges = NULL; int32_t offset = 0; @@ -557,11 +575,19 @@ static int32_t vnodeSnapWriterDealWithSnapInfo(SVSnapWriter *pWriter, SSnapshotP goto _out; } } break; + case SNAP_DATA_RAW: { + if (tDeserializeTsdbRepOpts(buf, bufLen, &tsdbOpts) < 0) { + vError("vgId:%d, failed to deserialize tsdb rep opts since %s", TD_VID(pVnode), terrstr()); + goto _out; + } + } break; default: vError("vgId:%d, unexpected subfield type of snap info. 
typ:%d", TD_VID(pVnode), subField->typ); goto _out; } } + + vInfo("vgId:%d, vnode snap writer supported tsdb rep of format:%d", TD_VID(pVnode), tsdbOpts.format); } code = 0; From 28e1d836628a71de0e23f731f03e7f649d079bce Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 6 Dec 2023 16:38:09 +0800 Subject: [PATCH 41/65] fix: adjust tsdbReadFile call in tsdbDataFileRAWReadBlockData --- source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c index 4a72fcfda8..3f448379c9 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c @@ -72,7 +72,7 @@ int32_t tsdbDataFileRAWReadBlockData(SDataFileRAWReader *reader, STsdbDataRAWBlo pBlock->file.maxVer = reader->config->file.maxVer; pBlock->file.stt->level = reader->config->file.stt->level; - code = tsdbReadFile(reader->fd, pBlock->offset, pBlock->data, pBlock->dataLength); + code = tsdbReadFile(reader->fd, pBlock->offset, pBlock->data, pBlock->dataLength, 0); TSDB_CHECK_CODE(code, lino, _exit); _exit: From 5cdf2b0b041755c3316512ee40be667c9a734634 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 6 Dec 2023 16:40:04 +0800 Subject: [PATCH 42/65] fixup: remove call to obsolete funcs of BgTask in tsdbSnapRAWWriter open and close --- source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c index c3503a0cd6..6b61fcc324 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c @@ -363,9 +363,6 @@ int32_t tsdbSnapRAWWriterOpen(STsdb* pTsdb, int64_t ever, STsdbSnapRAWWriter** w int32_t code = 0; int32_t lino = 0; - // disable background tasks - tsdbFSDisableBgTask(pTsdb->pFS); - // start to write writer[0] = taosMemoryCalloc(1, 
sizeof(*writer[0])); if (writer[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; @@ -528,7 +525,6 @@ int32_t tsdbSnapRAWWriterClose(STsdbSnapRAWWriter** writer, int8_t rollback) { taosThreadMutexUnlock(&writer[0]->tsdb->mutex); } - tsdbFSEnableBgTask(tsdb->pFS); TARRAY2_DESTROY(writer[0]->fopArr, NULL); tsdbFSDestroyCopySnapshot(&writer[0]->fsetArr); From ebd3b697451e4eaa40e70e8084438ffaedbcfa29 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 6 Dec 2023 17:51:40 +0800 Subject: [PATCH 43/65] enh: control playload size of tsdb snap replication with TSDB_SNAP_DATA_PAYLOAD_SIZE --- include/util/tdef.h | 1 + source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h | 12 ------------ source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 2 +- source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c | 3 +-- 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/include/util/tdef.h b/include/util/tdef.h index 1a440c7268..51b0b63da2 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -288,6 +288,7 @@ typedef enum ELogicConditionType { #define TSDB_CONN_ACTIVE_KEY_LEN 255 #define TSDB_DEFAULT_PKT_SIZE 65480 // same as RPC_MAX_UDP_SIZE +#define TSDB_SNAP_DATA_PAYLOAD_SIZE (1 * 1024 * 1024) #define TSDB_PAYLOAD_SIZE TSDB_DEFAULT_PKT_SIZE #define TSDB_DEFAULT_PAYLOAD_SIZE 5120 // default payload size, greater than PATH_MAX value diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h index 49f80b0be5..d765671698 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.h @@ -26,18 +26,6 @@ extern "C" { #endif -#define TSDB_SNAP_RAW_PAYLOAD_SIZE (4096 * 1024) -#if 0 -struct SDataRAWBlock { - int8_t *data; - int64_t size; -}; - -int32_t tsdbDataRAWBlockReset(SDataRAWBlock *pBlock); -int32_t tsdbDataRAWBlockAlloc(SDataRawBlock *pBlock); -void tsdbDataRAWBlockFree(SDataRAWBlock *pBlock); -#endif - // STsdbDataRAWBlockHeader ======================================= typedef struct 
STsdbDataRAWBlockHeader { struct { diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 8f5394a9bc..6aff1c2930 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -331,7 +331,7 @@ static int32_t tsdbSnapReadTimeSeriesData(STsdbSnapReader* reader, uint8_t** dat if (!(reader->blockData->nRow % 16)) { int64_t nData = tBlockDataSize(reader->blockData); - if (nData >= 1 * 1024 * 1024) { + if (nData >= TSDB_SNAP_DATA_PAYLOAD_SIZE) { break; } } diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c index 6b61fcc324..462afcbec3 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c @@ -21,7 +21,6 @@ static int32_t tsdbSnapRAWReadFileSetCloseReader(STsdbSnapRAWReader* reader); // reader - typedef struct SDataFileRAWReaderIter { int32_t count; int32_t idx; @@ -180,7 +179,7 @@ static int32_t tsdbSnapRAWReadFileSetCloseIter(STsdbSnapRAWReader* reader) { } static int64_t tsdbSnapRAWReadPeek(SDataFileRAWReader* reader) { - int64_t size = TMIN(reader->config->file.size - reader->ctx->offset, TSDB_SNAP_RAW_PAYLOAD_SIZE); + int64_t size = TMIN(reader->config->file.size - reader->ctx->offset, TSDB_SNAP_DATA_PAYLOAD_SIZE); return size; } From 1255b46469b274682c5dc216d4a40b65897521e6 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 7 Dec 2023 14:49:31 +0800 Subject: [PATCH 44/65] fix: destroy dataReaderArr properly in tsdbSnapRAWReaderClose --- source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c | 2 +- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 4 ++++ source/libs/sync/src/syncSnapshot.c | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c index 462afcbec3..b7c22aa0e9 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c +++ 
b/source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c @@ -82,7 +82,7 @@ int32_t tsdbSnapRAWReaderClose(STsdbSnapRAWReader** reader) { STsdb* tsdb = reader[0]->tsdb; - tsdbSnapRAWReadFileSetCloseReader(reader[0]); + TARRAY2_DESTROY(reader[0]->dataReaderArr, tsdbDataFileRAWReaderClose); tsdbFSDestroyRefSnapshot(&reader[0]->fsetArr); taosMemoryFree(reader[0]); reader[0] = NULL; diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 438fa35713..ed1dcc64c9 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -203,6 +203,10 @@ void vnodeSnapReaderClose(SVSnapReader *pReader) { tsdbSnapReaderClose(&pReader->pTsdbReader); } + if (pReader->pTsdbRAWReader) { + tsdbSnapRAWReaderClose(&pReader->pTsdbRAWReader); + } + if (pReader->pMetaReader) { metaSnapReaderClose(&pReader->pMetaReader); } diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 1e3614857e..93e81fd8e2 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -1108,7 +1108,7 @@ static int32_t syncSnapBufferSend(SSyncSnapshotSender *pSender, SyncSnapshotRsp goto _out; } - if (pSender->pReader == NULL || pSender->finish) { + if (pSender->pReader == NULL || pSender->finish || !snapshotSenderIsStart(pSender)) { code = terrno = TSDB_CODE_SYN_INTERNAL_ERROR; goto _out; } From 7c17d6f31333895205bd89c6370410be8cc459ff Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 7 Dec 2023 23:12:54 +0800 Subject: [PATCH 45/65] fix(tsdb): opt read performance by check clean stt block if only row number required. 
--- include/common/tcommon.h | 1 + include/libs/executor/storageapi.h | 3 +- source/dnode/vnode/inc/vnode.h | 3 +- source/dnode/vnode/src/inc/tsdb.h | 59 +-- source/dnode/vnode/src/tsdb/tsdbCache.c | 2 +- source/dnode/vnode/src/tsdb/tsdbMergeTree.c | 240 +++++------ source/dnode/vnode/src/tsdb/tsdbRead2.c | 416 ++++++-------------- source/dnode/vnode/src/tsdb/tsdbReadUtil.c | 270 +++++++++++-- source/dnode/vnode/src/tsdb/tsdbReadUtil.h | 23 +- source/dnode/vnode/src/vnd/vnodeInitApi.c | 2 +- source/libs/executor/src/executor.c | 4 +- source/libs/executor/src/scanoperator.c | 6 +- source/libs/executor/src/sysscanoperator.c | 2 +- 13 files changed, 536 insertions(+), 495 deletions(-) diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 87a6a90a7e..81e3af88a5 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -253,6 +253,7 @@ typedef struct SQueryTableDataCond { STimeWindow twindows; int64_t startVersion; int64_t endVersion; + bool trimData; // response the actual data, not only the rows in the attribute of info.row of ssdatablock } SQueryTableDataCond; int32_t tEncodeDataBlock(void** buf, const SSDataBlock* pBlock); diff --git a/include/libs/executor/storageapi.h b/include/libs/executor/storageapi.h index 045f2bad70..e8e90541d1 100644 --- a/include/libs/executor/storageapi.h +++ b/include/libs/executor/storageapi.h @@ -154,8 +154,7 @@ typedef struct { /*-------------------------------------------------new api format---------------------------------------------------*/ typedef struct TsdReader { int32_t (*tsdReaderOpen)(void* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t numOfTables, - SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, - SHashObj** pIgnoreTables); + SSDataBlock* pResBlock, void** ppReader, const char* idstr, SHashObj** pIgnoreTables); void (*tsdReaderClose)(); void (*tsdSetReaderTaskId)(void *pReader, const char *pId); int32_t (*tsdSetQueryTableList)(); diff --git 
a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 74e4b66098..32ee7526c0 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -154,8 +154,7 @@ typedef struct STsdbReader STsdbReader; #define CACHESCAN_RETRIEVE_LAST 0x8 int32_t tsdbReaderOpen2(void *pVnode, SQueryTableDataCond *pCond, void *pTableList, int32_t numOfTables, - SSDataBlock *pResBlock, void **ppReader, const char *idstr, bool countOnly, - SHashObj **pIgnoreTables); + SSDataBlock *pResBlock, void **ppReader, const char *idstr, SHashObj **pIgnoreTables); int32_t tsdbSetTableList2(STsdbReader *pReader, const void *pTableList, int32_t num); void tsdbReaderSetId2(STsdbReader *pReader, const char *idstr); void tsdbReaderClose2(STsdbReader *pReader); diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 6d76529101..3876048010 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -675,8 +675,6 @@ struct SDelFWriter { }; #include "tarray2.h" -// #include "tsdbFS2.h" -// struct STFileSet; typedef struct STFileSet STFileSet; typedef TARRAY2(STFileSet *) TFileSetArray; @@ -772,20 +770,25 @@ typedef struct SBlockDataInfo { int32_t sttBlockIndex; } SBlockDataInfo; -typedef struct SSttBlockLoadInfo { - SBlockDataInfo blockData[2]; // buffered block data - int32_t statisBlockIndex; // buffered statistics block index - void *statisBlock; // buffered statistics block data - void *pSttStatisBlkArray; - SArray *aSttBlk; - int32_t currentLoadBlockIndex; - STSchema *pSchema; - int16_t *colIds; - int32_t numOfCols; - bool checkRemainingRow; // todo: no assign value? 
- bool isLast; - bool sttBlockLoaded; +// todo: move away +typedef struct { + SArray *pUid; + SArray *pFirstKey; + SArray *pLastKey; + SArray *pCount; +} SSttTableRowsInfo; +typedef struct SSttBlockLoadInfo { + SBlockDataInfo blockData[2]; // buffered block data + SArray *aSttBlk; + int32_t currentLoadBlockIndex; + STSchema *pSchema; + int16_t *colIds; + int32_t numOfCols; + bool checkRemainingRow; // todo: no assign value? + bool isLast; + bool sttBlockLoaded; + SSttTableRowsInfo info; SSttBlockLoadCostInfo cost; } SSttBlockLoadInfo; @@ -874,27 +877,31 @@ typedef struct { _load_tomb_fn loadTombFn; void *pReader; void *idstr; + bool rspRows; // response the rows in stt-file, if possible } SMergeTreeConf; -int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf); +typedef struct SSttDataInfoForTable { + SArray* pTimeWindowList; + int64_t numOfRows; +} SSttDataInfoForTable; -void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter); -bool tMergeTreeNext(SMergeTree *pMTree); -void tMergeTreePinSttBlock(SMergeTree *pMTree); -void tMergeTreeUnpinSttBlock(SMergeTree *pMTree); -bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree); -void tMergeTreeClose(SMergeTree *pMTree); +int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf, SSttDataInfoForTable* pTableInfo); +void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter); +bool tMergeTreeNext(SMergeTree *pMTree); +void tMergeTreePinSttBlock(SMergeTree *pMTree); +void tMergeTreeUnpinSttBlock(SMergeTree *pMTree); +bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree); +void tMergeTreeClose(SMergeTree *pMTree); SSttBlockLoadInfo *tCreateSttBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols); -void getSttBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo, SSttBlockLoadCostInfo *pLoadCost); void *destroySttBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo); void *destroySttBlockReader(SArray *pLDataIterArray, SSttBlockLoadCostInfo *pLoadCost); // tsdbCache 
============================================================================================== typedef enum { - READ_MODE_COUNT_ONLY = 0x1, - READ_MODE_ALL, -} EReadMode; + READER_EXEC_DATA = 0x1, + READER_EXEC_ROWS = 0x2, +} EExecMode; typedef struct { TSKEY ts; diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index 5076599753..5fc0e333b9 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -1870,7 +1870,7 @@ static int32_t lastIterOpen(SFSLastIter *iter, STFileSet *pFileSet, STsdb *pTsdb .idstr = pr->idstr, }; - code = tMergeTreeOpen2(&iter->mergeTree, &conf); + code = tMergeTreeOpen2(&iter->mergeTree, &conf, NULL); if (code != TSDB_CODE_SUCCESS) { return -1; } diff --git a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c index 8017f1f4d0..ee92edc2a9 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c +++ b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c @@ -15,6 +15,7 @@ #include "tsdb.h" #include "tsdbFSet2.h" +#include "tsdbUtil2.h" #include "tsdbMerge.h" #include "tsdbReadUtil.h" #include "tsdbSttFileRW.h" @@ -52,15 +53,6 @@ SSttBlockLoadInfo *tCreateSttBlockLoadInfo(STSchema *pSchema, int16_t *colList, return pLoadInfo; } -void getSttBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo, SSttBlockLoadCostInfo* pLoadCost) { - for (int32_t i = 0; i < 1; ++i) { - pLoadCost->blockElapsedTime += pLoadInfo[i].cost.blockElapsedTime; - pLoadCost->loadBlocks += pLoadInfo[i].cost.loadBlocks; - pLoadCost->loadStatisBlocks += pLoadInfo[i].cost.loadStatisBlocks; - pLoadCost->statisElapsedTime += pLoadInfo[i].cost.statisElapsedTime; - } -} - void *destroySttBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) { if (pLoadInfo == NULL) { return NULL; @@ -78,9 +70,11 @@ void *destroySttBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) { pInfo->sttBlockIndex = -1; pInfo->pin = false; - if (pLoadInfo->statisBlock != NULL) { - 
tStatisBlockDestroy(pLoadInfo->statisBlock); - taosMemoryFreeClear(pLoadInfo->statisBlock); + if (pLoadInfo->info.pCount != NULL) { + taosArrayDestroy(pLoadInfo->info.pUid); + taosArrayDestroy(pLoadInfo->info.pFirstKey); + taosArrayDestroy(pLoadInfo->info.pLastKey); + taosArrayDestroy(pLoadInfo->info.pCount); } taosArrayDestroy(pLoadInfo->aSttBlk); @@ -323,95 +317,74 @@ static int32_t extractSttBlockInfo(SLDataIter *pIter, const TSttBlkArray *pArray return TSDB_CODE_SUCCESS; } -static int32_t suidComparFn(const void *target, const void *p2) { - const uint64_t *targetUid = target; - const uint64_t *uid2 = p2; - if (*uid2 == (*targetUid)) { +static int32_t loadSttStatisticsBlockData(SSttFileReader *pSttFileReader, SSttBlockLoadInfo *pBlockLoadInfo, + TStatisBlkArray *pStatisBlkArray, uint64_t suid, const char *id) { + int32_t numOfBlocks = TARRAY2_SIZE(pStatisBlkArray); + if (numOfBlocks <= 0) { return 0; - } else { - return (*targetUid) < (*uid2) ? -1 : 1; - } -} - -static bool existsFromSttBlkStatis(SSttBlockLoadInfo *pBlockLoadInfo, uint64_t suid, uint64_t uid, - SSttFileReader *pReader) { - const TStatisBlkArray *pStatisBlkArray = pBlockLoadInfo->pSttStatisBlkArray; - if (TARRAY2_SIZE(pStatisBlkArray) <= 0) { - return true; } - int32_t i = 0; - for (i = 0; i < TARRAY2_SIZE(pStatisBlkArray); ++i) { - SStatisBlk *p = &pStatisBlkArray->data[i]; - if (p->minTbid.suid <= suid && p->maxTbid.suid >= suid) { - break; - } + int32_t startIndex = 0; + while((startIndex < numOfBlocks) && (pStatisBlkArray->data[startIndex].maxTbid.suid < suid)) { + ++startIndex; } - if (i >= TARRAY2_SIZE(pStatisBlkArray)) { - return false; + if (startIndex >= numOfBlocks || pStatisBlkArray->data[startIndex].minTbid.suid > suid) { + return 0; } - while (i < TARRAY2_SIZE(pStatisBlkArray)) { - SStatisBlk *p = &pStatisBlkArray->data[i]; - if (p->minTbid.suid > suid) { - return false; + int32_t endIndex = startIndex; + while(endIndex < numOfBlocks && pStatisBlkArray->data[endIndex].minTbid.suid <= 
suid) { + ++endIndex; + } + + int32_t num = endIndex - startIndex; + pBlockLoadInfo->cost.loadStatisBlocks += num; + + STbStatisBlock block; + tStatisBlockInit(&block); + + int64_t st = taosGetTimestampUs(); + + for(int32_t k = startIndex; k < endIndex; ++k) { + tsdbSttFileReadStatisBlock(pSttFileReader, &pStatisBlkArray->data[k], &block); + + int32_t i = 0; + while(block.suid->data[i] != suid) { + ++i; } -// if (pBlockLoadInfo->statisBlock == NULL) { -// pBlockLoadInfo->statisBlock = taosMemoryCalloc(1, sizeof(STbStatisBlock)); -// -// int64_t st = taosGetTimestampMs(); -// tsdbSttFileReadStatisBlock(pReader, p, pBlockLoadInfo->statisBlock); -// pBlockLoadInfo->statisBlockIndex = i; -// -// double el = (taosGetTimestampMs() - st) / 1000.0; -// pBlockLoadInfo->cost.loadStatisBlocks += 1; -// pBlockLoadInfo->cost.statisElapsedTime += el; -// } else if (pBlockLoadInfo->statisBlockIndex != i) { -// tStatisBlockDestroy(pBlockLoadInfo->statisBlock); -// -// int64_t st = taosGetTimestampMs(); -// tsdbSttFileReadStatisBlock(pReader, p, pBlockLoadInfo->statisBlock); -// pBlockLoadInfo->statisBlockIndex = i; -// -// double el = (taosGetTimestampMs() - st) / 1000.0; -// pBlockLoadInfo->cost.loadStatisBlocks += 1; -// pBlockLoadInfo->cost.statisElapsedTime += el; -// } - - STbStatisBlock* pBlock = pBlockLoadInfo->statisBlock; - int32_t index = tarray2SearchIdx(pBlock->suid, &suid, sizeof(int64_t), suidComparFn, TD_EQ); - if (index == -1) { - return false; + int32_t rows = TARRAY2_SIZE(block.suid); + if (pBlockLoadInfo->info.pUid == NULL) { + pBlockLoadInfo->info.pUid = taosArrayInit(rows, sizeof(int64_t)); + pBlockLoadInfo->info.pFirstKey = taosArrayInit(rows, sizeof(int64_t)); + pBlockLoadInfo->info.pLastKey = taosArrayInit(rows, sizeof(int64_t)); + pBlockLoadInfo->info.pCount = taosArrayInit(rows, sizeof(int64_t)); } - int32_t j = index; - if (pBlock->uid->data[j] == uid) { - return true; - } else if (pBlock->uid->data[j] > uid) { - while (j >= 0 && pBlock->suid->data[j] == 
suid) { - if (pBlock->uid->data[j] == uid) { - return true; - } else { - j -= 1; - } - } + if (pStatisBlkArray->data[k].maxTbid.suid == suid) { + taosArrayAddBatch(pBlockLoadInfo->info.pUid, &block.uid->data[i], rows - i); + taosArrayAddBatch(pBlockLoadInfo->info.pFirstKey, &block.firstKey->data[i], rows - i); + taosArrayAddBatch(pBlockLoadInfo->info.pLastKey, &block.lastKey->data[i], rows - i); + taosArrayAddBatch(pBlockLoadInfo->info.pCount, &block.count->data[i], rows - i); } else { - j = index + 1; - while (j < pBlock->suid->size && pBlock->suid->data[j] == suid) { - if (pBlock->uid->data[j] == uid) { - return true; - } else { - j += 1; - } + while(i < rows && block.suid->data[i] == suid) { + taosArrayPush(pBlockLoadInfo->info.pUid, &block.uid->data[i]); + taosArrayPush(pBlockLoadInfo->info.pFirstKey, &block.firstKey->data[i]); + taosArrayPush(pBlockLoadInfo->info.pLastKey, &block.lastKey->data[i]); + taosArrayPush(pBlockLoadInfo->info.pCount, &block.count->data[i]); + i += 1; } } - - i += 1; } - return false; + tStatisBlockDestroy(&block); + + double el = (taosGetTimestampUs() - st) / 1000.0; + pBlockLoadInfo->cost.statisElapsedTime += el; + + tsdbDebug("%s load %d statis blocks into buf, elapsed time:%.2fms", id, num, el); + return TSDB_CODE_SUCCESS; } static int32_t doLoadSttFilesBlk(SSttBlockLoadInfo *pBlockLoadInfo, SLDataIter *pIter, int64_t suid, @@ -428,19 +401,28 @@ static int32_t doLoadSttFilesBlk(SSttBlockLoadInfo *pBlockLoadInfo, SLDataIter * return code; } + // load the stt block info for each stt file block code = extractSttBlockInfo(pIter, pSttBlkArray, pBlockLoadInfo, suid); if (code != TSDB_CODE_SUCCESS) { tsdbError("load stt block info failed, code:%s, %s", tstrerror(code), idStr); return code; } - // load stt blocks statis for all stt-blocks, to decide if the data of queried table exists in current stt file - code = tsdbSttFileReadStatisBlk(pIter->pReader, (const TStatisBlkArray **)&pBlockLoadInfo->pSttStatisBlkArray); + // load stt 
statistics block for all stt-blocks, to decide if the data of queried table exists in current stt file + TStatisBlkArray *pStatisBlkArray = NULL; + code = tsdbSttFileReadStatisBlk(pIter->pReader, (const TStatisBlkArray **)&pStatisBlkArray); if (code != TSDB_CODE_SUCCESS) { tsdbError("failed to load stt block statistics, code:%s, %s", tstrerror(code), idStr); return code; } + // load statistics block for all tables in current stt file + code = loadSttStatisticsBlockData(pIter->pReader, pIter->pBlockLoadInfo, pStatisBlkArray, suid, idStr); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("failed to load stt statistics block data, code:%s, %s", tstrerror(code), idStr); + return code; + } + code = loadTombFn(pReader1, pIter->pReader, pIter->pBlockLoadInfo); double el = (taosGetTimestampUs() - st) / 1000.0; @@ -448,19 +430,44 @@ static int32_t doLoadSttFilesBlk(SSttBlockLoadInfo *pBlockLoadInfo, SLDataIter * return code; } +static int32_t uidComparFn(const void* p1, const void* p2) { + const uint64_t *pFirst = p1; + const uint64_t *pVal = p2; + + if (*pFirst == *pVal) { + return 0; + } else { + return *pFirst < *pVal? 
-1:1; + } +} + +static void setSttInfoForCurrentTable(SSttBlockLoadInfo *pLoadInfo, uint64_t uid, STimeWindow *pTimeWindow, + int64_t *numOfRows) { + if (pTimeWindow == NULL || taosArrayGetSize(pLoadInfo->info.pUid) == 0) { + return; + } + + int32_t index = taosArraySearchIdx(pLoadInfo->info.pUid, &uid, uidComparFn, TD_EQ); + if (index >= 0) { + pTimeWindow->skey = *(int64_t *)taosArrayGet(pLoadInfo->info.pFirstKey, index); + pTimeWindow->ekey = *(int64_t *)taosArrayGet(pLoadInfo->info.pLastKey, index); + + *numOfRows += *(int64_t*) taosArrayGet(pLoadInfo->info.pCount, index); + } +} + int32_t tLDataIterOpen2(SLDataIter *pIter, SSttFileReader *pSttFileReader, int32_t cid, int8_t backward, - uint64_t suid, uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pRange, - SSttBlockLoadInfo *pBlockLoadInfo, const char *idStr, bool strictTimeRange, - _load_tomb_fn loadTombFn, void *pReader1) { + SMergeTreeConf *pConf, SSttBlockLoadInfo *pBlockLoadInfo, STimeWindow *pTimeWindow, + int64_t *numOfRows, const char *idStr) { int32_t code = TSDB_CODE_SUCCESS; - pIter->uid = uid; + pIter->uid = pConf->uid; pIter->cid = cid; pIter->backward = backward; - pIter->verRange.minVer = pRange->minVer; - pIter->verRange.maxVer = pRange->maxVer; - pIter->timeWindow.skey = pTimeWindow->skey; - pIter->timeWindow.ekey = pTimeWindow->ekey; + pIter->verRange.minVer = pConf->verRange.minVer; + pIter->verRange.maxVer = pConf->verRange.maxVer; + pIter->timeWindow.skey = pConf->timewindow.skey; + pIter->timeWindow.ekey = pConf->timewindow.ekey; pIter->pReader = pSttFileReader; pIter->pBlockLoadInfo = pBlockLoadInfo; @@ -473,34 +480,29 @@ int32_t tLDataIterOpen2(SLDataIter *pIter, SSttFileReader *pSttFileReader, int32 } if (!pBlockLoadInfo->sttBlockLoaded) { - code = doLoadSttFilesBlk(pBlockLoadInfo, pIter, suid, loadTombFn, pReader1, idStr); + code = doLoadSttFilesBlk(pBlockLoadInfo, pIter, pConf->suid, pConf->loadTombFn, pConf->pReader, idStr); if (code != TSDB_CODE_SUCCESS) { return code; } } 
-// bool exists = existsFromSttBlkStatis(pBlockLoadInfo, suid, uid, pIter->pReader); -// if (!exists) { -// pIter->iSttBlk = -1; -// pIter->pSttBlk = NULL; -// return TSDB_CODE_SUCCESS; -// } + setSttInfoForCurrentTable(pBlockLoadInfo, pConf->uid, pTimeWindow, numOfRows); // find the start block, actually we could load the position to avoid repeatly searching for the start position when // the skey is updated. size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk); - pIter->iSttBlk = binarySearchForStartBlock(pBlockLoadInfo->aSttBlk->pData, size, uid, backward); + pIter->iSttBlk = binarySearchForStartBlock(pBlockLoadInfo->aSttBlk->pData, size, pConf->uid, backward); if (pIter->iSttBlk != -1) { pIter->pSttBlk = taosArrayGet(pBlockLoadInfo->aSttBlk, pIter->iSttBlk); pIter->iRow = (pIter->backward) ? pIter->pSttBlk->nRow : -1; - if ((!backward) && ((strictTimeRange && pIter->pSttBlk->minKey >= pIter->timeWindow.ekey) || - (!strictTimeRange && pIter->pSttBlk->minKey > pIter->timeWindow.ekey))) { + if ((!backward) && ((pConf->strictTimeRange && pIter->pSttBlk->minKey >= pIter->timeWindow.ekey) || + (!pConf->strictTimeRange && pIter->pSttBlk->minKey > pIter->timeWindow.ekey))) { pIter->pSttBlk = NULL; } - if (backward && ((strictTimeRange && pIter->pSttBlk->maxKey <= pIter->timeWindow.skey) || - (!strictTimeRange && pIter->pSttBlk->maxKey < pIter->timeWindow.skey))) { + if (backward && ((pConf->strictTimeRange && pIter->pSttBlk->maxKey <= pIter->timeWindow.skey) || + (!pConf->strictTimeRange && pIter->pSttBlk->maxKey < pIter->timeWindow.skey))) { pIter->pSttBlk = NULL; pIter->ignoreEarlierTs = true; } @@ -708,8 +710,6 @@ bool tLDataIterNextRow(SLDataIter *pIter, const char *idStr) { return (terrno == TSDB_CODE_SUCCESS) && (pIter->pSttBlk != NULL) && (pBlockData != NULL); } -SRowInfo *tLDataIterGet(SLDataIter *pIter) { return &pIter->rInfo; } - // SMergeTree ================================================= static FORCE_INLINE int32_t tLDataIterCmprFn(const SRBTreeNode 
*p1, const SRBTreeNode *p2) { SLDataIter *pIter1 = (SLDataIter *)(((uint8_t *)p1) - offsetof(SLDataIter, node)); @@ -737,7 +737,7 @@ static FORCE_INLINE int32_t tLDataIterDescCmprFn(const SRBTreeNode *p1, const SR return -1 * tLDataIterCmprFn(p1, p2); } -int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf) { +int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf, SSttDataInfoForTable* pSttDataInfo) { int32_t code = TSDB_CODE_SUCCESS; pMTree->pIter = NULL; @@ -758,17 +758,16 @@ int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf) { goto _end; } - // add the list/iter placeholder - adjustLDataIters(pConf->pSttFileBlockIterArray, pConf->pCurrentFileset); + adjustSttDataIters(pConf->pSttFileBlockIterArray, pConf->pCurrentFileset); for (int32_t j = 0; j < numOfLevels; ++j) { SSttLvl *pSttLevel = ((STFileSet *)pConf->pCurrentFileset)->lvlArr->data[j]; - SArray *pList = taosArrayGetP(pConf->pSttFileBlockIterArray, j); + SArray * pList = taosArrayGetP(pConf->pSttFileBlockIterArray, j); for (int32_t i = 0; i < TARRAY2_SIZE(pSttLevel->fobjArr); ++i) { // open all last file SLDataIter *pIter = taosArrayGetP(pList, i); - SSttFileReader *pSttFileReader = pIter->pReader; + SSttFileReader * pSttFileReader = pIter->pReader; SSttBlockLoadInfo *pLoadInfo = pIter->pBlockLoadInfo; // open stt file reader if not opened yet @@ -790,10 +789,11 @@ int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf) { memset(pIter, 0, sizeof(SLDataIter)); + STimeWindow w = {0}; + int64_t numOfRows = 0; + int64_t cid = pSttLevel->fobjArr->data[i]->f->cid; - code = tLDataIterOpen2(pIter, pSttFileReader, cid, pMTree->backward, pConf->suid, pConf->uid, &pConf->timewindow, - &pConf->verRange, pLoadInfo, pMTree->idStr, pConf->strictTimeRange, pConf->loadTombFn, - pConf->pReader); + code = tLDataIterOpen2(pIter, pSttFileReader, cid, pMTree->backward, pConf, pLoadInfo, &w, &numOfRows, pMTree->idStr); if (code != TSDB_CODE_SUCCESS) { goto _end; } @@ -801,6 
+801,12 @@ int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf) { bool hasVal = tLDataIterNextRow(pIter, pMTree->idStr); if (hasVal) { tMergeTreeAddIter(pMTree, pIter); + + // let's record the time window for current table of uid in the stt files + if (pSttDataInfo != NULL) { + taosArrayPush(pSttDataInfo->pTimeWindowList, &w); + pSttDataInfo->numOfRows += numOfRows; + } } else { if (!pMTree->ignoreEarlierTs) { pMTree->ignoreEarlierTs = pIter->ignoreEarlierTs; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 0753c2454d..e09ac504d0 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -25,6 +25,16 @@ #define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) #define getCurrentKeyInSttBlock(_r) ((_r)->currentKey) +typedef struct { + bool overlapWithNeighborBlock; + bool hasDupTs; + bool overlapWithDelInfo; + bool overlapWithSttBlock; + bool overlapWithKeyInBuf; + bool partiallyRequired; + bool moreThanCapcity; +} SDataBlockToLoadInfo; + static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter); static int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity, STsdbReader* pReader); @@ -1221,78 +1231,6 @@ static bool keyOverlapFileBlock(TSDBKEY key, SFileDataBlockInfo* pBlock, SVersio (pBlock->record.maxVer >= pVerRange->minVer) && (pBlock->record.minVer <= pVerRange->maxVer); } -static bool doCheckforDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, - int32_t startIndex) { - size_t num = taosArrayGetSize(pBlockScanInfo->delSkyline); - - for (int32_t i = startIndex; i < num; i += 1) { - TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, i); - if (p->ts >= pRecord->firstKey && p->ts <= pRecord->lastKey) { - if (p->version >= pRecord->minVer) { - return true; - } - } else if (p->ts < pRecord->firstKey) { // p->ts < pBlock->minKey.ts - if (p->version >= pRecord->minVer) 
{ - if (i < num - 1) { - TSDBKEY* pnext = taosArrayGet(pBlockScanInfo->delSkyline, i + 1); - if (pnext->ts >= pRecord->firstKey) { - return true; - } - } else { // it must be the last point - ASSERT(p->version == 0); - } - } - } else { // (p->ts > pBlock->maxKey.ts) { - return false; - } - } - - return false; -} - -static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, int32_t order) { - if (pBlockScanInfo->delSkyline == NULL || (taosArrayGetSize(pBlockScanInfo->delSkyline) == 0)) { - return false; - } - - // ts is not overlap - TSDBKEY* pFirst = taosArrayGet(pBlockScanInfo->delSkyline, 0); - TSDBKEY* pLast = taosArrayGetLast(pBlockScanInfo->delSkyline); - if (pRecord->firstKey > pLast->ts || pRecord->lastKey < pFirst->ts) { - return false; - } - - // version is not overlap - if (ASCENDING_TRAVERSE(order)) { - return doCheckforDatablockOverlap(pBlockScanInfo, pRecord, pBlockScanInfo->fileDelIndex); - } else { - int32_t index = pBlockScanInfo->fileDelIndex; - while (1) { - TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, index); - if (p->ts > pRecord->firstKey && index > 0) { - index -= 1; - } else { // find the first point that is smaller than the minKey.ts of dataBlock. 
- if (p->ts == pRecord->firstKey && p->version < pRecord->maxVer && index > 0) { - index -= 1; - } - break; - } - } - - return doCheckforDatablockOverlap(pBlockScanInfo, pRecord, index); - } -} - -typedef struct { - bool overlapWithNeighborBlock; - bool hasDupTs; - bool overlapWithDelInfo; - bool overlapWithLastBlock; - bool overlapWithKeyInBuf; - bool partiallyRequired; - bool moreThanCapcity; -} SDataBlockToLoadInfo; - static void getBlockToLoadInfo(SDataBlockToLoadInfo* pInfo, SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pScanInfo, TSDBKEY keyInBuf, STsdbReader* pReader) { SBrinRecord rec = {0}; @@ -1313,7 +1251,7 @@ static void getBlockToLoadInfo(SDataBlockToLoadInfo* pInfo, SFileDataBlockInfo* ASSERT(pScanInfo->sttKeyInfo.status != STT_FILE_READER_UNINIT); if (pScanInfo->sttKeyInfo.status == STT_FILE_HAS_DATA) { int64_t nextProcKeyInStt = pScanInfo->sttKeyInfo.nextProcKey; - pInfo->overlapWithLastBlock = + pInfo->overlapWithSttBlock = !(pBlockInfo->record.lastKey < nextProcKeyInStt || pBlockInfo->record.firstKey > nextProcKeyInStt); } @@ -1335,15 +1273,15 @@ static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pBlock bool loadDataBlock = (info.overlapWithNeighborBlock || info.hasDupTs || info.partiallyRequired || info.overlapWithKeyInBuf || - info.moreThanCapcity || info.overlapWithDelInfo || info.overlapWithLastBlock); + info.moreThanCapcity || info.overlapWithDelInfo || info.overlapWithSttBlock); // log the reason why load the datablock for profile if (loadDataBlock) { tsdbDebug("%p uid:%" PRIu64 " need to load the datablock, overlapneighbor:%d, hasDup:%d, partiallyRequired:%d, " - "overlapWithKey:%d, greaterThanBuf:%d, overlapWithDel:%d, overlapWithlastBlock:%d, %s", + "overlapWithKey:%d, greaterThanBuf:%d, overlapWithDel:%d, overlapWithSttBlock:%d, %s", pReader, pBlockInfo->uid, info.overlapWithNeighborBlock, info.hasDupTs, info.partiallyRequired, - info.overlapWithKeyInBuf, info.moreThanCapcity, info.overlapWithDelInfo, 
info.overlapWithLastBlock, + info.overlapWithKeyInBuf, info.moreThanCapcity, info.overlapWithDelInfo, info.overlapWithSttBlock, pReader->idStr); } @@ -1355,7 +1293,7 @@ static bool isCleanFileDataBlock(STsdbReader* pReader, SFileDataBlockInfo* pBloc SDataBlockToLoadInfo info = {0}; getBlockToLoadInfo(&info, pBlockInfo, pScanInfo, keyInBuf, pReader); bool isCleanFileBlock = !(info.overlapWithNeighborBlock || info.hasDupTs || info.overlapWithKeyInBuf || - info.overlapWithDelInfo || info.overlapWithLastBlock); + info.overlapWithDelInfo || info.overlapWithSttBlock); return isCleanFileBlock; } @@ -2110,27 +2048,34 @@ static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDum return true; } -static bool initSttBlockReader(SSttBlockReader* pLBlockReader, STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { - // the last block reader has been initialized for this table. - if (pLBlockReader->uid == pScanInfo->uid) { - return hasDataInSttBlock(pLBlockReader); +static bool initSttBlockReader(SSttBlockReader* pSttBlockReader, STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { + bool hasData = true; + + // the stt block reader has been initialized for this table. 
+ if (pSttBlockReader->uid == pScanInfo->uid) { + return hasDataInSttBlock(pSttBlockReader); } - if (pLBlockReader->uid != 0) { - tMergeTreeClose(&pLBlockReader->mergeTree); + if (pSttBlockReader->uid != 0) { + tMergeTreeClose(&pSttBlockReader->mergeTree); } - pLBlockReader->uid = pScanInfo->uid; + pSttBlockReader->uid = pScanInfo->uid; - STimeWindow w = pLBlockReader->window; - if (ASCENDING_TRAVERSE(pLBlockReader->order)) { + // second time init stt block reader + if (pScanInfo->cleanSttBlocks && pReader->info.execMode == READER_EXEC_ROWS) { + return true; + } + + STimeWindow w = pSttBlockReader->window; + if (ASCENDING_TRAVERSE(pSttBlockReader->order)) { w.skey = pScanInfo->sttKeyInfo.nextProcKey; } else { w.ekey = pScanInfo->sttKeyInfo.nextProcKey; } int64_t st = taosGetTimestampUs(); - tsdbDebug("init last block reader, window:%" PRId64 "-%" PRId64 ", uid:%" PRIu64 ", %s", w.skey, w.ekey, + tsdbDebug("init stt block reader, window:%" PRId64 "-%" PRId64 ", uid:%" PRIu64 ", %s", w.skey, w.ekey, pScanInfo->uid, pReader->idStr); SMergeTreeConf conf = { @@ -2138,20 +2083,22 @@ static bool initSttBlockReader(SSttBlockReader* pLBlockReader, STableBlockScanIn .suid = pReader->info.suid, .pTsdb = pReader->pTsdb, .timewindow = w, - .verRange = pLBlockReader->verRange, + .verRange = pSttBlockReader->verRange, .strictTimeRange = false, .pSchema = pReader->info.pSchema, .pCurrentFileset = pReader->status.pCurrentFileset, - .backward = (pLBlockReader->order == TSDB_ORDER_DESC), + .backward = (pSttBlockReader->order == TSDB_ORDER_DESC), .pSttFileBlockIterArray = pReader->status.pLDataIterArray, .pCols = pReader->suppInfo.colId, .numOfCols = pReader->suppInfo.numOfCols, .loadTombFn = loadSttTombDataForAll, .pReader = pReader, .idstr = pReader->idStr, + .rspRows = (pReader->info.execMode == READER_EXEC_ROWS), }; - int32_t code = tMergeTreeOpen2(&pLBlockReader->mergeTree, &conf); + SSttDataInfoForTable info = {.pTimeWindowList = taosArrayInit(4, sizeof(STimeWindow))}; + int32_t 
code = tMergeTreeOpen2(&pSttBlockReader->mergeTree, &conf, &info); if (code != TSDB_CODE_SUCCESS) { return false; } @@ -2159,13 +2106,44 @@ static bool initSttBlockReader(SSttBlockReader* pLBlockReader, STableBlockScanIn initMemDataIterator(pScanInfo, pReader); initDelSkylineIterator(pScanInfo, pReader->info.order, &pReader->cost); - code = nextRowFromSttBlocks(pLBlockReader, pScanInfo, &pReader->info.verRange); + if (conf.rspRows) { + pScanInfo->cleanSttBlocks = + isCleanSttBlock(info.pTimeWindowList, &pReader->info.window, pScanInfo, pReader->info.order); + + if (pScanInfo->cleanSttBlocks) { + pScanInfo->numOfRowsInStt = info.numOfRows; + pScanInfo->sttWindow.skey = INT64_MAX; + pScanInfo->sttWindow.ekey = INT64_MIN; + + // calculate the time window for data in stt files + for(int32_t i = 0; i < taosArrayGetSize(info.pTimeWindowList); ++i) { + STimeWindow* pWindow = taosArrayGet(info.pTimeWindowList, i); + if (pScanInfo->sttWindow.skey > pWindow->skey) { + pScanInfo->sttWindow.skey = pWindow->skey; + } + + if (pScanInfo->sttWindow.ekey < pWindow->ekey) { + pScanInfo->sttWindow.ekey = pWindow->ekey; + } + } + + pScanInfo->sttKeyInfo.status = taosArrayGetSize(info.pTimeWindowList)? STT_FILE_HAS_DATA:STT_FILE_NO_DATA; + pScanInfo->sttKeyInfo.nextProcKey = ASCENDING_TRAVERSE(pReader->info.order)? 
pScanInfo->sttWindow.skey:pScanInfo->sttWindow.ekey; + hasData = true; + } else { + hasData = nextRowFromSttBlocks(pSttBlockReader, pScanInfo, &pReader->info.verRange); + } + } else { + hasData = nextRowFromSttBlocks(pSttBlockReader, pScanInfo, &pReader->info.verRange); + } + + taosArrayDestroy(info.pTimeWindowList); int64_t el = taosGetTimestampUs() - st; pReader->cost.initSttBlockReader += (el / 1000.0); - tsdbDebug("init last block reader completed, elapsed time:%" PRId64 "us %s", el, pReader->idStr); - return code; + tsdbDebug("init stt block reader completed, elapsed time:%" PRId64 "us %s", el, pReader->idStr); + return hasData; } static bool hasDataInSttBlock(SSttBlockReader* pSttBlockReader) { return pSttBlockReader->mergeTree.pIter != NULL; } @@ -2356,18 +2334,15 @@ void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInf static int32_t buildComposedDataBlock(STsdbReader* pReader) { int32_t code = TSDB_CODE_SUCCESS; + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + int64_t st = taosGetTimestampUs(); + int32_t step = asc ? 1 : -1; + double el = 0; - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); - SSttBlockReader* pSttBlockReader = pReader->status.fileIter.pSttBlockReader; - - bool asc = ASCENDING_TRAVERSE(pReader->info.order); - int64_t st = taosGetTimestampUs(); - int32_t step = asc ? 
1 : -1; - double el = 0; - SBrinRecord* pRecord = &pBlockInfo->record; - + SSttBlockReader* pSttBlockReader = pReader->status.fileIter.pSttBlockReader; + SBrinRecord* pRecord = &pBlockInfo->record; SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; STableBlockScanInfo* pBlockScanInfo = NULL; @@ -2629,10 +2604,10 @@ static bool moveToNextTable(STableUidList* pOrderedCheckInfo, SReaderStatus* pSt } static int32_t doLoadSttBlockSequentially(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; + SReaderStatus* pStatus = &pReader->status; SSttBlockReader* pSttBlockReader = pStatus->fileIter.pSttBlockReader; - STableUidList* pUidList = &pStatus->uidList; - int32_t code = TSDB_CODE_SUCCESS; + STableUidList* pUidList = &pStatus->uidList; + int32_t code = TSDB_CODE_SUCCESS; if (tSimpleHashGetSize(pStatus->pTableMap) == 0) { return TSDB_CODE_SUCCESS; @@ -2668,8 +2643,8 @@ static int32_t doLoadSttBlockSequentially(STsdbReader* pReader) { continue; } - bool hasDataInLastFile = initSttBlockReader(pSttBlockReader, pScanInfo, pReader); - if (!hasDataInLastFile) { + bool hasDataInSttFile = initSttBlockReader(pSttBlockReader, pScanInfo, pReader); + if (!hasDataInSttFile) { bool hasNexTable = moveToNextTable(pUidList, pStatus); if (!hasNexTable) { return TSDB_CODE_SUCCESS; @@ -2678,12 +2653,31 @@ static int32_t doLoadSttBlockSequentially(STsdbReader* pReader) { continue; } + // if only require the total rows, no need to load data from stt file if it is clean stt blocks + if (pReader->info.execMode == READER_EXEC_ROWS && pScanInfo->cleanSttBlocks) { + SDataBlockInfo* pInfo = &pResBlock->info; + pInfo->rows = pScanInfo->numOfRowsInStt; + pInfo->id.uid = pScanInfo->uid; + pInfo->dataLoad = 1; + pInfo->window = pScanInfo->sttWindow; + setComposedBlockFlag(pReader, true); + pScanInfo->sttKeyInfo.nextProcKey = + ASCENDING_TRAVERSE(pReader->info.order) ? 
pScanInfo->sttWindow.ekey + 1 : pScanInfo->sttWindow.skey - 1; + pScanInfo->sttKeyInfo.status = STT_FILE_NO_DATA; + pScanInfo->lastProcKey = + ASCENDING_TRAVERSE(pReader->info.order) ? pScanInfo->sttWindow.ekey : pScanInfo->sttWindow.skey; + pSttBlockReader->mergeTree.pIter = NULL; + + tsdbDebug("%p uid:%" PRId64 " return clean stt block as one, brange:%" PRId64 "-%" PRId64 " rows:%" PRId64 " %s", + pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pResBlock->info.rows, pReader->idStr); + return TSDB_CODE_SUCCESS; + } + int64_t st = taosGetTimestampUs(); while (1) { - bool hasBlockLData = hasDataInSttBlock(pSttBlockReader); - - // no data in last block and block, no need to proceed. - if (hasBlockLData == false) { + // no data in stt block and block, no need to proceed. + if (!hasDataInSttBlock(pSttBlockReader)) { break; } @@ -2728,14 +2722,13 @@ static bool notOverlapWithSttFiles(SFileDataBlockInfo* pBlockInfo, STableBlockSc } static int32_t doBuildDataBlock(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - SReaderStatus* pStatus = &pReader->status; SDataBlockIter* pBlockIter = &pStatus->blockIter; STableBlockScanInfo* pScanInfo = NULL; SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); - SSttBlockReader* pSttBlockReader = pReader->status.fileIter.pSttBlockReader; + SSttBlockReader* pSttBlockReader = pReader->status.fileIter.pSttBlockReader; bool asc = ASCENDING_TRAVERSE(pReader->info.order); + int32_t code = TSDB_CODE_SUCCESS; if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockInfo->uid, sizeof(pBlockInfo->uid))) { setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlockInfo->record.lastKey, pReader->info.order); @@ -2793,13 +2786,13 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - tsdbDebug("load data in last block firstly %s", pReader->idStr); + tsdbDebug("load data in stt block firstly %s", 
pReader->idStr); int64_t st = taosGetTimestampUs(); // let's load data from stt files initSttBlockReader(pSttBlockReader, pScanInfo, pReader); - // no data in last block, no need to proceed. + // no data in stt block, no need to proceed. while (hasDataInSttBlock(pSttBlockReader)) { ASSERT(pScanInfo->sttKeyInfo.status == STT_FILE_HAS_DATA); @@ -2837,147 +2830,6 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { return (pReader->code != TSDB_CODE_SUCCESS) ? pReader->code : code; } -static int32_t doSumFileBlockRows(STsdbReader* pReader, SDataFReader* pFileReader) { - int64_t st = taosGetTimestampUs(); - LRUHandle* handle = NULL; - int32_t code = tsdbCacheGetBlockIdx(pFileReader->pTsdb->biCache, pFileReader, &handle); - if (code != TSDB_CODE_SUCCESS || handle == NULL) { - goto _end; - } - -#if 0 - int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); - - SArray* aBlockIdx = (SArray*)taosLRUCacheValue(pFileReader->pTsdb->biCache, handle); - size_t num = taosArrayGetSize(aBlockIdx); - if (num == 0) { - tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); - return TSDB_CODE_SUCCESS; - } - - SBlockIdx* pBlockIdx = NULL; - for (int32_t i = 0; i < num; ++i) { - pBlockIdx = (SBlockIdx*)taosArrayGet(aBlockIdx, i); - if (pBlockIdx->suid != pReader->info.suid) { - continue; - } - - STableBlockScanInfo** p = tSimpleHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(pBlockIdx->uid)); - if (p == NULL) { - continue; - } - - STableBlockScanInfo* pScanInfo = *p; - SDataBlk block = {0}; - // for (int32_t j = 0; j < pScanInfo->mapData.nItem; ++j) { - // tGetDataBlk(pScanInfo->mapData.pData + pScanInfo->mapData.aOffset[j], &block); - // pReader->rowsNum += block.nRow; - // } - } -#endif - -_end: - tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); - return code; -} - -static int32_t doSumSttBlockRows(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - SSttBlockReader* pSttBlockReader = pReader->status.fileIter.pSttBlockReader; - 
SSttBlockLoadInfo* pBlockLoadInfo = NULL; -#if 0 - for (int32_t i = 0; i < pReader->pFileReader->pSet->nSttF; ++i) { // open all last file - pBlockLoadInfo = &pSttBlockReader->pInfo[i]; - - code = tsdbReadSttBlk(pReader->pFileReader, i, pBlockLoadInfo->aSttBlk); - if (code) { - return code; - } - - size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk); - if (size >= 1) { - SSttBlk* pStart = taosArrayGet(pBlockLoadInfo->aSttBlk, 0); - SSttBlk* pEnd = taosArrayGet(pBlockLoadInfo->aSttBlk, size - 1); - - // all identical - if (pStart->suid == pEnd->suid) { - if (pStart->suid != pReader->info.suid) { - // no qualified stt block existed - taosArrayClear(pBlockLoadInfo->aSttBlk); - continue; - } - for (int32_t j = 0; j < size; ++j) { - SSttBlk* p = taosArrayGet(pBlockLoadInfo->aSttBlk, j); - pReader->rowsNum += p->nRow; - } - } else { - for (int32_t j = 0; j < size; ++j) { - SSttBlk* p = taosArrayGet(pBlockLoadInfo->aSttBlk, j); - uint64_t s = p->suid; - if (s < pReader->info.suid) { - continue; - } - - if (s == pReader->info.suid) { - pReader->rowsNum += p->nRow; - } else if (s > pReader->info.suid) { - break; - } - } - } - } - } -#endif - - return code; -} - -static int32_t readRowsCountFromFiles(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - - while (1) { - bool hasNext = false; - code = filesetIteratorNext(&pReader->status.fileIter, pReader, &hasNext); - if (code) { - return code; - } - - if (!hasNext) { // no data files on disk - break; - } - - // code = doSumFileBlockRows(pReader, pReader->pFileReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doSumSttBlockRows(pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - pReader->status.loadFromFile = false; - - return code; -} - -static int32_t readRowsCountFromMem(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - int64_t memNum = 0, imemNum = 0; - if (pReader->pReadSnap->pMem != NULL) { - tsdbMemTableCountRows(pReader->pReadSnap->pMem, 
pReader->status.pTableMap, &memNum); - } - - if (pReader->pReadSnap->pIMem != NULL) { - tsdbMemTableCountRows(pReader->pReadSnap->pIMem, pReader->status.pTableMap, &imemNum); - } - - pReader->rowsNum += memNum + imemNum; - - return code; -} - static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) { SReaderStatus* pStatus = &pReader->status; STableUidList* pUidList = &pStatus->uidList; @@ -3107,7 +2959,7 @@ static ERetrieveType doReadDataFromSttFiles(STsdbReader* pReader) { return TSDB_READ_RETURN; } - // all data blocks are checked in this last block file, now let's try the next file + // all data blocks are checked in this stt file, now let's try the next file set ASSERT(pReader->status.pTableIter == NULL); code = initForFirstBlockInFile(pReader, pBlockIter); @@ -3946,7 +3798,7 @@ static int32_t doOpenReaderImpl(STsdbReader* pReader) { int32_t code = TSDB_CODE_SUCCESS; if (pStatus->fileIter.numOfFiles == 0) { pStatus->loadFromFile = false; - } else if (READ_MODE_COUNT_ONLY == pReader->info.readMode) { +// } else if (READER_EXEC_DATA == pReader->info.readMode) { // DO NOTHING } else { code = initForFirstBlockInFile(pReader, pBlockIter); @@ -3987,8 +3839,7 @@ static void setSharedPtr(STsdbReader* pDst, const STsdbReader* pSrc) { // ====================================== EXPOSED APIs ====================================== int32_t tsdbReaderOpen2(void* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t numOfTables, - SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, - SHashObj** pIgnoreTables) { + SSDataBlock* pResBlock, void** ppReader, const char* idstr, SHashObj** pIgnoreTables) { STimeWindow window = pCond->twindows; SVnodeCfg* pConf = &(((SVnode*)pVnode)->config); @@ -4094,13 +3945,10 @@ int32_t tsdbReaderOpen2(void* pVnode, SQueryTableDataCond* pCond, void* pTableLi } pReader->flag = READER_STATUS_SUSPEND; - - if (countOnly) { - pReader->info.readMode = READ_MODE_COUNT_ONLY; - } +// pReader->info.execMode = 
pCond->trimData ? READER_EXEC_ROWS : READER_EXEC_DATA; + pReader->info.execMode = READER_EXEC_ROWS; pReader->pIgnoreTables = pIgnoreTables; - tsdbDebug("%p total numOfTable:%d, window:%" PRId64 " - %" PRId64 ", verRange:%" PRId64 " - %" PRId64 " in this query %s", pReader, numOfTables, pReader->info.window.skey, pReader->info.window.ekey, pReader->info.verRange.minVer, @@ -4332,32 +4180,6 @@ _err: return code; } -static bool tsdbReadRowsCountOnly(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; - - if (pReader->status.loadFromFile == false) { - return false; - } - - code = readRowsCountFromFiles(pReader); - if (code != TSDB_CODE_SUCCESS) { - return false; - } - - code = readRowsCountFromMem(pReader); - if (code != TSDB_CODE_SUCCESS) { - return false; - } - - pBlock->info.rows = pReader->rowsNum; - pBlock->info.id.uid = 0; - pBlock->info.dataLoad = 0; - pReader->rowsNum = 0; - - return pBlock->info.rows > 0; -} - static int32_t doTsdbNextDataBlock2(STsdbReader* pReader, bool* hasNext) { int32_t code = TSDB_CODE_SUCCESS; @@ -4372,10 +4194,6 @@ static int32_t doTsdbNextDataBlock2(STsdbReader* pReader, bool* hasNext) { return code; } - if (READ_MODE_COUNT_ONLY == pReader->info.readMode) { - return tsdbReadRowsCountOnly(pReader); - } - if (pStatus->loadFromFile) { code = buildBlockFromFiles(pReader); if (code != TSDB_CODE_SUCCESS) { @@ -4685,7 +4503,7 @@ SSDataBlock* tsdbRetrieveDataBlock2(STsdbReader* pReader, SArray* pIdList) { } SReaderStatus* pStatus = &pTReader->status; - if (pStatus->composedDataBlock) { + if (pStatus->composedDataBlock || pReader->info.execMode == READER_EXEC_ROWS) { return pTReader->resBlockInfo.pResBlock; } @@ -4722,9 +4540,9 @@ int32_t tsdbReaderReset2(STsdbReader* pReader, SQueryTableDataCond* pCond) { pReader->info.order = pCond->order; pReader->type = TIMEWINDOW_RANGE_CONTAINED; + pReader->info.window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); 
pStatus->loadFromFile = true; pStatus->pTableIter = NULL; - pReader->info.window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); // allocate buffer in order to load data blocks from file memset(&pReader->suppInfo.tsColAgg, 0, sizeof(SColumnDataAgg)); diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c index a058c0173d..2db49b8815 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c @@ -22,15 +22,7 @@ #include "tsdbUtil2.h" #include "tsimplehash.h" -int32_t uidComparFunc(const void* p1, const void* p2) { - uint64_t pu1 = *(uint64_t*)p1; - uint64_t pu2 = *(uint64_t*)p2; - if (pu1 == pu2) { - return 0; - } else { - return (pu1 < pu2) ? -1 : 1; - } -} +static bool overlapWithDelSkylineWithoutVer(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, int32_t order); static int32_t initBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) { int32_t num = numOfTables / pBuf->numPerBucket; @@ -61,6 +53,16 @@ static int32_t initBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) { return TSDB_CODE_SUCCESS; } +int32_t uidComparFunc(const void* p1, const void* p2) { + uint64_t pu1 = *(uint64_t*)p1; + uint64_t pu2 = *(uint64_t*)p2; + if (pu1 == pu2) { + return 0; + } else { + return (pu1 < pu2) ? 
-1 : 1; + } +} + int32_t ensureBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) { if (numOfTables <= pBuf->numOfTables) { return TSDB_CODE_SUCCESS; @@ -243,6 +245,10 @@ static void doCleanupInfoForNextFileset(STableBlockScanInfo* pScanInfo) { taosArrayClear(pScanInfo->pBlockList); taosArrayClear(pScanInfo->pBlockIdxList); taosArrayClear(pScanInfo->pFileDelData); // del data from each file set + pScanInfo->cleanSttBlocks = false; + pScanInfo->numOfRowsInStt = 0; + pScanInfo->sttWindow.skey = INT64_MAX; + pScanInfo->sttWindow.ekey = INT64_MIN; pScanInfo->sttKeyInfo.status = STT_FILE_READER_UNINIT; } @@ -488,7 +494,7 @@ typedef enum { BLK_CHECK_QUIT = 0x2, } ETombBlkCheckEnum; -static void loadNextStatisticsBlock(SSttFileReader* pSttFileReader, const SSttBlockLoadInfo* pBlockLoadInfo, +static void loadNextStatisticsBlock(SSttFileReader* pSttFileReader, STbStatisBlock* pStatisBlock, const TStatisBlkArray* pStatisBlkArray, int32_t numOfRows, int32_t* i, int32_t* j); static int32_t doCheckTombBlock(STombBlock* pBlock, STsdbReader* pReader, int32_t numOfTables, int32_t* j, ETombBlkCheckEnum* pRet) { @@ -662,17 +668,17 @@ void loadMemTombData(SArray** ppMemDelData, STbData* pMemTbData, STbData* piMemT } } -int32_t getNumOfRowsInSttBlock(SSttFileReader *pSttFileReader, SSttBlockLoadInfo *pBlockLoadInfo, uint64_t suid, - const uint64_t* pUidList, int32_t numOfTables) { +int32_t getNumOfRowsInSttBlock(SSttFileReader* pSttFileReader, SSttBlockLoadInfo* pBlockLoadInfo, + TStatisBlkArray* pStatisBlkArray, uint64_t suid, const uint64_t* pUidList, + int32_t numOfTables) { int32_t num = 0; - const TStatisBlkArray *pStatisBlkArray = pBlockLoadInfo->pSttStatisBlkArray; if (TARRAY2_SIZE(pStatisBlkArray) <= 0) { return 0; } int32_t i = 0; - while((i < TARRAY2_SIZE(pStatisBlkArray)) && (pStatisBlkArray->data[i].minTbid.suid < suid)) { + while((i < TARRAY2_SIZE(pStatisBlkArray)) && (pStatisBlkArray->data[i].maxTbid.suid < suid)) { ++i; } @@ -681,64 +687,65 @@ int32_t 
getNumOfRowsInSttBlock(SSttFileReader *pSttFileReader, SSttBlockLoadInfo } SStatisBlk *p = &pStatisBlkArray->data[i]; - if (pBlockLoadInfo->statisBlock == NULL) { - pBlockLoadInfo->statisBlock = taosMemoryCalloc(1, sizeof(STbStatisBlock)); - tStatisBlockInit(pBlockLoadInfo->statisBlock); - } + STbStatisBlock* pStatisBlock = taosMemoryCalloc(1, sizeof(STbStatisBlock)); + tStatisBlockInit(pStatisBlock); int64_t st = taosGetTimestampMs(); - tsdbSttFileReadStatisBlock(pSttFileReader, p, pBlockLoadInfo->statisBlock); - pBlockLoadInfo->statisBlockIndex = i; + tsdbSttFileReadStatisBlock(pSttFileReader, p, pStatisBlock); double el = (taosGetTimestampMs() - st) / 1000.0; pBlockLoadInfo->cost.loadStatisBlocks += 1; pBlockLoadInfo->cost.statisElapsedTime += el; - STbStatisBlock *pBlock = pBlockLoadInfo->statisBlock; - int32_t index = 0; - while (index < TARRAY2_SIZE(pBlock->suid) && pBlock->suid->data[index] < suid) { + while (index < TARRAY2_SIZE(pStatisBlock->suid) && pStatisBlock->suid->data[index] < suid) { ++index; } - if (index >= TARRAY2_SIZE(pBlock->suid)) { + if (index >= TARRAY2_SIZE(pStatisBlock->suid)) { + tStatisBlockDestroy(pStatisBlock); + taosMemoryFreeClear(pStatisBlock); return num; } int32_t j = index; int32_t uidIndex = 0; - while (i < TARRAY2_SIZE(pStatisBlkArray) && uidIndex <= numOfTables) { + while (i < TARRAY2_SIZE(pStatisBlkArray) && uidIndex < numOfTables) { p = &pStatisBlkArray->data[i]; if (p->minTbid.suid > suid) { + tStatisBlockDestroy(pStatisBlock); + taosMemoryFreeClear(pStatisBlock); return num; } uint64_t uid = pUidList[uidIndex]; - if (pBlock->uid->data[j] == uid) { - num += pBlock->count->data[j]; + if (pStatisBlock->uid->data[j] == uid) { + num += pStatisBlock->count->data[j]; uidIndex += 1; j += 1; - loadNextStatisticsBlock(pSttFileReader, pBlockLoadInfo, pStatisBlkArray, pBlock->suid->size, &i, &j); - } else if (pBlock->uid->data[j] < uid) { + loadNextStatisticsBlock(pSttFileReader, pStatisBlock, pStatisBlkArray, 
pStatisBlock->suid->size, &i, &j); + } else if (pStatisBlock->uid->data[j] < uid) { j += 1; - loadNextStatisticsBlock(pSttFileReader, pBlockLoadInfo, pStatisBlkArray, pBlock->suid->size, &i, &j); + loadNextStatisticsBlock(pSttFileReader, pStatisBlock, pStatisBlkArray, pStatisBlock->suid->size, &i, &j); } else { uidIndex += 1; } } + tStatisBlockDestroy(pStatisBlock); + taosMemoryFreeClear(pStatisBlock); return num; } // load next stt statistics block -static void loadNextStatisticsBlock(SSttFileReader* pSttFileReader, const SSttBlockLoadInfo* pBlockLoadInfo, +static void loadNextStatisticsBlock(SSttFileReader* pSttFileReader, STbStatisBlock* pStatisBlock, const TStatisBlkArray* pStatisBlkArray, int32_t numOfRows, int32_t* i, int32_t* j) { if ((*j) >= numOfRows) { (*i) += 1; (*j) = 0; if ((*i) < TARRAY2_SIZE(pStatisBlkArray)) { - tsdbSttFileReadStatisBlock(pSttFileReader, &pStatisBlkArray->data[(*i)], pBlockLoadInfo->statisBlock); + tsdbSttFileReadStatisBlock(pSttFileReader, &pStatisBlkArray->data[(*i)], pStatisBlock); } } } @@ -762,7 +769,7 @@ void doAdjustValidDataIters(SArray* pLDIterList, int32_t numOfFileObj) { } } -int32_t adjustLDataIters(SArray* pSttFileBlockIterArray, STFileSet* pFileSet) { +int32_t adjustSttDataIters(SArray* pSttFileBlockIterArray, STFileSet* pFileSet) { int32_t numOfLevels = pFileSet->lvlArr->size; // add the list/iter placeholder @@ -791,7 +798,7 @@ int32_t tsdbGetRowsInSttFiles(STFileSet* pFileSet, SArray* pSttFileBlockIterArra } // add the list/iter placeholder - adjustLDataIters(pSttFileBlockIterArray, pFileSet); + adjustSttDataIters(pSttFileBlockIterArray, pFileSet); for (int32_t j = 0; j < numOfLevels; ++j) { SSttLvl* pSttLevel = pFileSet->lvlArr->data[j]; @@ -819,7 +826,8 @@ int32_t tsdbGetRowsInSttFiles(STFileSet* pFileSet, SArray* pSttFileBlockIterArra } // load stt blocks statis for all stt-blocks, to decide if the data of queried table exists in current stt file - int32_t code = tsdbSttFileReadStatisBlk(pIter->pReader, (const 
TStatisBlkArray **)&pIter->pBlockLoadInfo->pSttStatisBlkArray); + TStatisBlkArray *pStatisBlkArray = NULL; + int32_t code = tsdbSttFileReadStatisBlk(pIter->pReader, (const TStatisBlkArray **)&pStatisBlkArray); if (code != TSDB_CODE_SUCCESS) { tsdbError("failed to load stt block statistics, code:%s, %s", tstrerror(code), pstr); continue; @@ -829,9 +837,199 @@ int32_t tsdbGetRowsInSttFiles(STFileSet* pFileSet, SArray* pSttFileBlockIterArra STsdbReader* pReader = pConf->pReader; int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); uint64_t* pUidList = pReader->status.uidList.tableUidList; - numOfRows += getNumOfRowsInSttBlock(pIter->pReader, pIter->pBlockLoadInfo, pConf->suid, pUidList, numOfTables); + numOfRows += getNumOfRowsInSttBlock(pIter->pReader, pIter->pBlockLoadInfo, pStatisBlkArray, pConf->suid, pUidList, + numOfTables); } } return numOfRows; +} + +// overlap with deletion skyline +static bool overlapWithTimeWindow(STimeWindow* p1, STimeWindow* pQueryWindow, STableBlockScanInfo* pBlockScanInfo, + int32_t order) { + // overlap with query window + if (!(p1->skey >= pQueryWindow->skey && p1->ekey <= pQueryWindow->ekey)) { + return true; + } + + // overlap with mem data + SIterInfo* pMemIter = &pBlockScanInfo->iter; + SIterInfo* pIMemIter = &pBlockScanInfo->iiter; + + if ((pMemIter->hasVal) && p1->ekey >= pMemIter->iter->pTbData->minKey && p1->skey <= pMemIter->iter->pTbData->maxKey) { + return true; + } + + // overlap with imem data + if ((pIMemIter->hasVal) && p1->ekey >= pIMemIter->iter->pTbData->minKey && p1->skey <= pIMemIter->iter->pTbData->maxKey) { + return true; + } + + // overlap with deletion skyline + SBrinRecord record = {.firstKey = p1->skey, .lastKey = p1->ekey}; + if (overlapWithDelSkylineWithoutVer(pBlockScanInfo, &record, order)) { + return true; + } + + return false; +} + +static int32_t sortUidComparFn(const void* p1, const void* p2) { + const STimeWindow* px1 = p1; + const STimeWindow* px2 = p2; + if (px1->skey == px2->skey) 
{ + return 0; + } else { + return px1->skey < px2->skey? -1:1; + } +} + +bool isCleanSttBlock(SArray* pTimewindowList, STimeWindow* pQueryWindow, STableBlockScanInfo *pScanInfo, int32_t order) { + // check if it overlap with del skyline + taosArraySort(pTimewindowList, sortUidComparFn); + + int32_t num = taosArrayGetSize(pTimewindowList); + if (num == 0) { + return false; + } + + STimeWindow* p = taosArrayGet(pTimewindowList, 0); + if (overlapWithTimeWindow(p, pQueryWindow, pScanInfo, order)) { + return false; + } + + for (int32_t i = 0; i < num - 1; ++i) { + STimeWindow* p1 = taosArrayGet(pTimewindowList, i); + STimeWindow* p2 = taosArrayGet(pTimewindowList, i + 1); + + if (p1->ekey >= p2->skey) { + return false; + } + + bool overlap = overlapWithTimeWindow(p2, pQueryWindow, pScanInfo, order); + if (overlap) { + return false; + } + } + + return true; +} + +static bool doCheckDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, + int32_t startIndex) { + size_t num = taosArrayGetSize(pBlockScanInfo->delSkyline); + + for (int32_t i = startIndex; i < num; i += 1) { + TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, i); + if (p->ts >= pRecord->firstKey && p->ts <= pRecord->lastKey) { + if (p->version >= pRecord->minVer) { + return true; + } + } else if (p->ts < pRecord->firstKey) { // p->ts < pBlock->minKey.ts + if (p->version >= pRecord->minVer) { + if (i < num - 1) { + TSDBKEY* pnext = taosArrayGet(pBlockScanInfo->delSkyline, i + 1); + if (pnext->ts >= pRecord->firstKey) { + return true; + } + } else { // it must be the last point + ASSERT(p->version == 0); + } + } + } else { // (p->ts > pBlock->maxKey.ts) { + return false; + } + } + + return false; +} + +static bool doCheckDatablockOverlapWithoutVersion(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, + int32_t startIndex) { + size_t num = taosArrayGetSize(pBlockScanInfo->delSkyline); + + for (int32_t i = startIndex; i < num; i += 1) { + TSDBKEY* p = 
taosArrayGet(pBlockScanInfo->delSkyline, i); + if (p->ts >= pRecord->firstKey && p->ts <= pRecord->lastKey) { + return true; + } else if (p->ts < pRecord->firstKey) { // p->ts < pBlock->minKey.ts + if (i < num - 1) { + TSDBKEY* pnext = taosArrayGet(pBlockScanInfo->delSkyline, i + 1); + if (pnext->ts >= pRecord->firstKey) { + return true; + } + } + } else { // (p->ts > pBlock->maxKey.ts) { + return false; + } + } + + return false; +} + +bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, int32_t order) { + if (pBlockScanInfo->delSkyline == NULL || (taosArrayGetSize(pBlockScanInfo->delSkyline) == 0)) { + return false; + } + + // ts is not overlap + TSDBKEY* pFirst = taosArrayGet(pBlockScanInfo->delSkyline, 0); + TSDBKEY* pLast = taosArrayGetLast(pBlockScanInfo->delSkyline); + if (pRecord->firstKey > pLast->ts || pRecord->lastKey < pFirst->ts) { + return false; + } + + // version is not overlap + if (ASCENDING_TRAVERSE(order)) { + return doCheckDatablockOverlap(pBlockScanInfo, pRecord, pBlockScanInfo->fileDelIndex); + } else { + int32_t index = pBlockScanInfo->fileDelIndex; + while (1) { + TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, index); + if (p->ts > pRecord->firstKey && index > 0) { + index -= 1; + } else { // find the first point that is smaller than the minKey.ts of dataBlock. 
+ if (p->ts == pRecord->firstKey && p->version < pRecord->maxVer && index > 0) { + index -= 1; + } + break; + } + } + + return doCheckDatablockOverlap(pBlockScanInfo, pRecord, index); + } +} + +bool overlapWithDelSkylineWithoutVer(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, int32_t order) { + if (pBlockScanInfo->delSkyline == NULL || (taosArrayGetSize(pBlockScanInfo->delSkyline) == 0)) { + return false; + } + + // ts is not overlap + TSDBKEY* pFirst = taosArrayGet(pBlockScanInfo->delSkyline, 0); + TSDBKEY* pLast = taosArrayGetLast(pBlockScanInfo->delSkyline); + if (pRecord->firstKey > pLast->ts || pRecord->lastKey < pFirst->ts) { + return false; + } + + // version is not overlap + if (ASCENDING_TRAVERSE(order)) { + return doCheckDatablockOverlapWithoutVersion(pBlockScanInfo, pRecord, pBlockScanInfo->fileDelIndex); + } else { + int32_t index = pBlockScanInfo->fileDelIndex; + while (1) { + TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, index); + if (p->ts > pRecord->firstKey && index > 0) { + index -= 1; + } else { // find the first point that is smaller than the minKey.ts of dataBlock. + if (p->ts == pRecord->firstKey && index > 0) { + index -= 1; + } + break; + } + } + + return doCheckDatablockOverlapWithoutVersion(pBlockScanInfo, pRecord, index); + } } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h index e9c7449082..401eadee0f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h @@ -39,8 +39,7 @@ typedef enum { typedef struct STsdbReaderInfo { uint64_t suid; STSchema* pSchema; - EReadMode readMode; - uint64_t rowsNum; + EExecMode execMode; STimeWindow window; SVersionRange verRange; int16_t order; @@ -74,6 +73,11 @@ typedef struct SSttKeyInfo { int64_t nextProcKey; } SSttKeyInfo; +// clean stt file blocks: +// 1. not overlap with stt blocks in other stt files of the same fileset +// 2. 
not overlap with delete skyline +// 3. not overlap with in-memory data (mem/imem) +// 4. not overlap with data file blocks typedef struct STableBlockScanInfo { uint64_t uid; TSKEY lastProcKey; @@ -88,6 +92,9 @@ typedef struct STableBlockScanInfo { int32_t fileDelIndex; // file block delete index int32_t sttBlockDelIndex; // delete index for last block bool iterInit; // whether to initialize the in-memory skip list iterator or not + bool cleanSttBlocks; // stt block is clean in current fileset + int64_t numOfRowsInStt; + STimeWindow sttWindow; } STableBlockScanInfo; typedef struct SResultBlockInfo { @@ -145,6 +152,7 @@ typedef struct SBlockLoadSuppInfo { bool smaValid; // the sma on all queried columns are activated } SBlockLoadSuppInfo; +// each blocks in stt file not overlaps with in-memory/data-file/tomb-files, and not overlap with any other blocks in stt-file typedef struct SSttBlockReader { STimeWindow window; SVersionRange verRange; @@ -262,12 +270,17 @@ bool blockIteratorNext(SDataBlockIter* pBlockIter, const char* idStr); void loadMemTombData(SArray** ppMemDelData, STbData* pMemTbData, STbData* piMemTbData, int64_t ver); int32_t loadDataFileTombDataForAll(STsdbReader* pReader); int32_t loadSttTombDataForAll(STsdbReader* pReader, SSttFileReader* pSttFileReader, SSttBlockLoadInfo* pLoadInfo); -int32_t getNumOfRowsInSttBlock(SSttFileReader *pSttFileReader, SSttBlockLoadInfo *pBlockLoadInfo, uint64_t suid, - const uint64_t* pUidList, int32_t numOfTables); +int32_t getNumOfRowsInSttBlock(SSttFileReader* pSttFileReader, SSttBlockLoadInfo* pBlockLoadInfo, + TStatisBlkArray* pStatisBlkArray, uint64_t suid, const uint64_t* pUidList, + int32_t numOfTables); + void destroyLDataIter(SLDataIter* pIter); -int32_t adjustLDataIters(SArray* pSttFileBlockIterArray, STFileSet* pFileSet); +int32_t adjustSttDataIters(SArray* pSttFileBlockIterArray, STFileSet* pFileSet); int32_t tsdbGetRowsInSttFiles(STFileSet* pFileSet, SArray* pSttFileBlockIterArray, STsdb* pTsdb, 
SMergeTreeConf* pConf, const char* pstr); +bool isCleanSttBlock(SArray* pTimewindowList, STimeWindow* pQueryWindow, STableBlockScanInfo* pScanInfo, int32_t order); +bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, int32_t order); + typedef struct { SArray* pTombData; } STableLoadInfo; diff --git a/source/dnode/vnode/src/vnd/vnodeInitApi.c b/source/dnode/vnode/src/vnd/vnodeInitApi.c index a6673917bf..48e82700c3 100644 --- a/source/dnode/vnode/src/vnd/vnodeInitApi.c +++ b/source/dnode/vnode/src/vnd/vnodeInitApi.c @@ -42,7 +42,7 @@ void initStorageAPI(SStorageAPI* pAPI) { void initTsdbReaderAPI(TsdReader* pReader) { pReader->tsdReaderOpen = (int32_t(*)(void*, SQueryTableDataCond*, void*, int32_t, SSDataBlock*, void**, const char*, - bool, SHashObj**))tsdbReaderOpen2; + SHashObj**))tsdbReaderOpen2; pReader->tsdReaderClose = tsdbReaderClose2; pReader->tsdNextDataBlock = tsdbNextDataBlock2; diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 1f82a9477b..1fa911e646 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -1232,7 +1232,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT if (pScanBaseInfo->dataReader == NULL) { int32_t code = pTaskInfo->storageAPI.tsdReader.tsdReaderOpen( pScanBaseInfo->readHandle.vnode, &pScanBaseInfo->cond, &keyInfo, 1, pScanInfo->pResBlock, - (void**)&pScanBaseInfo->dataReader, id, false, NULL); + (void**)&pScanBaseInfo->dataReader, id, NULL); if (code != TSDB_CODE_SUCCESS) { qError("prepare read tsdb snapshot failed, uid:%" PRId64 ", code:%s %s", pOffset->uid, tstrerror(code), id); terrno = code; @@ -1291,7 +1291,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT int32_t size = tableListGetSize(pTableListInfo); pTaskInfo->storageAPI.tsdReader.tsdReaderOpen(pInfo->vnode, &pTaskInfo->streamInfo.tableCond, pList, size, NULL, - 
(void**)&pInfo->dataReader, NULL, false, NULL); + (void**)&pInfo->dataReader, NULL, NULL); cleanupQueryTableDataCond(&pTaskInfo->streamInfo.tableCond); strcpy(pTaskInfo->streamInfo.tbName, mtInfo.tbName); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 448c585869..813e086c55 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -889,7 +889,7 @@ static SSDataBlock* groupSeqTableScan(SOperatorInfo* pOperator) { ASSERT(pInfo->base.dataReader == NULL); int32_t code = pAPI->tsdReader.tsdReaderOpen(pInfo->base.readHandle.vnode, &pInfo->base.cond, pList, num, pInfo->pResBlock, - (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), pInfo->countOnly, &pInfo->pIgnoreTables); + (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), &pInfo->pIgnoreTables); if (code != TSDB_CODE_SUCCESS) { T_LONG_JMP(pTaskInfo->env, code); } @@ -1179,7 +1179,7 @@ static SSDataBlock* readPreVersionData(SOperatorInfo* pTableScanOp, uint64_t tbU SSDataBlock* pBlock = pTableScanInfo->pResBlock; STsdbReader* pReader = NULL; int32_t code = pAPI->tsdReader.tsdReaderOpen(pTableScanInfo->base.readHandle.vnode, &cond, &tblInfo, 1, pBlock, - (void**)&pReader, GET_TASKID(pTaskInfo), false, NULL); + (void**)&pReader, GET_TASKID(pTaskInfo), NULL); if (code != TSDB_CODE_SUCCESS) { terrno = code; T_LONG_JMP(pTaskInfo->env, code); @@ -3373,7 +3373,7 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { param->pOperator = pOperator; STableKeyInfo* startKeyInfo = tableListGetInfo(pInfo->base.pTableListInfo, tableStartIdx); pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, &pInfo->base.cond, startKeyInfo, numOfTable, pInfo->pReaderBlock, - (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), false, &pInfo->mSkipTables); + (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), &pInfo->mSkipTables); SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); ps->param = param; diff --git 
a/source/libs/executor/src/sysscanoperator.c b/source/libs/executor/src/sysscanoperator.c index 6bdbefc5c0..ac4b8e88c7 100644 --- a/source/libs/executor/src/sysscanoperator.c +++ b/source/libs/executor/src/sysscanoperator.c @@ -2304,7 +2304,7 @@ SOperatorInfo* createDataBlockInfoScanOperator(SReadHandle* readHandle, SBlockDi void* pList = tableListGetInfo(pTableListInfo, 0); code = readHandle->api.tsdReader.tsdReaderOpen(readHandle->vnode, &cond, pList, num, pInfo->pResBlock, - (void**)&pInfo->pHandle, pTaskInfo->id.str, false, NULL); + (void**)&pInfo->pHandle, pTaskInfo->id.str, NULL); cleanupQueryTableDataCond(&cond); if (code != 0) { goto _error; From 446c14da72666d4f27287e3afb9c2241101ac169 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 8 Dec 2023 10:03:36 +0800 Subject: [PATCH 46/65] fix(tsdb): opt read stt file --- include/common/tcommon.h | 2 +- source/dnode/vnode/src/tsdb/tsdbRead2.c | 3 +-- source/libs/executor/src/executil.c | 3 +++ source/libs/executor/src/scanoperator.c | 1 - 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 81e3af88a5..24e5d186b9 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -253,7 +253,7 @@ typedef struct SQueryTableDataCond { STimeWindow twindows; int64_t startVersion; int64_t endVersion; - bool trimData; // response the actual data, not only the rows in the attribute of info.row of ssdatablock + bool notLoadData; // response the actual data, not only the rows in the attribute of info.row of ssdatablock } SQueryTableDataCond; int32_t tEncodeDataBlock(void** buf, const SSDataBlock* pBlock); diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index e09ac504d0..bb0efd858b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -3945,8 +3945,7 @@ int32_t tsdbReaderOpen2(void* pVnode, SQueryTableDataCond* pCond, void* pTableLi } pReader->flag = 
READER_STATUS_SUSPEND; -// pReader->info.execMode = pCond->trimData ? READER_EXEC_ROWS : READER_EXEC_DATA; - pReader->info.execMode = READER_EXEC_ROWS; + pReader->info.execMode = pCond->notLoadData ? READER_EXEC_ROWS : READER_EXEC_DATA; pReader->pIgnoreTables = pIgnoreTables; tsdbDebug("%p total numOfTable:%d, window:%" PRId64 " - %" PRId64 ", verRange:%" PRId64 " - %" PRId64 diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 39b47504c6..b0c0799351 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -1734,6 +1734,9 @@ int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysi pCond->endVersion = -1; pCond->skipRollup = readHandle->skipRollup; + // allowed read stt file optimization mode + pCond->notLoadData = (pTableScanNode->dataRequired == FUNC_DATA_REQUIRED_NOT_LOAD); + int32_t j = 0; for (int32_t i = 0; i < pCond->numOfCols; ++i) { STargetNode* pNode = (STargetNode*)nodesListGetNode(pTableScanNode->scan.pScanCols, i); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 813e086c55..7885f3bee1 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1059,7 +1059,6 @@ SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode, pInfo->base.readerAPI = pTaskInfo->storageAPI.tsdReader; initResultSizeInfo(&pOperator->resultInfo, 4096); pInfo->pResBlock = createDataBlockFromDescNode(pDescNode); - // blockDataEnsureCapacity(pInfo->pResBlock, pOperator->resultInfo.capacity); code = filterInitFromNode((SNode*)pTableScanNode->scan.node.pConditions, &pOperator->exprSupp.pFilterInfo, 0); if (code != TSDB_CODE_SUCCESS) { From 9477146b30625e4b9c36139f46cf002b4351888f Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 8 Dec 2023 10:29:38 +0800 Subject: [PATCH 47/65] fix(tsdb): fix syntax error. 
--- source/dnode/vnode/src/tsdb/tsdbRead2.c | 2 +- source/libs/executor/src/scanoperator.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 50fd6d447d..c49da49814 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -2873,7 +2873,7 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { return (pReader->code != TSDB_CODE_SUCCESS) ? pReader->code : code; } -static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) { +static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader, int64_t endKey) { SReaderStatus* pStatus = &pReader->status; STableUidList* pUidList = &pStatus->uidList; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 6ce6c5f6eb..ea73f60468 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3426,7 +3426,7 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { int32_t numOfTable = tableEndIdx - tableStartIdx + 1; STableKeyInfo* startKeyInfo = tableListGetInfo(pInfo->base.pTableListInfo, tableStartIdx); pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, &pInfo->base.cond, startKeyInfo, numOfTable, pInfo->pReaderBlock, - (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), false, &pInfo->mSkipTables); + (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), &pInfo->mSkipTables); if (pInfo->filesetDelimited) { pAPI->tsdReader.tsdSetFilesetDelimited(pInfo->base.dataReader); } From 2e13b0b97df10ce5bd560ff1ac7995ffa1bfd691 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 8 Dec 2023 10:43:17 +0800 Subject: [PATCH 48/65] refactor: do some internal refactor. 
--- source/dnode/vnode/src/tsdb/tsdbRead2.c | 2 -- source/dnode/vnode/src/tsdb/tsdbReadUtil.h | 1 - 2 files changed, 3 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index c49da49814..ed62e9ea9c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -4104,8 +4104,6 @@ int32_t tsdbReaderSuspend2(STsdbReader* pReader) { SReaderStatus* pStatus = &pReader->status; STableBlockScanInfo* pBlockScanInfo = NULL; - pReader->status.suspendInvoked = true; // record the suspend status - if (pStatus->loadFromFile) { SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); if (pBlockInfo != NULL) { diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h index 74b88d6ec8..89e4c4224d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h @@ -195,7 +195,6 @@ typedef struct SFileBlockDumpInfo { } SFileBlockDumpInfo; typedef struct SReaderStatus { - bool suspendInvoked; bool loadFromFile; // check file stage bool composedDataBlock; // the returned data block is a composed block or not SSHashObj* pTableMap; // SHash From da1207ef742f25d4f9f21b245259bd12d4bc18f5 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 8 Dec 2023 11:07:45 +0800 Subject: [PATCH 49/65] fix(tsdb): add return flag for stt files. 
--- source/dnode/vnode/src/tsdb/tsdbRead2.c | 3 ++- source/dnode/vnode/src/tsdb/tsdbReadUtil.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index ed62e9ea9c..7900afb889 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -2069,7 +2069,7 @@ static bool initSttBlockReader(SSttBlockReader* pSttBlockReader, STableBlockScan // second time init stt block reader if (pScanInfo->cleanSttBlocks && pReader->info.execMode == READER_EXEC_ROWS) { - return true; + return !pScanInfo->sttBlockReturned; } STimeWindow w = pSttBlockReader->window; @@ -2710,6 +2710,7 @@ static int32_t doLoadSttBlockSequentially(STsdbReader* pReader) { pScanInfo->lastProcKey = ASCENDING_TRAVERSE(pReader->info.order) ? pScanInfo->sttWindow.ekey : pScanInfo->sttWindow.skey; pSttBlockReader->mergeTree.pIter = NULL; + pScanInfo->sttBlockReturned = true; tsdbDebug("%p uid:%" PRId64 " return clean stt block as one, brange:%" PRId64 "-%" PRId64 " rows:%" PRId64 " %s", pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h index 89e4c4224d..a9e80e1b8c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h @@ -94,6 +94,7 @@ typedef struct STableBlockScanInfo { int32_t sttBlockDelIndex; // delete index for last block bool iterInit; // whether to initialize the in-memory skip list iterator or not bool cleanSttBlocks; // stt block is clean in current fileset + bool sttBlockReturned; // result block returned alreay int64_t numOfRowsInStt; STimeWindow sttWindow; } STableBlockScanInfo; From b9d056c6a8b6bec29605e03a4811edff24c1bd4e Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 8 Dec 2023 14:11:36 +0800 Subject: [PATCH 50/65] fix: little fix --- 
source/dnode/vnode/src/vnd/vnodeAsync.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeAsync.c b/source/dnode/vnode/src/vnd/vnodeAsync.c index c95d2324aa..c6bf60fa76 100644 --- a/source/dnode/vnode/src/vnd/vnodeAsync.c +++ b/source/dnode/vnode/src/vnd/vnodeAsync.c @@ -177,12 +177,15 @@ static int32_t vnodeAsyncTaskDone(SVAsync *async, SVATask *task) { } static int32_t vnodeAsyncCancelAllTasks(SVAsync *async) { - for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { - while (async->queue[i].next != &async->queue[i]) { - SVATask *task = async->queue[i].next; - task->prev->next = task->next; - task->next->prev = task->prev; - vnodeAsyncTaskDone(async, task); + while (async->queue[0].next != &async->queue[0] || async->queue[1].next != &async->queue[1] || + async->queue[2].next != &async->queue[2]) { + for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { + while (async->queue[i].next != &async->queue[i]) { + SVATask *task = async->queue[i].next; + task->prev->next = task->next; + task->next->prev = task->prev; + vnodeAsyncTaskDone(async, task); + } } } return 0; From 9c72ce846e63018ca210c7b2bc2827d2b77fad85 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 8 Dec 2023 14:23:37 +0800 Subject: [PATCH 51/65] fix(tsdb): add more condition for not load data. 
--- source/dnode/vnode/src/inc/vnodeInt.h | 2 -- source/dnode/vnode/src/tsdb/tsdbRead2.c | 2 +- source/libs/executor/src/executil.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 7ed0b5103f..50a3870c6e 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -236,10 +236,8 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskDropHTask(STQ* pTq, SRpcMsg* pMsg); -int32_t tqRestartStreamTasks(STQ* pTq); int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver); int32_t tqScanWal(STQ* pTq); -int32_t tqStartStreamTasks(STQ* pTq); int tqCommit(STQ*); int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd); diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 7900afb889..7b18e33e3c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -3993,7 +3993,7 @@ int32_t tsdbReaderOpen2(void* pVnode, SQueryTableDataCond* pCond, void* pTableLi } pReader->flag = READER_STATUS_SUSPEND; - pReader->info.execMode = pCond->notLoadData ? READER_EXEC_ROWS : READER_EXEC_DATA; + pReader->info.execMode = pCond->notLoadData? 
READER_EXEC_ROWS : READER_EXEC_DATA; pReader->pIgnoreTables = pIgnoreTables; tsdbDebug("%p total numOfTable:%d, window:%" PRId64 " - %" PRId64 ", verRange:%" PRId64 " - %" PRId64 diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index b0c0799351..5c864e7405 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -1735,7 +1735,7 @@ int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysi pCond->skipRollup = readHandle->skipRollup; // allowed read stt file optimization mode - pCond->notLoadData = (pTableScanNode->dataRequired == FUNC_DATA_REQUIRED_NOT_LOAD); + pCond->notLoadData = (pTableScanNode->dataRequired == FUNC_DATA_REQUIRED_NOT_LOAD) && (pTableScanNode->scan.node.pConditions == NULL); int32_t j = 0; for (int32_t i = 0; i < pCond->numOfCols; ++i) { From 13193e29b746e8565ec8fcd811aa9dc081fd5a4c Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao> Date: Fri, 8 Dec 2023 14:28:40 +0800 Subject: [PATCH 52/65] delete invalid event window --- source/libs/executor/src/streameventwindowoperator.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/libs/executor/src/streameventwindowoperator.c b/source/libs/executor/src/streameventwindowoperator.c index 55029ac036..914b95ba83 100644 --- a/source/libs/executor/src/streameventwindowoperator.c +++ b/source/libs/executor/src/streameventwindowoperator.c @@ -323,6 +323,9 @@ static void doStreamEventAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl tSimpleHashRemove(pSeUpdated, &curWin.winInfo.sessionWin, sizeof(SSessionKey)); doDeleteEventWindow(pAggSup, pSeUpdated, &curWin.winInfo.sessionWin); releaseOutputBuf(pAggSup->pState, curWin.winInfo.pStatePos, &pAPI->stateStore); + SSessionKey tmpSeInfo = {0}; + getSessionHashKey(&curWin.winInfo.sessionWin, &tmpSeInfo); + tSimpleHashPut(pStDeleted, &tmpSeInfo, sizeof(SSessionKey), NULL, 0); continue; } code = doOneWindowAggImpl(&pInfo->twAggSup.timeWindowData, 
&curWin.winInfo, &pResult, i, winRows, rows, numOfOutput, From a0487529d029fb58db753c905222edbe6dbfb237 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 8 Dec 2023 14:37:21 +0800 Subject: [PATCH 53/65] more fix --- source/dnode/vnode/src/tsdb/tsdbCommit2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit2.c b/source/dnode/vnode/src/tsdb/tsdbCommit2.c index a974eb27bf..dc76aa61b2 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit2.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit2.c @@ -435,7 +435,7 @@ _exit: tsdbDebug("vgId:%d %s done, fid:%d minKey:%" PRId64 " maxKey:%" PRId64 " expLevel:%d", TD_VID(tsdb->pVnode), __func__, committer->ctx->fid, committer->ctx->minKey, committer->ctx->maxKey, committer->ctx->expLevel); } - return 0; + return code; } static int32_t tsdbCommitFileSetEnd(SCommitter2 *committer) { From 63b34c7acfeaf53dffdc7b39e1eab871eb90443f Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 8 Dec 2023 16:09:58 +0800 Subject: [PATCH 54/65] fix(tsdb): add check for not load stt file blocks. 
--- source/dnode/vnode/src/tsdb/tsdbMergeTree.c | 43 +++++++++++---------- source/libs/executor/src/executil.c | 4 +- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c index ee92edc2a9..0f78fbfbfb 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c +++ b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c @@ -350,30 +350,33 @@ static int32_t loadSttStatisticsBlockData(SSttFileReader *pSttFileReader, SSttBl tsdbSttFileReadStatisBlock(pSttFileReader, &pStatisBlkArray->data[k], &block); int32_t i = 0; - while(block.suid->data[i] != suid) { + int32_t rows = TARRAY2_SIZE(block.suid); + while (i < rows && block.suid->data[i] != suid) { ++i; } - int32_t rows = TARRAY2_SIZE(block.suid); - if (pBlockLoadInfo->info.pUid == NULL) { - pBlockLoadInfo->info.pUid = taosArrayInit(rows, sizeof(int64_t)); - pBlockLoadInfo->info.pFirstKey = taosArrayInit(rows, sizeof(int64_t)); - pBlockLoadInfo->info.pLastKey = taosArrayInit(rows, sizeof(int64_t)); - pBlockLoadInfo->info.pCount = taosArrayInit(rows, sizeof(int64_t)); - } + // existed + if (i < rows) { + if (pBlockLoadInfo->info.pUid == NULL) { + pBlockLoadInfo->info.pUid = taosArrayInit(rows, sizeof(int64_t)); + pBlockLoadInfo->info.pFirstKey = taosArrayInit(rows, sizeof(int64_t)); + pBlockLoadInfo->info.pLastKey = taosArrayInit(rows, sizeof(int64_t)); + pBlockLoadInfo->info.pCount = taosArrayInit(rows, sizeof(int64_t)); + } - if (pStatisBlkArray->data[k].maxTbid.suid == suid) { - taosArrayAddBatch(pBlockLoadInfo->info.pUid, &block.uid->data[i], rows - i); - taosArrayAddBatch(pBlockLoadInfo->info.pFirstKey, &block.firstKey->data[i], rows - i); - taosArrayAddBatch(pBlockLoadInfo->info.pLastKey, &block.lastKey->data[i], rows - i); - taosArrayAddBatch(pBlockLoadInfo->info.pCount, &block.count->data[i], rows - i); - } else { - while(i < rows && block.suid->data[i] == suid) { - taosArrayPush(pBlockLoadInfo->info.pUid, &block.uid->data[i]); 
- taosArrayPush(pBlockLoadInfo->info.pFirstKey, &block.firstKey->data[i]); - taosArrayPush(pBlockLoadInfo->info.pLastKey, &block.lastKey->data[i]); - taosArrayPush(pBlockLoadInfo->info.pCount, &block.count->data[i]); - i += 1; + if (pStatisBlkArray->data[k].maxTbid.suid == suid) { + taosArrayAddBatch(pBlockLoadInfo->info.pUid, &block.uid->data[i], rows - i); + taosArrayAddBatch(pBlockLoadInfo->info.pFirstKey, &block.firstKey->data[i], rows - i); + taosArrayAddBatch(pBlockLoadInfo->info.pLastKey, &block.lastKey->data[i], rows - i); + taosArrayAddBatch(pBlockLoadInfo->info.pCount, &block.count->data[i], rows - i); + } else { + while (i < rows && block.suid->data[i] == suid) { + taosArrayPush(pBlockLoadInfo->info.pUid, &block.uid->data[i]); + taosArrayPush(pBlockLoadInfo->info.pFirstKey, &block.firstKey->data[i]); + taosArrayPush(pBlockLoadInfo->info.pLastKey, &block.lastKey->data[i]); + taosArrayPush(pBlockLoadInfo->info.pCount, &block.count->data[i]); + i += 1; + } } } } diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 5c864e7405..377de99fc0 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -1735,7 +1735,9 @@ int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysi pCond->skipRollup = readHandle->skipRollup; // allowed read stt file optimization mode - pCond->notLoadData = (pTableScanNode->dataRequired == FUNC_DATA_REQUIRED_NOT_LOAD) && (pTableScanNode->scan.node.pConditions == NULL); + pCond->notLoadData = (pTableScanNode->dataRequired == FUNC_DATA_REQUIRED_NOT_LOAD) && + (pTableScanNode->scan.node.pConditions == NULL) && + (pTableScanNode->interval == 0); int32_t j = 0; for (int32_t i = 0; i < pCond->numOfCols; ++i) { From 7570fa758bb9e14b6b00bab6eceb4f7370f726e2 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao> Date: Fri, 8 Dec 2023 16:28:18 +0800 Subject: [PATCH 55/65] init delete mark --- source/libs/executor/inc/executorInt.h | 3 +++ 
.../executor/src/streameventwindowoperator.c | 8 ++++++++ .../libs/executor/src/streamtimewindowoperator.c | 16 +++++++++------- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index cba26c46b5..9b0052976c 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -636,6 +636,7 @@ typedef struct SStreamEventAggOperatorInfo { bool isHistoryOp; SArray* historyWins; bool reCkBlock; + bool recvGetAll; SSDataBlock* pCheckpointRes; SFilterInfo* pStartCondInfo; SFilterInfo* pEndCondInfo; @@ -837,6 +838,8 @@ void compactTimeWindow(SExprSupp* pSup, SStreamAggSupporter* pAggSup, STimeW int32_t releaseOutputBuf(void* pState, SRowBuffPos* pPos, SStateStore* pAPI); void resetWinRange(STimeWindow* winRange); bool checkExpiredData(SStateStore* pAPI, SUpdateInfo* pUpdateInfo, STimeWindowAggSupp* pTwSup, uint64_t tableId, TSKEY ts); +int64_t getDeleteMark(SWindowPhysiNode* pWinPhyNode, int64_t interval); +void resetUnCloseSessionWinInfo(SSHashObj* winMap); int32_t encodeSSessionKey(void** buf, SSessionKey* key); void* decodeSSessionKey(void* buf, SSessionKey* key); diff --git a/source/libs/executor/src/streameventwindowoperator.c b/source/libs/executor/src/streameventwindowoperator.c index 914b95ba83..3d38dfffa4 100644 --- a/source/libs/executor/src/streameventwindowoperator.c +++ b/source/libs/executor/src/streameventwindowoperator.c @@ -486,6 +486,11 @@ static SSDataBlock* doStreamEventAgg(SOperatorInfo* pOperator) { return pInfo->pCheckpointRes; } + if (pInfo->recvGetAll) { + pInfo->recvGetAll = false; + resetUnCloseSessionWinInfo(pInfo->streamAggSup.pResultRows); + } + setOperatorCompleted(pOperator); return NULL; } @@ -510,6 +515,7 @@ static SSDataBlock* doStreamEventAgg(SOperatorInfo* pOperator) { deleteSessionWinState(&pInfo->streamAggSup, pBlock, pInfo->pSeUpdated, pInfo->pSeDeleted); continue; } else if (pBlock->info.type == 
STREAM_GET_ALL) { + pInfo->recvGetAll = true; getAllSessionWindow(pInfo->streamAggSup.pResultRows, pInfo->pSeUpdated); continue; } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { @@ -672,6 +678,7 @@ SOperatorInfo* createStreamEventAggOperatorInfo(SOperatorInfo* downstream, SPhys .calTrigger = pEventNode->window.triggerType, .maxTs = INT64_MIN, .minTs = INT64_MAX, + .deleteMark = getDeleteMark(&pEventNode->window, 0), }; initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); @@ -720,6 +727,7 @@ SOperatorInfo* createStreamEventAggOperatorInfo(SOperatorInfo* downstream, SPhys pInfo->pCheckpointRes = createSpecialDataBlock(STREAM_CHECKPOINT); pInfo->reCkBlock = false; + pInfo->recvGetAll = false; // for stream void* buff = NULL; diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 3dfc92d953..d230442c6d 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -1345,12 +1345,12 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { return buildIntervalResult(pOperator); } -static int64_t getDeleteMark(SIntervalPhysiNode* pIntervalPhyNode) { - if (pIntervalPhyNode->window.deleteMark <= 0) { +int64_t getDeleteMark(SWindowPhysiNode* pWinPhyNode, int64_t interval) { + if (pWinPhyNode->deleteMark <= 0) { return DEAULT_DELETE_MARK; } - int64_t deleteMark = TMAX(pIntervalPhyNode->window.deleteMark, pIntervalPhyNode->window.watermark); - deleteMark = TMAX(deleteMark, pIntervalPhyNode->interval); + int64_t deleteMark = TMAX(pWinPhyNode->deleteMark, pWinPhyNode->watermark); + deleteMark = TMAX(deleteMark, interval); return deleteMark; } @@ -1442,7 +1442,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, .calTrigger = pIntervalPhyNode->window.triggerType, .maxTs = INT64_MIN, .minTs = INT64_MAX, - .deleteMark = getDeleteMark(pIntervalPhyNode), + .deleteMark 
= getDeleteMark(&pIntervalPhyNode->window, pIntervalPhyNode->interval), .deleteMarkSaved = 0, .calTriggerSaved = 0, }; @@ -2565,7 +2565,7 @@ void doStreamSessionSaveCheckpoint(SOperatorInfo* pOperator) { taosMemoryFree(buf); } -static void resetUnCloseSessionWinInfo(SSHashObj* winMap) { +void resetUnCloseSessionWinInfo(SSHashObj* winMap) { void* pIte = NULL; int32_t iter = 0; while ((pIte = tSimpleHashIterate(winMap, pIte, &iter)) != NULL) { @@ -2864,6 +2864,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh .calTrigger = pSessionNode->window.triggerType, .maxTs = INT64_MIN, .minTs = INT64_MAX, + .deleteMark = getDeleteMark(&pSessionNode->window, 0), }; initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); @@ -3732,6 +3733,7 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys .calTrigger = pStateNode->window.triggerType, .maxTs = INT64_MIN, .minTs = INT64_MAX, + .deleteMark = getDeleteMark(&pStateNode->window, 0), }; initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); @@ -3963,7 +3965,7 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys .calTrigger = pIntervalPhyNode->window.triggerType, .maxTs = INT64_MIN, .minTs = INT64_MAX, - .deleteMark = getDeleteMark(pIntervalPhyNode)}; + .deleteMark = getDeleteMark(&pIntervalPhyNode->window, pIntervalPhyNode->interval)}; ASSERTS(pInfo->twAggSup.calTrigger != STREAM_TRIGGER_MAX_DELAY, "trigger type should not be max delay"); From ccce9d50858c57afbc161d40cbb27d222afa47d1 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao> Date: Fri, 8 Dec 2023 18:12:05 +0800 Subject: [PATCH 56/65] init delete mark --- source/libs/executor/src/streamtimewindowoperator.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index d230442c6d..2828e667f4 100644 --- 
a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -2864,7 +2864,6 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh .calTrigger = pSessionNode->window.triggerType, .maxTs = INT64_MIN, .minTs = INT64_MAX, - .deleteMark = getDeleteMark(&pSessionNode->window, 0), }; initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); @@ -3733,7 +3732,6 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys .calTrigger = pStateNode->window.triggerType, .maxTs = INT64_MIN, .minTs = INT64_MAX, - .deleteMark = getDeleteMark(&pStateNode->window, 0), }; initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); From 67d967e22fbcf37d14e31453f2bfca35ec325685 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 8 Dec 2023 18:48:00 +0800 Subject: [PATCH 57/65] fix(tsdb): check the overlap between stt blocks and data file blocks. --- source/dnode/vnode/src/tsdb/tsdbRead2.c | 18 +++++++---- source/dnode/vnode/src/tsdb/tsdbReadUtil.c | 35 +++++++++++++++------- source/dnode/vnode/src/tsdb/tsdbReadUtil.h | 3 +- 3 files changed, 39 insertions(+), 17 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 7b18e33e3c..8a3da23649 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -606,6 +606,13 @@ static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SBlockN return TSDB_CODE_OUT_OF_MEMORY; } + if (pScanInfo->filesetWindow.skey > pRecord->firstKey) { + pScanInfo->filesetWindow.skey = pRecord->firstKey; + } + if (pScanInfo->filesetWindow.ekey < pRecord->lastKey) { + pScanInfo->filesetWindow.ekey = pRecord->lastKey; + } + pBlockNum->numOfBlocks += 1; if (taosArrayGetSize(pTableScanInfoList) == 0) { taosArrayPush(pTableScanInfoList, &pScanInfo); @@ -2698,17 +2705,17 @@ static int32_t 
doLoadSttBlockSequentially(STsdbReader* pReader) { // if only require the total rows, no need to load data from stt file if it is clean stt blocks if (pReader->info.execMode == READER_EXEC_ROWS && pScanInfo->cleanSttBlocks) { + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + SDataBlockInfo* pInfo = &pResBlock->info; pInfo->rows = pScanInfo->numOfRowsInStt; pInfo->id.uid = pScanInfo->uid; pInfo->dataLoad = 1; pInfo->window = pScanInfo->sttWindow; setComposedBlockFlag(pReader, true); - pScanInfo->sttKeyInfo.nextProcKey = - ASCENDING_TRAVERSE(pReader->info.order) ? pScanInfo->sttWindow.ekey + 1 : pScanInfo->sttWindow.skey - 1; + pScanInfo->sttKeyInfo.nextProcKey = asc ? pScanInfo->sttWindow.ekey + 1 : pScanInfo->sttWindow.skey - 1; pScanInfo->sttKeyInfo.status = STT_FILE_NO_DATA; - pScanInfo->lastProcKey = - ASCENDING_TRAVERSE(pReader->info.order) ? pScanInfo->sttWindow.ekey : pScanInfo->sttWindow.skey; + pScanInfo->lastProcKey = asc ? pScanInfo->sttWindow.ekey : pScanInfo->sttWindow.skey; pSttBlockReader->mergeTree.pIter = NULL; pScanInfo->sttBlockReturned = true; @@ -2833,7 +2840,8 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { tsdbDebug("load data in stt block firstly %s", pReader->idStr); int64_t st = taosGetTimestampUs(); - // let's load data from stt files + // let's load data from stt files, make sure clear the cleanStt block flag before load the data from stt files + pScanInfo->cleanSttBlocks = false; initSttBlockReader(pSttBlockReader, pScanInfo, pReader); // no data in stt block, no need to proceed. 
diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c index 2db49b8815..590e7e579d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c @@ -22,6 +22,8 @@ #include "tsdbUtil2.h" #include "tsimplehash.h" +#define INIT_TIMEWINDOW(_w) do { (_w)->skey = INT64_MAX; (_w)->ekey = INT64_MIN;} while(0); + static bool overlapWithDelSkylineWithoutVer(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, int32_t order); static int32_t initBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) { @@ -155,6 +157,9 @@ SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBuf* pBuf STableBlockScanInfo* pScanInfo = getPosInBlockInfoBuf(pBuf, j); pScanInfo->uid = idList[j].uid; + INIT_TIMEWINDOW(&pScanInfo->sttWindow); + INIT_TIMEWINDOW(&pScanInfo->filesetWindow); + pUidList->tableUidList[j] = idList[j].uid; if (ASCENDING_TRAVERSE(pTsdbReader->info.order)) { @@ -247,8 +252,8 @@ static void doCleanupInfoForNextFileset(STableBlockScanInfo* pScanInfo) { taosArrayClear(pScanInfo->pFileDelData); // del data from each file set pScanInfo->cleanSttBlocks = false; pScanInfo->numOfRowsInStt = 0; - pScanInfo->sttWindow.skey = INT64_MAX; - pScanInfo->sttWindow.ekey = INT64_MIN; + INIT_TIMEWINDOW(&pScanInfo->sttWindow); + INIT_TIMEWINDOW(&pScanInfo->filesetWindow); pScanInfo->sttKeyInfo.status = STT_FILE_READER_UNINIT; } @@ -409,12 +414,10 @@ int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int3 blockInfo.record = *(SBrinRecord*)taosArrayGet(sup.pDataBlockInfo[0][i].pInfo->pBlockList, i); taosArrayPush(pBlockIter->blockList, &blockInfo); - STableDataBlockIdx tableDataBlockIdx = {.globalIndex = i}; taosArrayPush(pTableScanInfo->pBlockIdxList, &tableDataBlockIdx); } - taosArrayDestroy(pTableScanInfo->pBlockList); - pTableScanInfo->pBlockList = NULL; + pTableScanInfo->pBlockList = taosArrayDestroy(pTableScanInfo->pBlockList); int64_t et = 
taosGetTimestampUs(); tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted, elapsed time:%.2f ms %s", @@ -463,8 +466,7 @@ int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int3 for (int32_t i = 0; i < numOfTables; ++i) { STableBlockScanInfo* pTableScanInfo = taosArrayGetP(pTableList, i); - taosArrayDestroy(pTableScanInfo->pBlockList); - pTableScanInfo->pBlockList = NULL; + pTableScanInfo->pBlockList = taosArrayDestroy(pTableScanInfo->pBlockList); } int64_t et = taosGetTimestampUs(); @@ -845,7 +847,10 @@ int32_t tsdbGetRowsInSttFiles(STFileSet* pFileSet, SArray* pSttFileBlockIterArra return numOfRows; } -// overlap with deletion skyline +static bool overlapHelper(const STimeWindow* pLeft, TSKEY minKey, TSKEY maxKey) { + return (pLeft->ekey >= minKey) && (pLeft->skey <= maxKey); +} + static bool overlapWithTimeWindow(STimeWindow* p1, STimeWindow* pQueryWindow, STableBlockScanInfo* pBlockScanInfo, int32_t order) { // overlap with query window @@ -853,16 +858,24 @@ static bool overlapWithTimeWindow(STimeWindow* p1, STimeWindow* pQueryWindow, ST return true; } - // overlap with mem data SIterInfo* pMemIter = &pBlockScanInfo->iter; SIterInfo* pIMemIter = &pBlockScanInfo->iiter; - if ((pMemIter->hasVal) && p1->ekey >= pMemIter->iter->pTbData->minKey && p1->skey <= pMemIter->iter->pTbData->maxKey) { + // overlap with mem data + STbData* pTbData = pMemIter->iter->pTbData; + if ((pMemIter->hasVal) && overlapHelper(p1, pTbData->minKey, pTbData->maxKey)) { return true; } // overlap with imem data - if ((pIMemIter->hasVal) && p1->ekey >= pIMemIter->iter->pTbData->minKey && p1->skey <= pIMemIter->iter->pTbData->maxKey) { + STbData* pITbData = pIMemIter->iter->pTbData; + if ((pIMemIter->hasVal) && overlapHelper(p1, pITbData->minKey, pITbData->maxKey)) { + return true; + } + + // overlap with data file block + STimeWindow* pFileWin = &pBlockScanInfo->filesetWindow; + if ((taosArrayGetSize(pBlockScanInfo->pBlockIdxList) > 
0) && overlapHelper(p1, pFileWin->skey, pFileWin->ekey)) { return true; } diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h index a9e80e1b8c..43cd499aca 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h @@ -96,7 +96,8 @@ typedef struct STableBlockScanInfo { bool cleanSttBlocks; // stt block is clean in current fileset bool sttBlockReturned; // result block returned alreay int64_t numOfRowsInStt; - STimeWindow sttWindow; + STimeWindow sttWindow; // timestamp window for current stt files + STimeWindow filesetWindow; // timestamp window for current file set } STableBlockScanInfo; typedef struct SResultBlockInfo { From da636ab527911133fe21deaacd0eb92b90557144 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 8 Dec 2023 18:55:20 +0800 Subject: [PATCH 58/65] fix(tsdb): add null check. --- source/dnode/vnode/src/tsdb/tsdbReadUtil.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c index 590e7e579d..3c26badc0e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c @@ -862,15 +862,19 @@ static bool overlapWithTimeWindow(STimeWindow* p1, STimeWindow* pQueryWindow, ST SIterInfo* pIMemIter = &pBlockScanInfo->iiter; // overlap with mem data - STbData* pTbData = pMemIter->iter->pTbData; - if ((pMemIter->hasVal) && overlapHelper(p1, pTbData->minKey, pTbData->maxKey)) { - return true; + if (pMemIter->hasVal) { + STbData* pTbData = pMemIter->iter->pTbData; + if (overlapHelper(p1, pTbData->minKey, pTbData->maxKey)) { + return true; + } } // overlap with imem data - STbData* pITbData = pIMemIter->iter->pTbData; - if ((pIMemIter->hasVal) && overlapHelper(p1, pITbData->minKey, pITbData->maxKey)) { - return true; + if (pIMemIter->hasVal) { + STbData* pITbData = pIMemIter->iter->pTbData; + if (overlapHelper(p1, 
pITbData->minKey, pITbData->maxKey)) { + return true; + } } // overlap with data file block From f2592f7399b9e787ba9b5ec17f08d4ef104050de Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 8 Dec 2023 23:44:05 +0800 Subject: [PATCH 59/65] fix(tsdb): prepare the memory buffer. --- source/dnode/vnode/src/tsdb/tsdbMergeTree.c | 2 +- source/dnode/vnode/src/tsdb/tsdbRead2.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c index 0f78fbfbfb..0b86cae1be 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c +++ b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c @@ -166,7 +166,7 @@ static SBlockData *loadLastBlock(SLDataIter *pIter, const char *idStr) { pInfo->cost.blockElapsedTime += el; pInfo->cost.loadBlocks += 1; - tsdbDebug("read last block, total load:%" PRId64 ", trigger by uid:%" PRIu64 ", stt-fileVer:%" PRId64 + tsdbDebug("read stt block, total load:%" PRId64 ", trigger by uid:%" PRIu64 ", stt-fileVer:%" PRId64 ", last block index:%d, entry:%d, rows:%d, uidRange:%" PRId64 "-%" PRId64 " tsRange:%" PRId64 "-%" PRId64 " %p, elapsed time:%.2f ms, %s", pInfo->cost.loadBlocks, pIter->uid, pIter->cid, pIter->iSttBlk, pInfo->currentLoadBlockIndex, pBlock->nRow, diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 8a3da23649..1ca42f4578 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -631,7 +631,7 @@ static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SBlockN double el = (taosGetTimestampUs() - st) / 1000.0; tsdbDebug( - "load block of %d tables completed, blocks:%d in %d tables, last-files:%d, block-info-size:%.2f Kb, elapsed " + "load block of %d tables completed, blocks:%d in %d tables, stt-files:%d, block-info-size:%.2f Kb, elapsed " "time:%.2f ms %s", numOfTables, pBlockNum->numOfBlocks, (int32_t)taosArrayGetSize(pTableScanInfoList), 
pBlockNum->numOfSttFiles, sizeInDisk / 1000.0, el, pReader->idStr); @@ -2712,6 +2712,8 @@ static int32_t doLoadSttBlockSequentially(STsdbReader* pReader) { pInfo->id.uid = pScanInfo->uid; pInfo->dataLoad = 1; pInfo->window = pScanInfo->sttWindow; + blockDataEnsureCapacity(pResBlock, pInfo->rows); + setComposedBlockFlag(pReader, true); pScanInfo->sttKeyInfo.nextProcKey = asc ? pScanInfo->sttWindow.ekey + 1 : pScanInfo->sttWindow.skey - 1; pScanInfo->sttKeyInfo.status = STT_FILE_NO_DATA; From 5528c2bf67d2a683844cf9ba37900e23624974cc Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 8 Dec 2023 23:47:49 +0800 Subject: [PATCH 60/65] fix(tsdb): do some internal refactor. --- source/dnode/vnode/src/tsdb/tsdbRead2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 1ca42f4578..347672ee53 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -2708,19 +2708,22 @@ static int32_t doLoadSttBlockSequentially(STsdbReader* pReader) { bool asc = ASCENDING_TRAVERSE(pReader->info.order); SDataBlockInfo* pInfo = &pResBlock->info; + blockDataEnsureCapacity(pResBlock, pInfo->rows); + pInfo->rows = pScanInfo->numOfRowsInStt; pInfo->id.uid = pScanInfo->uid; pInfo->dataLoad = 1; pInfo->window = pScanInfo->sttWindow; - blockDataEnsureCapacity(pResBlock, pInfo->rows); setComposedBlockFlag(pReader, true); + pScanInfo->sttKeyInfo.nextProcKey = asc ? pScanInfo->sttWindow.ekey + 1 : pScanInfo->sttWindow.skey - 1; pScanInfo->sttKeyInfo.status = STT_FILE_NO_DATA; pScanInfo->lastProcKey = asc ? 
pScanInfo->sttWindow.ekey : pScanInfo->sttWindow.skey; - pSttBlockReader->mergeTree.pIter = NULL; pScanInfo->sttBlockReturned = true; + pSttBlockReader->mergeTree.pIter = NULL; + tsdbDebug("%p uid:%" PRId64 " return clean stt block as one, brange:%" PRId64 "-%" PRId64 " rows:%" PRId64 " %s", pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, pResBlock->info.rows, pReader->idStr); From 9acd4af1f9910602934aad0c2044d9b0335e500d Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 9 Dec 2023 01:49:55 +0800 Subject: [PATCH 61/65] fix(tsdb):fix capacity rows value. --- source/dnode/vnode/src/tsdb/tsdbRead2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 347672ee53..a2ef109800 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -2708,7 +2708,7 @@ static int32_t doLoadSttBlockSequentially(STsdbReader* pReader) { bool asc = ASCENDING_TRAVERSE(pReader->info.order); SDataBlockInfo* pInfo = &pResBlock->info; - blockDataEnsureCapacity(pResBlock, pInfo->rows); + blockDataEnsureCapacity(pResBlock, pScanInfo->numOfRowsInStt); pInfo->rows = pScanInfo->numOfRowsInStt; pInfo->id.uid = pScanInfo->uid; From f68c46a691ef6869cfc04fecf7d83e561d1f031e Mon Sep 17 00:00:00 2001 From: zk66214 Date: Sun, 10 Dec 2023 18:27:22 +0800 Subject: [PATCH 62/65] Add test cases for TD-27403 --- .../system-test/1-insert/insert_timestamp.py | 166 +++++++++++++----- 1 file changed, 124 insertions(+), 42 deletions(-) diff --git a/tests/system-test/1-insert/insert_timestamp.py b/tests/system-test/1-insert/insert_timestamp.py index 621912f664..5601ce96e1 100644 --- a/tests/system-test/1-insert/insert_timestamp.py +++ b/tests/system-test/1-insert/insert_timestamp.py @@ -1,4 +1,5 @@ -import sys +import datetime +import sys from util.log import * from util.cases import * from util.sql import * @@ -11,57 +12,138 @@ 
class TDTestCase: tdLog.debug("start to execute %s" % __file__) tdSql.init(conn.cursor(), True) - #def prepare_data(self): - - def run(self): - tdSql.execute("create database test_insert_timestamp;") + """ + timestamp输入插入规则: + 对于插入的字段类型为timestamp类型的字段,只允许这么几种情况: + timestamp + timestamp +/- interval + interval + timestamp + timestamp可以是字符串譬如:"2023-12-05 00:00:00.000", 也可以是int型, 譬如:1701619200000 + interval支持:b, u, a, s, m, h, d, w 不支持n, y,譬如:1h, 2d + + 仅支持2元表达式,譬如:timestamp + 2h, 不支持2元以上表达,譬如timestamp + 2h + 1d + """ + + tdSql.execute("create database test_insert_timestamp PRECISION 'ns';") tdSql.execute("use test_insert_timestamp;") tdSql.execute("create stable st(ts timestamp, c1 int) tags(id int);") tdSql.execute("create table test_t using st tags(1);") - tdSql.error("insert into test_t values(now + today(), 1 ); ") - tdSql.error("insert into test_t values(now - today(), 1 ); ") - tdSql.error("insert into test_t values(today() + now(), 1 ); ") - tdSql.error("insert into test_t values(today() - now(), 1 ); ") - tdSql.error("insert into test_t values(2h - now(), 1 ); ") - tdSql.error("insert into test_t values(2h - today(), 1 ); ") - tdSql.error("insert into test_t values(2h - 1h, 1 ); ") - tdSql.error("insert into test_t values(2h + 1h, 1 ); ") - tdSql.error("insert into test_t values('2023-11-28 00:00:00.000' + '2023-11-28 00:00:00.000', 1 ); ") - tdSql.error("insert into test_t values('2023-11-28 00:00:00.000' + 1701111600000, 1 ); ") - tdSql.error("insert into test_t values(1701111500000 + 1701111600000, 1 ); ") - tdSql.error("insert into test_insert_timestamp.test_t values(1701111600000 + 1h + 1s, 4); ") + expectErrInfo = "syntax error" + # 异常场景:timestamp + timestamp + tdSql.error("insert into test_t values(now + today(), 1 );", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values(now - today(), 1 );", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values(today() + now(), 1 ); ", expectErrInfo=expectErrInfo) + 
tdSql.error("insert into test_t values(today() - now(), 1 ); ", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values('2023-11-28 00:00:00.000' + '2023-11-28 00:00:00.000', 1 ); ", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values('2023-11-28 00:00:00.000' + 1701111600000, 1 ); ", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values(1701111500000 + 1701111600000, 1 ); ", expectErrInfo=expectErrInfo) - tdSql.execute("insert into test_insert_timestamp.test_t values(1701111600000 + 1h, 4); ") - tdSql.execute("insert into test_insert_timestamp.test_t values(2h + 1701111600000, 5); ") - tdSql.execute("insert into test_insert_timestamp.test_t values('2023-11-28 00:00:00.000' + 1h, 1); ") - tdSql.execute("insert into test_insert_timestamp.test_t values(3h + '2023-11-28 00:00:00.000', 3); ") - tdSql.execute("insert into test_insert_timestamp.test_t values(1701111600000 - 1h, 2); ") - tdSql.execute("insert into test_insert_timestamp.test_t values(1701122400000, 6); ") - tdSql.execute("insert into test_insert_timestamp.test_t values('2023-11-28 07:00:00.000', 7); ") + # 异常场景:timestamp + interval + interval + tdSql.error("insert into test_t values(today() + 1d + 1s, 1);", expectErrInfo=expectErrInfo) + + # 异常场景:interval - timestamp + tdSql.error("insert into test_t values(2h - now(), 1 ); ", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values(2h - today(), 1 ); ", expectErrInfo=expectErrInfo) + + # 异常场景:interval + interval + tdSql.error("insert into test_t values(2h - 1h, 1 ); ", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values(2h + 1h, 1 ); ", expectErrInfo=expectErrInfo) + + # 异常场景:非法interval类型n + tdSql.error("insert into test_t values(today() + 2n, 7); ", expectErrInfo=expectErrInfo) + + # 异常场景:非法interval类型y + tdSql.error("insert into test_t values(today() - 2y, 8);", expectErrInfo=expectErrInfo) + + # 异常场景:数据类型不对 + tdSql.error("insert into test_t values('a1701619200000', 8);", 
expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values('ss2023-12-05 00:00:00.000' + '1701619200000', 1);", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values(123456, 1);", expectErrInfo="Timestamp data out of range") + tdSql.error("insert into test_t values(123.456, 1);", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values(True, 1);", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values(None, 1);", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values(null, 1);", expectErrInfo=expectErrInfo) + + # 异常场景:格式不对 + tdSql.error("insert into test_t values('2023-122-05 00:00:00.000' + '1701619200000', 1);", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values('2023-12--05 00:00:00.000' + '1701619200000', 1);", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values('12/12/2023' + 10a, 1);", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values(1701619200000111, 1);", expectErrInfo="Timestamp data out of range") + + # 正常场景:timestamp + interval + tdSql.execute("insert into test_t values(today() + 2b, 1);") + tdSql.execute("insert into test_t values(1701619200000000000 + 2u, 2);") + tdSql.execute("insert into test_t values(today + 2a, 3);") + tdSql.execute("insert into test_t values('2023-12-05 23:59:59.999' + 2a, 4);") + tdSql.execute("insert into test_t values(1701921599000000000 + 3a, 5);") + + # 正常场景:timestamp - interval + tdSql.execute("insert into test_t values(today() - 2s, 6);") + tdSql.execute("insert into test_t values(now() - 2m, 7);") + tdSql.execute("insert into test_t values(today - 2h, 8);") + tdSql.execute("insert into test_t values('2023-12-05 00:00:00.000000000' - 2a, 9);") + tdSql.execute("insert into test_t values(1701669000000000000 - 2a, 10);") + + # 正常场景:interval + timestamp + tdSql.execute("insert into test_t values(2d + now, 11);") + tdSql.execute("insert into test_t values(2w + today, 12);") + + # 正常场景:timestamp + 
tdSql.execute("insert into test_t values('2023-12-05 00:00:00.000', 13);") + tdSql.execute("insert into test_t values(1701629100000000000, 14);") + tdSql.execute("insert into test_t values(now() + 2s, 15);") + tdSql.execute("insert into test_t values('2023-12-05 00:00:59.999999999+07:00' + 10a, 16);") + tdSql.execute("insert into test_t values('2023-12-05T00:00:59.110+07:00' + 10a, 17);") + tdSql.execute("insert into test_t values('2023-12-05' + 10a, 18);") + tdSql.execute("insert into test_t values('2023-11-15', -15);") + tdSql.execute("insert into test_t values(1701619200000000000 - 2a, -10);") + tdSql.execute("insert into test_t values(1701619200000000000, -5);") + tdSql.execute("insert into test_t values('2023-12-05 12:12:12' + 10a, 19);") + + # 验证数据 + tdSql.query(f'select ts,c1 from test_t order by c1;') + tdSql.checkRows(22) + tdSql.checkEqual(tdSql.queryResult[0][0], 1699977600000000000) # c1=-15 + tdSql.checkEqual(tdSql.queryResult[1][0], 1701619199998000000) # c1=-10 + tdSql.checkEqual(tdSql.queryResult[2][0], 1701619200000000000) # c1=-5 + tdSql.checkEqual(tdSql.queryResult[3][0], self.__get_today_ts() + 2) # c1=1 + tdSql.checkEqual(tdSql.queryResult[4][0], 1701619200000002000) # c1=2 + tdSql.checkEqual(tdSql.queryResult[5][0], self.__get_today_ts() + 2000000) # c1=3 + tdSql.checkEqual(tdSql.queryResult[6][0], 1701792000001000000) # c1=4 + tdSql.checkEqual(tdSql.queryResult[7][0], 1701921599003000000) # c1=5 + tdSql.checkEqual(tdSql.queryResult[8][0], self.__get_today_ts() - 2000000000) # c1=6 + tdSql.checkEqual(self.__convert_ts_to_date(tdSql.queryResult[9][0]), str(datetime.date.today())) # c1=7 + tdSql.checkEqual(tdSql.queryResult[10][0], self.__get_today_ts() - 7200000000000) # c1=8 + tdSql.checkEqual(tdSql.queryResult[11][0], 1701705599998000000) # c1=9 + tdSql.checkEqual(tdSql.queryResult[12][0], 1701668999998000000) # c1=10 + tdSql.checkEqual(self.__convert_ts_to_date(tdSql.queryResult[13][0]), str(datetime.date.today() + 
datetime.timedelta(days=2))) # c1=11 + tdSql.checkEqual(self.__convert_ts_to_date(tdSql.queryResult[14][0]), str(datetime.date.today() + datetime.timedelta(days=14))) # c1=12 + tdSql.checkEqual(tdSql.queryResult[15][0], 1701705600000000000) # c1=13 + tdSql.checkEqual(tdSql.queryResult[16][0], 1701629100000000000) # c1=14 + tdSql.checkEqual(self.__convert_ts_to_date(tdSql.queryResult[17][0]), str(datetime.date.today())) # c1=15 + tdSql.checkEqual(tdSql.queryResult[18][0], 1701709260009999999) # c1=16 + tdSql.checkEqual(tdSql.queryResult[19][0], 1701709259120000000) # c1=17 + tdSql.checkEqual(tdSql.queryResult[20][0], 1701705600010000000) # c1=18 + tdSql.checkEqual(tdSql.queryResult[21][0], 1701749532010000000) # c1=19 - tdSql.query(f'select ts, c1 from test_t order by ts;') - tdSql.checkRows(7) - tdSql.checkEqual(tdSql.queryResult[0][0], datetime.datetime(2023, 11, 28, 1, 0, 0) ) - tdSql.checkEqual(tdSql.queryResult[0][1], 1) - tdSql.checkEqual(tdSql.queryResult[1][0], datetime.datetime(2023, 11, 28, 2, 0, 0) ) - tdSql.checkEqual(tdSql.queryResult[1][1], 2) - tdSql.checkEqual(tdSql.queryResult[2][0], datetime.datetime(2023, 11, 28, 3, 0, 0) ) - tdSql.checkEqual(tdSql.queryResult[2][1], 3) - tdSql.checkEqual(tdSql.queryResult[3][0], datetime.datetime(2023, 11, 28, 4, 0, 0) ) - tdSql.checkEqual(tdSql.queryResult[3][1], 4) - tdSql.checkEqual(tdSql.queryResult[4][0], datetime.datetime(2023, 11, 28, 5, 0, 0) ) - tdSql.checkEqual(tdSql.queryResult[4][1], 5) - tdSql.checkEqual(tdSql.queryResult[5][0], datetime.datetime(2023, 11, 28, 6, 0, 0) ) - tdSql.checkEqual(tdSql.queryResult[5][1], 6) - tdSql.checkEqual(tdSql.queryResult[6][0], datetime.datetime(2023, 11, 28, 7, 0, 0) ) - tdSql.checkEqual(tdSql.queryResult[6][1], 7) - tdSql.execute("drop table if exists test_t ;") tdSql.execute("drop stable if exists st;") tdSql.execute("drop database if exists test_insert_timestamp;") - + + def __convert_ts_to_date(self, ts: int) -> str: + # 创建datetime对象并进行转换 + dt_object = 
datetime.datetime.fromtimestamp(ts / 1e9) + + # 格式化日期字符串 + formatted_date = dt_object.strftime('%Y-%m-%d') + # print("转换后的日期为:", formatted_date) + return formatted_date + + def __get_today_ts(self) -> int: + return int(time.mktime(time.strptime(str(datetime.date.today()), "%Y-%m-%d"))) * 1000000000 + def stop(self): tdSql.close() tdLog.success("%s successfully executed" % __file__) From 06c0a090c27b6a318b519ae217deb8b104d36f1a Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao> Date: Mon, 11 Dec 2023 11:32:14 +0800 Subject: [PATCH 63/65] opt ignore expried rule --- source/libs/executor/src/streamtimewindowoperator.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 2828e667f4..ef3a4d6f90 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -634,9 +634,6 @@ static void addRetriveWindow(SArray* wins, SStreamIntervalOperatorInfo* pInfo, i for (int32_t i = 0; i < size; i++) { SWinKey* winKey = taosArrayGet(wins, i); STimeWindow nextWin = getFinalTimeWindow(winKey->ts, &pInfo->interval); - if (isOverdue(nextWin.ekey, &pInfo->twAggSup) && pInfo->ignoreExpiredData) { - continue; - } void* chIds = taosHashGet(pInfo->pPullDataMap, winKey, sizeof(SWinKey)); if (!chIds) { SPullWindowInfo pull = { @@ -801,7 +798,7 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDat } while (1) { bool isClosed = isCloseWindow(&nextWin, &pInfo->twAggSup); - if ((!IS_FINAL_INTERVAL_OP(pOperator) && pInfo->ignoreExpiredData && + if ((!IS_FINAL_INTERVAL_OP(pOperator) && pInfo->ignoreExpiredData && pSDataBlock->info.type != STREAM_PULL_DATA && checkExpiredData(&pInfo->stateStore, pInfo->pUpdateInfo, &pInfo->twAggSup, pSDataBlock->info.id.uid, nextWin.ekey)) || !inSlidingWindow(&pInfo->interval, &nextWin, &pSDataBlock->info)) { From 
1b1d8200d534bb02aa64b88c0d15f6fab99fc7bb Mon Sep 17 00:00:00 2001 From: Alex Duan <51781608+DuanKuanJun@users.noreply.github.com> Date: Mon, 11 Dec 2023 14:13:25 +0800 Subject: [PATCH 64/65] Revert "Test/3.0/td 27403" --- .../system-test/1-insert/insert_timestamp.py | 166 +++++------------- 1 file changed, 42 insertions(+), 124 deletions(-) diff --git a/tests/system-test/1-insert/insert_timestamp.py b/tests/system-test/1-insert/insert_timestamp.py index 5601ce96e1..621912f664 100644 --- a/tests/system-test/1-insert/insert_timestamp.py +++ b/tests/system-test/1-insert/insert_timestamp.py @@ -1,5 +1,4 @@ -import datetime -import sys +import sys from util.log import * from util.cases import * from util.sql import * @@ -12,138 +11,57 @@ class TDTestCase: tdLog.debug("start to execute %s" % __file__) tdSql.init(conn.cursor(), True) + #def prepare_data(self): + + def run(self): - """ - timestamp输入插入规则: - 对于插入的字段类型为timestamp类型的字段,只允许这么几种情况: - timestamp - timestamp +/- interval - interval + timestamp - timestamp可以是字符串譬如:"2023-12-05 00:00:00.000", 也可以是int型, 譬如:1701619200000 - interval支持:b, u, a, s, m, h, d, w 不支持n, y,譬如:1h, 2d - - 仅支持2元表达式,譬如:timestamp + 2h, 不支持2元以上表达,譬如timestamp + 2h + 1d - """ - - tdSql.execute("create database test_insert_timestamp PRECISION 'ns';") + tdSql.execute("create database test_insert_timestamp;") tdSql.execute("use test_insert_timestamp;") tdSql.execute("create stable st(ts timestamp, c1 int) tags(id int);") tdSql.execute("create table test_t using st tags(1);") - expectErrInfo = "syntax error" - # 异常场景:timestamp + timestamp - tdSql.error("insert into test_t values(now + today(), 1 );", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values(now - today(), 1 );", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values(today() + now(), 1 ); ", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values(today() - now(), 1 ); ", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values('2023-11-28 
00:00:00.000' + '2023-11-28 00:00:00.000', 1 ); ", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values('2023-11-28 00:00:00.000' + 1701111600000, 1 ); ", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values(1701111500000 + 1701111600000, 1 ); ", expectErrInfo=expectErrInfo) + tdSql.error("insert into test_t values(now + today(), 1 ); ") + tdSql.error("insert into test_t values(now - today(), 1 ); ") + tdSql.error("insert into test_t values(today() + now(), 1 ); ") + tdSql.error("insert into test_t values(today() - now(), 1 ); ") + tdSql.error("insert into test_t values(2h - now(), 1 ); ") + tdSql.error("insert into test_t values(2h - today(), 1 ); ") + tdSql.error("insert into test_t values(2h - 1h, 1 ); ") + tdSql.error("insert into test_t values(2h + 1h, 1 ); ") + tdSql.error("insert into test_t values('2023-11-28 00:00:00.000' + '2023-11-28 00:00:00.000', 1 ); ") + tdSql.error("insert into test_t values('2023-11-28 00:00:00.000' + 1701111600000, 1 ); ") + tdSql.error("insert into test_t values(1701111500000 + 1701111600000, 1 ); ") + tdSql.error("insert into test_insert_timestamp.test_t values(1701111600000 + 1h + 1s, 4); ") - # 异常场景:timestamp + interval + interval - tdSql.error("insert into test_t values(today() + 1d + 1s, 1);", expectErrInfo=expectErrInfo) - - # 异常场景:interval - timestamp - tdSql.error("insert into test_t values(2h - now(), 1 ); ", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values(2h - today(), 1 ); ", expectErrInfo=expectErrInfo) - - # 异常场景:interval + interval - tdSql.error("insert into test_t values(2h - 1h, 1 ); ", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values(2h + 1h, 1 ); ", expectErrInfo=expectErrInfo) - - # 异常场景:非法interval类型n - tdSql.error("insert into test_t values(today() + 2n, 7); ", expectErrInfo=expectErrInfo) - - # 异常场景:非法interval类型y - tdSql.error("insert into test_t values(today() - 2y, 8);", expectErrInfo=expectErrInfo) - - # 异常场景:数据类型不对 - 
tdSql.error("insert into test_t values('a1701619200000', 8);", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values('ss2023-12-05 00:00:00.000' + '1701619200000', 1);", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values(123456, 1);", expectErrInfo="Timestamp data out of range") - tdSql.error("insert into test_t values(123.456, 1);", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values(True, 1);", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values(None, 1);", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values(null, 1);", expectErrInfo=expectErrInfo) - - # 异常场景:格式不对 - tdSql.error("insert into test_t values('2023-122-05 00:00:00.000' + '1701619200000', 1);", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values('2023-12--05 00:00:00.000' + '1701619200000', 1);", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values('12/12/2023' + 10a, 1);", expectErrInfo=expectErrInfo) - tdSql.error("insert into test_t values(1701619200000111, 1);", expectErrInfo="Timestamp data out of range") - - # 正常场景:timestamp + interval - tdSql.execute("insert into test_t values(today() + 2b, 1);") - tdSql.execute("insert into test_t values(1701619200000000000 + 2u, 2);") - tdSql.execute("insert into test_t values(today + 2a, 3);") - tdSql.execute("insert into test_t values('2023-12-05 23:59:59.999' + 2a, 4);") - tdSql.execute("insert into test_t values(1701921599000000000 + 3a, 5);") - - # 正常场景:timestamp - interval - tdSql.execute("insert into test_t values(today() - 2s, 6);") - tdSql.execute("insert into test_t values(now() - 2m, 7);") - tdSql.execute("insert into test_t values(today - 2h, 8);") - tdSql.execute("insert into test_t values('2023-12-05 00:00:00.000000000' - 2a, 9);") - tdSql.execute("insert into test_t values(1701669000000000000 - 2a, 10);") - - # 正常场景:interval + timestamp - tdSql.execute("insert into test_t values(2d + now, 11);") - tdSql.execute("insert into 
test_t values(2w + today, 12);") - - # 正常场景:timestamp - tdSql.execute("insert into test_t values('2023-12-05 00:00:00.000', 13);") - tdSql.execute("insert into test_t values(1701629100000000000, 14);") - tdSql.execute("insert into test_t values(now() + 2s, 15);") - tdSql.execute("insert into test_t values('2023-12-05 00:00:59.999999999+07:00' + 10a, 16);") - tdSql.execute("insert into test_t values('2023-12-05T00:00:59.110+07:00' + 10a, 17);") - tdSql.execute("insert into test_t values('2023-12-05' + 10a, 18);") - tdSql.execute("insert into test_t values('2023-11-15', -15);") - tdSql.execute("insert into test_t values(1701619200000000000 - 2a, -10);") - tdSql.execute("insert into test_t values(1701619200000000000, -5);") - tdSql.execute("insert into test_t values('2023-12-05 12:12:12' + 10a, 19);") - - # 验证数据 - tdSql.query(f'select ts,c1 from test_t order by c1;') - tdSql.checkRows(22) - tdSql.checkEqual(tdSql.queryResult[0][0], 1699977600000000000) # c1=-15 - tdSql.checkEqual(tdSql.queryResult[1][0], 1701619199998000000) # c1=-10 - tdSql.checkEqual(tdSql.queryResult[2][0], 1701619200000000000) # c1=-5 - tdSql.checkEqual(tdSql.queryResult[3][0], self.__get_today_ts() + 2) # c1=1 - tdSql.checkEqual(tdSql.queryResult[4][0], 1701619200000002000) # c1=2 - tdSql.checkEqual(tdSql.queryResult[5][0], self.__get_today_ts() + 2000000) # c1=3 - tdSql.checkEqual(tdSql.queryResult[6][0], 1701792000001000000) # c1=4 - tdSql.checkEqual(tdSql.queryResult[7][0], 1701921599003000000) # c1=5 - tdSql.checkEqual(tdSql.queryResult[8][0], self.__get_today_ts() - 2000000000) # c1=6 - tdSql.checkEqual(self.__convert_ts_to_date(tdSql.queryResult[9][0]), str(datetime.date.today())) # c1=7 - tdSql.checkEqual(tdSql.queryResult[10][0], self.__get_today_ts() - 7200000000000) # c1=8 - tdSql.checkEqual(tdSql.queryResult[11][0], 1701705599998000000) # c1=9 - tdSql.checkEqual(tdSql.queryResult[12][0], 1701668999998000000) # c1=10 - 
tdSql.checkEqual(self.__convert_ts_to_date(tdSql.queryResult[13][0]), str(datetime.date.today() + datetime.timedelta(days=2))) # c1=11 - tdSql.checkEqual(self.__convert_ts_to_date(tdSql.queryResult[14][0]), str(datetime.date.today() + datetime.timedelta(days=14))) # c1=12 - tdSql.checkEqual(tdSql.queryResult[15][0], 1701705600000000000) # c1=13 - tdSql.checkEqual(tdSql.queryResult[16][0], 1701629100000000000) # c1=14 - tdSql.checkEqual(self.__convert_ts_to_date(tdSql.queryResult[17][0]), str(datetime.date.today())) # c1=15 - tdSql.checkEqual(tdSql.queryResult[18][0], 1701709260009999999) # c1=16 - tdSql.checkEqual(tdSql.queryResult[19][0], 1701709259120000000) # c1=17 - tdSql.checkEqual(tdSql.queryResult[20][0], 1701705600010000000) # c1=18 - tdSql.checkEqual(tdSql.queryResult[21][0], 1701749532010000000) # c1=19 + tdSql.execute("insert into test_insert_timestamp.test_t values(1701111600000 + 1h, 4); ") + tdSql.execute("insert into test_insert_timestamp.test_t values(2h + 1701111600000, 5); ") + tdSql.execute("insert into test_insert_timestamp.test_t values('2023-11-28 00:00:00.000' + 1h, 1); ") + tdSql.execute("insert into test_insert_timestamp.test_t values(3h + '2023-11-28 00:00:00.000', 3); ") + tdSql.execute("insert into test_insert_timestamp.test_t values(1701111600000 - 1h, 2); ") + tdSql.execute("insert into test_insert_timestamp.test_t values(1701122400000, 6); ") + tdSql.execute("insert into test_insert_timestamp.test_t values('2023-11-28 07:00:00.000', 7); ") + tdSql.query(f'select ts, c1 from test_t order by ts;') + tdSql.checkRows(7) + tdSql.checkEqual(tdSql.queryResult[0][0], datetime.datetime(2023, 11, 28, 1, 0, 0) ) + tdSql.checkEqual(tdSql.queryResult[0][1], 1) + tdSql.checkEqual(tdSql.queryResult[1][0], datetime.datetime(2023, 11, 28, 2, 0, 0) ) + tdSql.checkEqual(tdSql.queryResult[1][1], 2) + tdSql.checkEqual(tdSql.queryResult[2][0], datetime.datetime(2023, 11, 28, 3, 0, 0) ) + tdSql.checkEqual(tdSql.queryResult[2][1], 3) + 
tdSql.checkEqual(tdSql.queryResult[3][0], datetime.datetime(2023, 11, 28, 4, 0, 0) ) + tdSql.checkEqual(tdSql.queryResult[3][1], 4) + tdSql.checkEqual(tdSql.queryResult[4][0], datetime.datetime(2023, 11, 28, 5, 0, 0) ) + tdSql.checkEqual(tdSql.queryResult[4][1], 5) + tdSql.checkEqual(tdSql.queryResult[5][0], datetime.datetime(2023, 11, 28, 6, 0, 0) ) + tdSql.checkEqual(tdSql.queryResult[5][1], 6) + tdSql.checkEqual(tdSql.queryResult[6][0], datetime.datetime(2023, 11, 28, 7, 0, 0) ) + tdSql.checkEqual(tdSql.queryResult[6][1], 7) + tdSql.execute("drop table if exists test_t ;") tdSql.execute("drop stable if exists st;") tdSql.execute("drop database if exists test_insert_timestamp;") - - def __convert_ts_to_date(self, ts: int) -> str: - # 创建datetime对象并进行转换 - dt_object = datetime.datetime.fromtimestamp(ts / 1e9) - - # 格式化日期字符串 - formatted_date = dt_object.strftime('%Y-%m-%d') - # print("转换后的日期为:", formatted_date) - return formatted_date - - def __get_today_ts(self) -> int: - return int(time.mktime(time.strptime(str(datetime.date.today()), "%Y-%m-%d"))) * 1000000000 - + def stop(self): tdSql.close() tdLog.success("%s successfully executed" % __file__) From 7e21030dfda7fe87a0103cecbfad7ffc53516fe0 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao> Date: Mon, 11 Dec 2023 15:16:01 +0800 Subject: [PATCH 65/65] set event window rows --- source/libs/executor/src/streameventwindowoperator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/executor/src/streameventwindowoperator.c b/source/libs/executor/src/streameventwindowoperator.c index 3d38dfffa4..9371bc4a3a 100644 --- a/source/libs/executor/src/streameventwindowoperator.c +++ b/source/libs/executor/src/streameventwindowoperator.c @@ -176,7 +176,7 @@ int32_t updateEventWindowInfo(SStreamAggSupporter* pAggSup, SEventWindowInfo* pW for (int32_t i = start; i < rows; ++i) { if (pTsData[i] >= maxTs) { - return i - 1 - start; + return i - start; } if (pWin->skey > pTsData[i]) {