diff --git a/Jenkinsfile2 b/Jenkinsfile2
index 80f6b8e9e7..dd15807308 100644
--- a/Jenkinsfile2
+++ b/Jenkinsfile2
@@ -430,7 +430,7 @@ pipeline {
                     date
                     rm -rf ${WKC}/debug
                     cd ${WKC}/tests/parallel_test
-                    time ./container_build.sh -w ${WKDIR} -t 10 -e
+                    time ./container_build.sh -w ${WKDIR} -e
                 '''
                 def extra_param = ""
                 def log_server_file = "/home/log_server.json"
diff --git a/cmake/cmake.version b/cmake/cmake.version
index ba85a3d99b..a4c783b6c8 100644
--- a/cmake/cmake.version
+++ b/cmake/cmake.version
@@ -2,7 +2,7 @@
 IF (DEFINED VERNUMBER)
   SET(TD_VER_NUMBER ${VERNUMBER})
 ELSE ()
-  SET(TD_VER_NUMBER "3.0.2.2")
+  SET(TD_VER_NUMBER "3.0.2.4")
 ENDIF ()
 
 IF (DEFINED VERCOMPATIBLE)
diff --git a/cmake/taosadapter_CMakeLists.txt.in b/cmake/taosadapter_CMakeLists.txt.in
index ab1609f35f..d156057459 100644
--- a/cmake/taosadapter_CMakeLists.txt.in
+++ b/cmake/taosadapter_CMakeLists.txt.in
@@ -2,7 +2,7 @@
 # taosadapter
 ExternalProject_Add(taosadapter
     GIT_REPOSITORY https://github.com/taosdata/taosadapter.git
-    GIT_TAG 69eee2e
+    GIT_TAG 3e08996
    SOURCE_DIR "${TD_SOURCE_DIR}/tools/taosadapter"
    BINARY_DIR ""
    #BUILD_IN_SOURCE TRUE
diff --git a/cmake/taostools_CMakeLists.txt.in b/cmake/taostools_CMakeLists.txt.in
index d01928cfe8..1053caf4ef 100644
--- a/cmake/taostools_CMakeLists.txt.in
+++ b/cmake/taostools_CMakeLists.txt.in
@@ -2,7 +2,7 @@
 # taos-tools
 ExternalProject_Add(taos-tools
     GIT_REPOSITORY https://github.com/taosdata/taos-tools.git
-    GIT_TAG 5aa25e9
+    GIT_TAG a0234fe
    SOURCE_DIR "${TD_SOURCE_DIR}/tools/taos-tools"
    BINARY_DIR ""
    #BUILD_IN_SOURCE TRUE
diff --git a/docs/examples/go/go.mod b/docs/examples/go/go.mod
deleted file mode 100644
index 2bc1a74cb6..0000000000
--- a/docs/examples/go/go.mod
+++ /dev/null
@@ -1,6 +0,0 @@
-module goexample
-
-go 1.17
-
-require github.com/taosdata/driver-go/v3 3.0
-
diff --git a/docs/examples/python/conn_native_pandas.py b/docs/examples/python/conn_native_pandas.py
index 56942ef570..f3bab15efb 100644
--- a/docs/examples/python/conn_native_pandas.py
+++ b/docs/examples/python/conn_native_pandas.py
@@ -1,8 +1,11 @@
 import pandas
-from sqlalchemy import create_engine
+from sqlalchemy import create_engine, text
 
 engine = create_engine("taos://root:taosdata@localhost:6030/power")
-df = pandas.read_sql("SELECT * FROM meters", engine)
+conn = engine.connect()
+df = pandas.read_sql(text("SELECT * FROM power.meters"), conn)
+conn.close()
+
 
 # print index
 print(df.index)
diff --git a/docs/examples/python/conn_rest_pandas.py b/docs/examples/python/conn_rest_pandas.py
index 0164080cd5..1b207d6ff1 100644
--- a/docs/examples/python/conn_rest_pandas.py
+++ b/docs/examples/python/conn_rest_pandas.py
@@ -1,8 +1,10 @@
 import pandas
-from sqlalchemy import create_engine
+from sqlalchemy import create_engine, text
 
 engine = create_engine("taosrest://root:taosdata@localhost:6041")
-df: pandas.DataFrame = pandas.read_sql("SELECT * FROM power.meters", engine)
+conn = engine.connect()
+df: pandas.DataFrame = pandas.read_sql(text("SELECT * FROM power.meters"), conn)
+conn.close()
 
 # print index
 print(df.index)
diff --git a/docs/examples/python/connect_rest_examples.py b/docs/examples/python/connect_rest_examples.py
index 900ec1022e..0f8625ae53 100644
--- a/docs/examples/python/connect_rest_examples.py
+++ b/docs/examples/python/connect_rest_examples.py
@@ -1,24 +1,25 @@
 # ANCHOR: connect
 from taosrest import connect, TaosRestConnection, TaosRestCursor
 
-conn: TaosRestConnection = connect(url="http://localhost:6041",
-                                   user="root",
-                                   password="taosdata",
-                                   timeout=30)
+conn = connect(url="http://localhost:6041",
+               user="root",
+               password="taosdata",
+               timeout=30)
 
 # ANCHOR_END: connect
 # ANCHOR: basic
 # create STable
-cursor: TaosRestCursor = conn.cursor()
+cursor = conn.cursor()
 cursor.execute("DROP DATABASE IF EXISTS power")
 cursor.execute("CREATE DATABASE power")
-cursor.execute("CREATE STABLE power.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS (location BINARY(64), groupId INT)")
+cursor.execute(
+    "CREATE STABLE power.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS (location BINARY(64), groupId INT)")
 
 # insert data
-cursor.execute("""INSERT INTO power.d1001 USING power.meters TAGS(California.SanFrancisco, 2) VALUES ('2018-10-03 14:38:05.000', 10.30000, 219, 0.31000) ('2018-10-03 14:38:15.000', 12.60000, 218, 0.33000) ('2018-10-03 14:38:16.800', 12.30000, 221, 0.31000)
-    power.d1002 USING power.meters TAGS(California.SanFrancisco, 3) VALUES ('2018-10-03 14:38:16.650', 10.30000, 218, 0.25000)
-    power.d1003 USING power.meters TAGS(California.LosAngeles, 2) VALUES ('2018-10-03 14:38:05.500', 11.80000, 221, 0.28000) ('2018-10-03 14:38:16.600', 13.40000, 223, 0.29000)
-    power.d1004 USING power.meters TAGS(California.LosAngeles, 3) VALUES ('2018-10-03 14:38:05.000', 10.80000, 223, 0.29000) ('2018-10-03 14:38:06.500', 11.50000, 221, 0.35000)""")
+cursor.execute("""INSERT INTO power.d1001 USING power.meters TAGS('California.SanFrancisco', 2) VALUES ('2018-10-03 14:38:05.000', 10.30000, 219, 0.31000) ('2018-10-03 14:38:15.000', 12.60000, 218, 0.33000) ('2018-10-03 14:38:16.800', 12.30000, 221, 0.31000)
+    power.d1002 USING power.meters TAGS('California.SanFrancisco', 3) VALUES ('2018-10-03 14:38:16.650', 10.30000, 218, 0.25000)
+    power.d1003 USING power.meters TAGS('California.LosAngeles', 2) VALUES ('2018-10-03 14:38:05.500', 11.80000, 221, 0.28000) ('2018-10-03 14:38:16.600', 13.40000, 223, 0.29000)
+    power.d1004 USING power.meters TAGS('California.LosAngeles', 3) VALUES ('2018-10-03 14:38:05.000', 10.80000, 223, 0.29000) ('2018-10-03 14:38:06.500', 11.50000, 221, 0.35000)""")
 print("inserted row count:", cursor.rowcount)
 
 # query data
@@ -28,7 +29,7 @@ print("queried row count:", cursor.rowcount)
 # get column names from cursor
 column_names = [meta[0] for meta in cursor.description]
 # get rows
-data: list[tuple] = cursor.fetchall()
+data = cursor.fetchall()
 print(column_names)
 for row in data:
     print(row)
diff --git a/docs/examples/python/connection_usage_native_reference.py b/docs/examples/python/connection_usage_native_reference.py
index 4803511e42..0a23c5f95b 100644
--- a/docs/examples/python/connection_usage_native_reference.py
+++ b/docs/examples/python/connection_usage_native_reference.py
@@ -8,7 +8,7 @@ conn.execute("CREATE DATABASE test")
 # change database. same as execute "USE db"
 conn.select_db("test")
 conn.execute("CREATE STABLE weather(ts TIMESTAMP, temperature FLOAT) TAGS (location INT)")
-affected_row: int = conn.execute("INSERT INTO t1 USING weather TAGS(1) VALUES (now, 23.5) (now+1m, 23.5) (now+2m 24.4)")
+affected_row = conn.execute("INSERT INTO t1 USING weather TAGS(1) VALUES (now, 23.5) (now+1m, 23.5) (now+2m, 24.4)")
 print("affected_row", affected_row)
 # output:
 # affected_row 3
@@ -16,10 +16,10 @@ print("affected_row", affected_row)
 
 # ANCHOR: query
 # Execute a sql and get its result set.
It's useful for SELECT statement -result: taos.TaosResult = conn.query("SELECT * from weather") +result = conn.query("SELECT * from weather") # Get fields from result -fields: taos.field.TaosFields = result.fields +fields = result.fields for field in fields: print(field) # {name: ts, type: 9, bytes: 8} @@ -42,4 +42,4 @@ print(data) # ANCHOR_END: query -conn.close() +conn.close() \ No newline at end of file diff --git a/docs/examples/python/fast_write_example.py b/docs/examples/python/fast_write_example.py index c9d606388f..626e3310b1 100644 --- a/docs/examples/python/fast_write_example.py +++ b/docs/examples/python/fast_write_example.py @@ -1,15 +1,14 @@ # install dependencies: # recommend python >= 3.8 -# pip3 install faster-fifo # import logging import math +import multiprocessing import sys import time import os -from multiprocessing import Process -from faster_fifo import Queue +from multiprocessing import Process, Queue from mockdatasource import MockDataSource from queue import Empty from typing import List @@ -22,8 +21,7 @@ TABLE_COUNT = 1000 QUEUE_SIZE = 1000000 MAX_BATCH_SIZE = 3000 -read_processes = [] -write_processes = [] +_DONE_MESSAGE = '__DONE__' def get_connection(): @@ -44,41 +42,64 @@ def get_connection(): # ANCHOR: read -def run_read_task(task_id: int, task_queues: List[Queue]): +def run_read_task(task_id: int, task_queues: List[Queue], infinity): table_count_per_task = TABLE_COUNT // READ_TASK_COUNT - data_source = MockDataSource(f"tb{task_id}", table_count_per_task) + data_source = MockDataSource(f"tb{task_id}", table_count_per_task, infinity) try: for batch in data_source: + if isinstance(batch, tuple): + batch = [batch] for table_id, rows in batch: # hash data to different queue i = table_id % len(task_queues) # block putting forever when the queue is full - task_queues[i].put_many(rows, block=True, timeout=-1) + for row in rows: + task_queues[i].put(row) + if not infinity: + for queue in task_queues: + queue.put(_DONE_MESSAGE) except KeyboardInterrupt: pass + finally: + logging.info('read task over') # ANCHOR_END: read + # ANCHOR: write -def run_write_task(task_id: int, queue: Queue): +def run_write_task(task_id: int, queue: Queue, done_queue: Queue): from sql_writer import SQLWriter log = logging.getLogger(f"WriteTask-{task_id}") writer = SQLWriter(get_connection) lines = None try: while True: - try: - # get as many as possible - lines = queue.get_many(block=False, max_messages_to_get=MAX_BATCH_SIZE) + over = False + lines = [] + for _ in range(MAX_BATCH_SIZE): + try: + line = queue.get_nowait() + if line == _DONE_MESSAGE: + over = True + break + if line: + lines.append(line) + except Empty: + time.sleep(0.1) + if len(lines) > 0: writer.process_lines(lines) - except Empty: - time.sleep(0.01) + if over: + done_queue.put(_DONE_MESSAGE) + break except KeyboardInterrupt: pass except BaseException as e: log.debug(f"lines={lines}") raise e + finally: + writer.close() + log.debug('write task over') # ANCHOR_END: write @@ -103,47 +124,64 @@ def set_global_config(): # ANCHOR: monitor -def run_monitor_process(): +def run_monitor_process(done_queue: Queue): log = logging.getLogger("DataBaseMonitor") - conn = get_connection() - conn.execute("DROP DATABASE IF EXISTS test") - conn.execute("CREATE DATABASE test") - conn.execute("CREATE STABLE test.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) " - "TAGS (location BINARY(64), groupId INT)") + conn = None + try: + conn = get_connection() - def get_count(): - res = conn.query("SELECT count(*) FROM test.meters") - 
rows = res.fetch_all() - return rows[0][0] if rows else 0 + def get_count(): + res = conn.query("SELECT count(*) FROM test.meters") + rows = res.fetch_all() + return rows[0][0] if rows else 0 - last_count = 0 - while True: - time.sleep(10) - count = get_count() - log.info(f"count={count} speed={(count - last_count) / 10}") - last_count = count + last_count = 0 + while True: + try: + done = done_queue.get_nowait() + if done == _DONE_MESSAGE: + break + except Empty: + pass + time.sleep(10) + count = get_count() + log.info(f"count={count} speed={(count - last_count) / 10}") + last_count = count + finally: + conn.close() # ANCHOR_END: monitor # ANCHOR: main -def main(): +def main(infinity): set_global_config() logging.info(f"READ_TASK_COUNT={READ_TASK_COUNT}, WRITE_TASK_COUNT={WRITE_TASK_COUNT}, " f"TABLE_COUNT={TABLE_COUNT}, QUEUE_SIZE={QUEUE_SIZE}, MAX_BATCH_SIZE={MAX_BATCH_SIZE}") - monitor_process = Process(target=run_monitor_process) + conn = get_connection() + conn.execute("DROP DATABASE IF EXISTS test") + conn.execute("CREATE DATABASE IF NOT EXISTS test") + conn.execute("CREATE STABLE IF NOT EXISTS test.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) " + "TAGS (location BINARY(64), groupId INT)") + conn.close() + + done_queue = Queue() + monitor_process = Process(target=run_monitor_process, args=(done_queue,)) monitor_process.start() - time.sleep(3) # waiting for database ready. + logging.debug(f"monitor task started with pid {monitor_process.pid}") task_queues: List[Queue] = [] + write_processes = [] + read_processes = [] + # create task queues for i in range(WRITE_TASK_COUNT): - queue = Queue(max_size_bytes=QUEUE_SIZE) + queue = Queue() task_queues.append(queue) # create write processes for i in range(WRITE_TASK_COUNT): - p = Process(target=run_write_task, args=(i, task_queues[i])) + p = Process(target=run_write_task, args=(i, task_queues[i], done_queue)) p.start() logging.debug(f"WriteTask-{i} started with pid {p.pid}") write_processes.append(p) @@ -151,13 +189,19 @@ def main(): # create read processes for i in range(READ_TASK_COUNT): queues = assign_queues(i, task_queues) - p = Process(target=run_read_task, args=(i, queues)) + p = Process(target=run_read_task, args=(i, queues, infinity)) p.start() logging.debug(f"ReadTask-{i} started with pid {p.pid}") read_processes.append(p) try: monitor_process.join() + for p in read_processes: + p.join() + for p in write_processes: + p.join() + time.sleep(1) + return except KeyboardInterrupt: monitor_process.terminate() [p.terminate() for p in read_processes] @@ -176,5 +220,6 @@ def assign_queues(read_task_id, task_queues): if __name__ == '__main__': - main() + multiprocessing.set_start_method('spawn') + main(False) # ANCHOR_END: main diff --git a/docs/examples/python/kafka_example.py b/docs/examples/python/kafka_example.py index 735059eec0..a89287d372 100644 --- a/docs/examples/python/kafka_example.py +++ b/docs/examples/python/kafka_example.py @@ -26,7 +26,8 @@ class Consumer(object): 'bath_consume': True, 'batch_size': 1000, 'async_model': True, - 'workers': 10 + 'workers': 10, + 'testing': False } LOCATIONS = ['California.SanFrancisco', 'California.LosAngles', 'California.SanDiego', 'California.SanJose', @@ -46,11 +47,12 @@ class Consumer(object): def __init__(self, **configs): self.config: dict = self.DEFAULT_CONFIGS self.config.update(configs) - self.consumer = KafkaConsumer( - self.config.get('kafka_topic'), # topic - bootstrap_servers=self.config.get('kafka_brokers'), - group_id=self.config.get('kafka_group_id'), - ) + if 
not self.config.get('testing'): + self.consumer = KafkaConsumer( + self.config.get('kafka_topic'), # topic + bootstrap_servers=self.config.get('kafka_brokers'), + group_id=self.config.get('kafka_group_id'), + ) self.taos = taos.connect( host=self.config.get('taos_host'), user=self.config.get('taos_user'), @@ -60,7 +62,7 @@ class Consumer(object): ) if self.config.get('async_model'): self.pool = ThreadPoolExecutor(max_workers=self.config.get('workers')) - self.tasks: list[Future] = [] + self.tasks = [] # tags and table mapping # key: {location}_{groupId} value: self.tag_table_mapping = {} i = 0 @@ -104,8 +106,8 @@ class Consumer(object): for task in self.tasks: while not task.done(): pass - if self.pool is not None: - self.pool.shutdown() + if self.pool is not None: + self.pool.shutdown() # clean data if self.config.get('clean_after_testing'): @@ -115,14 +117,14 @@ class Consumer(object): if self.taos is not None: self.taos.close() - def _run(self, f: Callable[[ConsumerRecord], bool]): + def _run(self, f): for message in self.consumer: if self.config.get('async_model'): self.pool.submit(f(message)) else: f(message) - def _run_batch(self, f: Callable[[list[list[ConsumerRecord]]], None]): + def _run_batch(self, f): while True: messages = self.consumer.poll(timeout_ms=500, max_records=self.config.get('batch_size')) if messages: @@ -140,7 +142,7 @@ class Consumer(object): logging.info('## insert sql %s', sql) return self.taos.execute(sql=sql) == 1 - def _to_taos_batch(self, messages: list[list[ConsumerRecord]]): + def _to_taos_batch(self, messages): sql = self._build_sql_batch(messages=messages) if len(sql) == 0: # decode error, skip return @@ -162,7 +164,7 @@ class Consumer(object): table_name = self._get_table_name(location=location, group_id=group_id) return self.INSERT_PART_SQL.format(table_name, ts, current, voltage, phase) - def _build_sql_batch(self, messages: list[list[ConsumerRecord]]) -> str: + def _build_sql_batch(self, messages) -> str: sql_list = [] for partition_messages in messages: for message in partition_messages: @@ -186,7 +188,55 @@ def _get_location_and_group(key: str) -> (str, int): return fields[0], fields[1] +def test_to_taos(consumer: Consumer): + msg = { + 'location': 'California.SanFrancisco', + 'groupId': 1, + 'ts': '2022-12-06 15:13:38.643', + 'current': 3.41, + 'voltage': 105, + 'phase': 0.02027, + } + record = ConsumerRecord(checksum=None, headers=None, offset=1, key=None, value=json.dumps(msg), partition=1, + topic='test', serialized_key_size=None, serialized_header_size=None, + serialized_value_size=None, timestamp=time.time(), timestamp_type=None) + assert consumer._to_taos(message=record) + + +def test_to_taos_batch(consumer: Consumer): + records = [ + [ + ConsumerRecord(checksum=None, headers=None, offset=1, key=None, + value=json.dumps({'location': 'California.SanFrancisco', + 'groupId': 1, + 'ts': '2022-12-06 15:13:38.643', + 'current': 3.41, + 'voltage': 105, + 'phase': 0.02027, }), + partition=1, topic='test', serialized_key_size=None, serialized_header_size=None, + serialized_value_size=None, timestamp=time.time(), timestamp_type=None), + ConsumerRecord(checksum=None, headers=None, offset=1, key=None, + value=json.dumps({'location': 'California.LosAngles', + 'groupId': 2, + 'ts': '2022-12-06 15:13:39.643', + 'current': 3.41, + 'voltage': 102, + 'phase': 0.02027, }), + partition=1, topic='test', serialized_key_size=None, serialized_header_size=None, + serialized_value_size=None, timestamp=time.time(), timestamp_type=None), + ] + ] + + 
consumer._to_taos_batch(messages=records) + + if __name__ == '__main__': - consumer = Consumer(async_model=True) + consumer = Consumer(async_model=True, testing=True) + # init env consumer.init_env() - consumer.consume() \ No newline at end of file + # consumer.consume() + # test build sql + # test build sql batch + test_to_taos(consumer) + test_to_taos_batch(consumer) + \ No newline at end of file diff --git a/docs/examples/python/mockdatasource.py b/docs/examples/python/mockdatasource.py index 1c516a800e..15a7d2ff8c 100644 --- a/docs/examples/python/mockdatasource.py +++ b/docs/examples/python/mockdatasource.py @@ -10,13 +10,14 @@ class MockDataSource: "9.4,118,0.141,California.SanFrancisco,4" ] - def __init__(self, tb_name_prefix, table_count): + def __init__(self, tb_name_prefix, table_count, infinity=True): self.table_name_prefix = tb_name_prefix + "_" self.table_count = table_count self.max_rows = 10000000 self.current_ts = round(time.time() * 1000) - self.max_rows * 100 # [(tableId, tableName, values),] self.data = self._init_data() + self.infinity = infinity def _init_data(self): lines = self.samples * (self.table_count // 5 + 1) @@ -28,14 +29,19 @@ class MockDataSource: def __iter__(self): self.row = 0 - return self + if not self.infinity: + return iter(self._iter_data()) + else: + return self def __next__(self): """ next 1000 rows for each table. return: {tableId:[row,...]} """ - # generate 1000 timestamps + return self._iter_data() + + def _iter_data(self): ts = [] for _ in range(1000): self.current_ts += 100 @@ -47,3 +53,10 @@ class MockDataSource: rows = [table_name + ',' + t + ',' + values for t in ts] result.append((table_id, rows)) return result + + +if __name__ == '__main__': + datasource = MockDataSource('t', 10, False) + for data in datasource: + print(data) + \ No newline at end of file diff --git a/docs/examples/python/sql_writer.py b/docs/examples/python/sql_writer.py index 758167376b..db51bb7174 100644 --- a/docs/examples/python/sql_writer.py +++ b/docs/examples/python/sql_writer.py @@ -10,6 +10,7 @@ class SQLWriter: self._tb_tags = {} self._conn = get_connection_func() self._max_sql_length = self.get_max_sql_length() + self._conn.execute("create database if not exists test") self._conn.execute("USE test") def get_max_sql_length(self): @@ -20,7 +21,7 @@ class SQLWriter: return int(r[1]) return 1024 * 1024 - def process_lines(self, lines: str): + def process_lines(self, lines: [str]): """ :param lines: [[tbName,ts,current,voltage,phase,location,groupId]] """ @@ -60,6 +61,7 @@ class SQLWriter: buf.append(q) sql_len += len(q) sql += " ".join(buf) + self.create_tables() self.execute_sql(sql) self._tb_values.clear() @@ -88,3 +90,23 @@ class SQLWriter: except BaseException as e: self.log.error("Execute SQL: %s", sql) raise e + + def close(self): + if self._conn: + self._conn.close() + + +if __name__ == '__main__': + def get_connection_func(): + conn = taos.connect() + return conn + + + writer = SQLWriter(get_connection_func=get_connection_func) + writer.execute_sql( + "create stable if not exists meters (ts timestamp, current float, voltage int, phase float) " + "tags (location binary(64), groupId int)") + writer.execute_sql( + "INSERT INTO d21001 USING meters TAGS ('California.SanFrancisco', 2) " + "VALUES ('2021-07-13 14:06:32.272', 10.2, 219, 0.32)") + \ No newline at end of file diff --git a/docs/examples/python/tmq_example.py b/docs/examples/python/tmq_example.py index a4625ca11a..32778e9f25 100644 --- a/docs/examples/python/tmq_example.py +++ 
b/docs/examples/python/tmq_example.py @@ -1,58 +1,55 @@ +from taos.tmq import Consumer import taos -from taos.tmq import * - -conn = taos.connect() - -print("init") -conn.execute("drop topic if exists topic_ctb_column") -conn.execute("drop database if exists py_tmq") -conn.execute("create database if not exists py_tmq vgroups 2") -conn.select_db("py_tmq") -conn.execute( - "create stable if not exists stb1 (ts timestamp, c1 int, c2 float, c3 binary(10)) tags(t1 int)" -) -conn.execute("create table if not exists tb1 using stb1 tags(1)") -conn.execute("create table if not exists tb2 using stb1 tags(2)") -conn.execute("create table if not exists tb3 using stb1 tags(3)") - -print("create topic") -conn.execute( - "create topic if not exists topic_ctb_column as select ts, c1, c2, c3 from stb1" -) - -print("build consumer") -conf = TaosTmqConf() -conf.set("group.id", "tg2") -conf.set("td.connect.user", "root") -conf.set("td.connect.pass", "taosdata") -conf.set("enable.auto.commit", "true") -def tmq_commit_cb_print(tmq, resp, offset, param=None): - print(f"commit: {resp}, tmq: {tmq}, offset: {offset}, param: {param}") +def init_tmq_env(db, topic): + conn = taos.connect() + conn.execute("drop topic if exists {}".format(topic)) + conn.execute("drop database if exists {}".format(db)) + conn.execute("create database if not exists {}".format(db)) + conn.select_db(db) + conn.execute( + "create stable if not exists stb1 (ts timestamp, c1 int, c2 float, c3 varchar(16)) tags(t1 int, t3 varchar(16))") + conn.execute("create table if not exists tb1 using stb1 tags(1, 't1')") + conn.execute("create table if not exists tb2 using stb1 tags(2, 't2')") + conn.execute("create table if not exists tb3 using stb1 tags(3, 't3')") + conn.execute("create topic if not exists {} as select ts, c1, c2, c3 from stb1".format(topic)) + conn.execute("insert into tb1 values (now, 1, 1.0, 'tmq test')") + conn.execute("insert into tb2 values (now, 2, 2.0, 'tmq test')") + conn.execute("insert into tb3 values (now, 3, 3.0, 'tmq test')") -conf.set_auto_commit_cb(tmq_commit_cb_print, None) -tmq = conf.new_consumer() +def cleanup(db, topic): + conn = taos.connect() + conn.execute("drop topic if exists {}".format(topic)) + conn.execute("drop database if exists {}".format(db)) -print("build topic list") -topic_list = TaosTmqList() -topic_list.append("topic_ctb_column") +if __name__ == '__main__': + init_tmq_env("tmq_test", "tmq_test_topic") # init env + consumer = Consumer( + { + "group.id": "tg2", + "td.connect.user": "root", + "td.connect.pass": "taosdata", + "enable.auto.commit": "true", + } + ) + consumer.subscribe(["tmq_test_topic"]) -print("basic consume loop") -tmq.subscribe(topic_list) + try: + while True: + res = consumer.poll(1) + if not res: + break + err = res.error() + if err is not None: + raise err + val = res.value() -sub_list = tmq.subscription() - -print("subscribed topics: ", sub_list) - -while 1: - res = tmq.poll(1000) - if res: - topic = res.get_topic_name() - vg = res.get_vgroup_id() - db = res.get_db_name() - print(f"topic: {topic}\nvgroup id: {vg}\ndb: {db}") - for row in res: - print(row) + for block in val: + print(block.fetchall()) + finally: + consumer.unsubscribe() + consumer.close() + cleanup("tmq_test", "tmq_test_topic") \ No newline at end of file diff --git a/include/common/tmsg.h b/include/common/tmsg.h index ad6077db09..0cc9fb8619 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -907,6 +907,7 @@ typedef struct { int32_t numOfRetensions; SArray* pRetensions; int8_t schemaless; + int16_t 
sstTrigger; } SDbCfgRsp; int32_t tSerializeSDbCfgRsp(void* buf, int32_t bufLen, const SDbCfgRsp* pRsp); diff --git a/include/common/tmsgcb.h b/include/common/tmsgcb.h index a1ebd855cd..eaac319141 100644 --- a/include/common/tmsgcb.h +++ b/include/common/tmsgcb.h @@ -39,7 +39,7 @@ typedef enum { QUEUE_MAX, } EQueueType; -typedef int32_t (*UpdateDnodeInfoFp)(void* pData, int32_t* dnodeId, int64_t* clusterId, char* fqdn, uint16_t* port); +typedef void (*UpdateDnodeInfoFp)(void* pData, int32_t* dnodeId, int64_t* clusterId, char* fqdn, uint16_t* port); typedef int32_t (*PutToQueueFp)(void* pMgmt, EQueueType qtype, SRpcMsg* pMsg); typedef int32_t (*GetQueueSizeFp)(void* pMgmt, int32_t vgId, EQueueType qtype); typedef int32_t (*SendReqFp)(const SEpSet* pEpSet, SRpcMsg* pMsg); @@ -70,7 +70,8 @@ void tmsgSendRsp(SRpcMsg* pMsg); void tmsgRegisterBrokenLinkArg(SRpcMsg* pMsg); void tmsgReleaseHandle(SRpcHandleInfo* pHandle, int8_t type); void tmsgReportStartup(const char* name, const char* desc); -int32_t tmsgUpdateDnodeInfo(int32_t* dnodeId, int64_t* clusterId, char* fqdn, uint16_t* port); +void tmsgUpdateDnodeInfo(int32_t* dnodeId, int64_t* clusterId, char* fqdn, uint16_t* port); +void tmsgUpdateDnodeEpSet(SEpSet* epset); #ifdef __cplusplus } diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index c00625c51c..c5352eee46 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -354,7 +354,8 @@ int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask); void tFreeSStreamTask(SStreamTask* pTask); static FORCE_INLINE int32_t streamTaskInput(SStreamTask* pTask, SStreamQueueItem* pItem) { - if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { + int8_t type = pItem->type; + if (type == STREAM_INPUT__DATA_SUBMIT) { SStreamDataSubmit* pSubmitClone = streamSubmitRefClone((SStreamDataSubmit*)pItem); if (pSubmitClone == NULL) { qDebug("task %d %p submit enqueue failed since out of memory", pTask->taskId, pTask); @@ -365,19 +366,19 @@ static FORCE_INLINE int32_t streamTaskInput(SStreamTask* pTask, SStreamQueueItem qDebug("task %d %p submit enqueue %p %p %p", pTask->taskId, pTask, pItem, pSubmitClone, pSubmitClone->data); taosWriteQitem(pTask->inputQueue->queue, pSubmitClone); // qStreamInput(pTask->exec.executor, pSubmitClone); - } else if (pItem->type == STREAM_INPUT__DATA_BLOCK || pItem->type == STREAM_INPUT__DATA_RETRIEVE || - pItem->type == STREAM_INPUT__REF_DATA_BLOCK) { + } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || + type == STREAM_INPUT__REF_DATA_BLOCK) { taosWriteQitem(pTask->inputQueue->queue, pItem); // qStreamInput(pTask->exec.executor, pItem); - } else if (pItem->type == STREAM_INPUT__CHECKPOINT) { + } else if (type == STREAM_INPUT__CHECKPOINT) { taosWriteQitem(pTask->inputQueue->queue, pItem); // qStreamInput(pTask->exec.executor, pItem); - } else if (pItem->type == STREAM_INPUT__GET_RES) { + } else if (type == STREAM_INPUT__GET_RES) { taosWriteQitem(pTask->inputQueue->queue, pItem); // qStreamInput(pTask->exec.executor, pItem); } - if (pItem->type != STREAM_INPUT__GET_RES && pItem->type != STREAM_INPUT__CHECKPOINT && pTask->triggerParam != 0) { + if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && pTask->triggerParam != 0) { atomic_val_compare_exchange_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE, TASK_TRIGGER_STATUS__ACTIVE); } diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 4c23c1f557..defafce30e 100644 --- a/include/libs/sync/sync.h +++ 
b/include/libs/sync/sync.h @@ -193,7 +193,7 @@ typedef struct SSyncLogStore { SyncIndex (*syncLogLastIndex)(struct SSyncLogStore* pLogStore); SyncTerm (*syncLogLastTerm)(struct SSyncLogStore* pLogStore); - int32_t (*syncLogAppendEntry)(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry); + int32_t (*syncLogAppendEntry)(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry, bool forcSync); int32_t (*syncLogGetEntry)(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncRaftEntry** ppEntry); int32_t (*syncLogTruncate)(struct SSyncLogStore* pLogStore, SyncIndex fromIndex); diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index a1ae1e429d..a0f421212a 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -201,6 +201,7 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver, SWalCkHead *pHead); int32_t walFetchBody(SWalReader *pRead, SWalCkHead **ppHead); int32_t walSkipFetchBody(SWalReader *pRead, const SWalCkHead *pHead); +SWalRef *walRefFirstVer(SWal *, SWalRef *); SWalRef *walRefCommittedVer(SWal *); SWalRef *walOpenRef(SWal *); diff --git a/include/util/tdef.h b/include/util/tdef.h index 9626180b99..3a152a36a1 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -499,7 +499,7 @@ enum { #define DEFAULT_PAGESIZE 4096 #define VNODE_TIMEOUT_SEC 60 -#define MNODE_TIMEOUT_SEC 10 +#define MNODE_TIMEOUT_SEC 60 #ifdef __cplusplus } diff --git a/packaging/tools/install.sh b/packaging/tools/install.sh index 2a078b5eab..dfdbaa6fdd 100755 --- a/packaging/tools/install.sh +++ b/packaging/tools/install.sh @@ -210,8 +210,8 @@ function install_bin() { [ -x ${install_main_dir}/bin/${serverName} ] && ${csudo}ln -s ${install_main_dir}/bin/${serverName} ${bin_link_dir}/${serverName} || : [ -x ${install_main_dir}/bin/${udfdName} ] && ${csudo}ln -s ${install_main_dir}/bin/${udfdName} ${bin_link_dir}/${udfdName} || : [ -x ${install_main_dir}/bin/${adapterName} ] && ${csudo}ln -s ${install_main_dir}/bin/${adapterName} ${bin_link_dir}/${adapterName} || : - [ -x ${install_main_dir}/bin/${benchmarkName} ] && ${csudo}ln -s ${install_main_dir}/bin/${benchmarkName} ${bin_link_dir}/${demoName} || : - [ -x ${install_main_dir}/bin/${benchmarkName} ] && ${csudo}ln -s ${install_main_dir}/bin/${benchmarkName} ${bin_link_dir}/${benchmarkName} || : + [ -x ${install_main_dir}/bin/${benchmarkName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${benchmarkName} ${bin_link_dir}/${demoName} || : + [ -x ${install_main_dir}/bin/${benchmarkName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${benchmarkName} ${bin_link_dir}/${benchmarkName} || : [ -x ${install_main_dir}/bin/${dumpName} ] && ${csudo}ln -s ${install_main_dir}/bin/${dumpName} ${bin_link_dir}/${dumpName} || : [ -x ${install_main_dir}/bin/${xname} ] && ${csudo}ln -s ${install_main_dir}/bin/${xname} ${bin_link_dir}/${xname} || : [ -x ${install_main_dir}/bin/TDinsight.sh ] && ${csudo}ln -s ${install_main_dir}/bin/TDinsight.sh ${bin_link_dir}/TDinsight.sh || : @@ -743,6 +743,34 @@ function is_version_compatible() { esac } +deb_erase() { + confirm="" + while [ "" == "${confirm}" ]; do + echo -e -n "${RED}Existing TDengine deb is detected, do you want to remove it? [yes|no] ${NC}:" + read confirm + if [ "yes" == "$confirm" ]; then + ${csudo}dpkg --remove tdengine ||: + break + elif [ "no" == "$confirm" ]; then + break + fi + done +} + +rpm_erase() { + confirm="" + while [ "" == "${confirm}" ]; do + echo -e -n "${RED}Existing TDengine rpm is detected, do you want to remove it? 
[yes|no] ${NC}:" + read confirm + if [ "yes" == "$confirm" ]; then + ${csudo}rpm -e tdengine ||: + break + elif [ "no" == "$confirm" ]; then + break + fi + done +} + function updateProduct() { # Check if version compatible if ! is_version_compatible; then @@ -755,6 +783,13 @@ function updateProduct() { echo "File ${tarName} does not exist" exit 1 fi + + if echo $osinfo | grep -qwi "centos"; then + rpm -q tdengine 2>&1 > /dev/null && rpm_erase tdengine ||: + elif echo $osinfo | grep -qwi "ubuntu"; then + dpkg -l tdengine 2>&1 | grep ii > /dev/null && deb_erase tdengine ||: + fi + tar -zxf ${tarName} install_jemalloc diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 495c2cca9a..593a8fd20a 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -357,6 +357,7 @@ void doDestroyRequest(void *p) { taosMemoryFreeClear(pRequest->pDb); doFreeReqResultInfo(&pRequest->body.resInfo); + tsem_destroy(&pRequest->body.rspSem); taosArrayDestroy(pRequest->tableList); taosArrayDestroy(pRequest->dbList); @@ -371,6 +372,9 @@ void doDestroyRequest(void *p) { } if (pRequest->syncQuery) { + if (pRequest->body.param){ + tsem_destroy(&((SSyncQueryParam*)pRequest->body.param)->sem); + } taosMemoryFree(pRequest->body.param); } @@ -388,45 +392,6 @@ void destroyRequest(SRequestObj *pRequest) { removeRequest(pRequest->self); } -void taosClientCrash(int signum, void *sigInfo, void *context) { - taosIgnSignal(SIGTERM); - taosIgnSignal(SIGHUP); - taosIgnSignal(SIGINT); - taosIgnSignal(SIGBREAK); - -#if !defined(WINDOWS) - taosIgnSignal(SIGBUS); -#endif - taosIgnSignal(SIGABRT); - taosIgnSignal(SIGFPE); - taosIgnSignal(SIGSEGV); - - char *pMsg = NULL; - const char *flags = "UTL FATAL "; - ELogLevel level = DEBUG_FATAL; - int32_t dflag = 255; - int64_t msgLen= -1; - - if (tsEnableCrashReport) { - if (taosGenCrashJsonMsg(signum, &pMsg, lastClusterId, appInfo.startTime)) { - taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); - goto _return; - } else { - msgLen = strlen(pMsg); - } - } - -_return: - - taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo); - -#ifdef _TD_DARWIN_64 - exit(signum); -#elif defined(WINDOWS) - exit(signum); -#endif -} - void crashReportThreadFuncUnexpectedStopped(void) { atomic_store_32(&clientStop, -1); } static void *tscCrashReportThreadFp(void *param) { @@ -523,15 +488,26 @@ void tscStopCrashReport() { } } -static void tscSetSignalHandle() { -#if !defined(WINDOWS) - taosSetSignal(SIGBUS, taosClientCrash); -#endif - taosSetSignal(SIGABRT, taosClientCrash); - taosSetSignal(SIGFPE, taosClientCrash); - taosSetSignal(SIGSEGV, taosClientCrash); + +void tscWriteCrashInfo(int signum, void *sigInfo, void *context) { + char *pMsg = NULL; + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + int64_t msgLen= -1; + + if (tsEnableCrashReport) { + if (taosGenCrashJsonMsg(signum, &pMsg, lastClusterId, appInfo.startTime)) { + taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); + } else { + msgLen = strlen(pMsg); + } + } + + taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo); } + void taos_init_imp(void) { // In the APIs of other program language, taos_cleanup is not available yet. // So, to make sure taos_cleanup will be invoked to clean up the allocated resource to suppress the valgrind warning. 
@@ -555,8 +531,6 @@ void taos_init_imp(void) { return; } - tscSetSignalHandle(); - initQueryModuleMsgHandle(); if (taosConvInit() != 0) { diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 53acafeeaa..b5b99e92b0 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -159,6 +159,12 @@ STscObj* taos_connect_internal(const char* ip, const char* user, const char* pas return taosConnectImpl(user, &secretEncrypt[0], localDb, NULL, NULL, *pInst, connType); } +void freeQueryParam(SSyncQueryParam* param) { + if (param == NULL) return; + tsem_destroy(¶m->sem); + taosMemoryFree(param); +} + int32_t buildRequest(uint64_t connId, const char* sql, int sqlLen, void* param, bool validateSql, SRequestObj** pRequest, int64_t reqid) { *pRequest = createRequest(connId, TSDB_SQL_SELECT, reqid); @@ -180,17 +186,18 @@ int32_t buildRequest(uint64_t connId, const char* sql, int sqlLen, void* param, (*pRequest)->sqlLen = sqlLen; (*pRequest)->validateOnly = validateSql; + SSyncQueryParam* newpParam; if (param == NULL) { - SSyncQueryParam* pParam = taosMemoryCalloc(1, sizeof(SSyncQueryParam)); - if (pParam == NULL) { + newpParam = taosMemoryCalloc(1, sizeof(SSyncQueryParam)); + if (newpParam == NULL) { destroyRequest(*pRequest); *pRequest = NULL; return TSDB_CODE_OUT_OF_MEMORY; } - tsem_init(&pParam->sem, 0, 0); - pParam->pRequest = (*pRequest); - param = pParam; + tsem_init(&newpParam->sem, 0, 0); + newpParam->pRequest = (*pRequest); + param = newpParam; } (*pRequest)->body.param = param; @@ -201,8 +208,7 @@ int32_t buildRequest(uint64_t connId, const char* sql, int sqlLen, void* param, if (err) { tscError("%" PRId64 " failed to add to request container, reqId:0x%" PRIx64 ", conn:%" PRId64 ", %s", (*pRequest)->self, (*pRequest)->requestId, pTscObj->id, sql); - - taosMemoryFree(param); + freeQueryParam(newpParam); destroyRequest(*pRequest); *pRequest = NULL; return TSDB_CODE_OUT_OF_MEMORY; @@ -214,6 +220,7 @@ int32_t buildRequest(uint64_t connId, const char* sql, int sqlLen, void* param, nodesCreateAllocator((*pRequest)->requestId, tsQueryNodeChunkSize, &((*pRequest)->allocatorRefId))) { tscError("%" PRId64 " failed to create node allocator, reqId:0x%" PRIx64 ", conn:%" PRId64 ", %s", (*pRequest)->self, (*pRequest)->requestId, pTscObj->id, sql); + freeQueryParam(newpParam); destroyRequest(*pRequest); *pRequest = NULL; return TSDB_CODE_OUT_OF_MEMORY; @@ -1253,7 +1260,7 @@ STscObj* taosConnectImpl(const char* user, const char* auth, const char* db, __t int64_t transporterId = 0; asyncSendMsgToServer(pTscObj->pAppInfo->pTransporter, &pTscObj->pAppInfo->mgmtEp.epSet, &transporterId, body); - + tsem_wait(&pRequest->body.rspSem); if (pRequest->code != TSDB_CODE_SUCCESS) { const char* errorMsg = diff --git a/source/client/src/clientMain.c b/source/client/src/clientMain.c index 15c1d65162..e5f677637e 100644 --- a/source/client/src/clientMain.c +++ b/source/client/src/clientMain.c @@ -509,9 +509,8 @@ void taos_stop_query(TAOS_RES *res) { SRequestObj *pRequest = (SRequestObj *)res; pRequest->killed = true; - int32_t numOfFields = taos_num_fields(pRequest); // It is not a query, no need to stop. 
- if (numOfFields == 0) { + if (NULL == pRequest->pQuery || QUERY_EXEC_MODE_SCHEDULE != pRequest->pQuery->execMode) { tscDebug("request 0x%" PRIx64 " no need to be killed since not query", pRequest->requestId); return; } diff --git a/source/client/src/clientRawBlockWrite.c b/source/client/src/clientRawBlockWrite.c index 8ceb2a3380..9a3838a6b4 100644 --- a/source/client/src/clientRawBlockWrite.c +++ b/source/client/src/clientRawBlockWrite.c @@ -1448,6 +1448,7 @@ int taos_write_raw_block_with_fields(TAOS* taos, int rows, char* pData, const ch end: taosMemoryFreeClear(pTableMeta); qDestroyQuery(pQuery); + destroyRequest(pRequest); taosMemoryFree(subReq); return code; } @@ -1639,6 +1640,7 @@ int taos_write_raw_block(TAOS* taos, int rows, char* pData, const char* tbname) end: taosMemoryFreeClear(pTableMeta); qDestroyQuery(pQuery); + destroyRequest(pRequest); taosMemoryFree(subReq); return code; } diff --git a/source/client/src/clientStmt.c b/source/client/src/clientStmt.c index 82ea9e0d8f..1ec6450228 100644 --- a/source/client/src/clientStmt.c +++ b/source/client/src/clientStmt.c @@ -300,11 +300,7 @@ int32_t stmtCleanExecInfo(STscStmt* pStmt, bool keepTable, bool deepClean) { continue; } - if (STMT_TYPE_MULTI_INSERT == pStmt->sql.type) { - qFreeStmtDataBlock(pBlocks); - } else { - qDestroyStmtDataBlock(pBlocks); - } + qDestroyStmtDataBlock(pBlocks); taosHashRemove(pStmt->exec.pBlockHash, key, keyLen); pIter = taosHashIterate(pStmt->exec.pBlockHash, pIter); diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 95625e8d93..891f5e21ab 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -2821,8 +2821,8 @@ int32_t tSerializeSDbCfgRsp(void *buf, int32_t bufLen, const SDbCfgRsp *pRsp) { if (tEncodeI8(&encoder, pRetension->keepUnit) < 0) return -1; } if (tEncodeI8(&encoder, pRsp->schemaless) < 0) return -1; + if (tEncodeI16(&encoder, pRsp->sstTrigger) < 0) return -1; tEndEncode(&encoder); - int32_t tlen = encoder.pos; tEncoderClear(&encoder); return tlen; @@ -2873,6 +2873,7 @@ int32_t tDeserializeSDbCfgRsp(void *buf, int32_t bufLen, SDbCfgRsp *pRsp) { } } if (tDecodeI8(&decoder, &pRsp->schemaless) < 0) return -1; + if (tDecodeI16(&decoder, &pRsp->sstTrigger) < 0) return -1; tEndDecode(&decoder); tDecoderClear(&decoder); diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 3ce37a5f8e..220e55f7f3 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -137,7 +137,7 @@ static void vmGenerateVnodeCfg(SCreateVnodeReq *pCreate, SVnodeCfg *pCfg) { pNode->nodeId = pCreate->replicas[i].id; pNode->nodePort = pCreate->replicas[i].port; tstrncpy(pNode->nodeFqdn, pCreate->replicas[i].fqdn, TSDB_FQDN_LEN); - (void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); + tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); } } diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index 693fe97daa..951544c766 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -79,8 +79,6 @@ int32_t vmOpenVnode(SVnodeMgmt *pMgmt, SWrapperCfg *pCfg, SVnode *pImpl) { void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { char path[TSDB_FILENAME_LEN] = {0}; - vnodeProposeCommitOnNeed(pVnode->pImpl); - taosThreadRwlockWrlock(&pMgmt->lock); taosHashRemove(pMgmt->hash, &pVnode->vgId, sizeof(int32_t)); 
taosThreadRwlockUnlock(&pMgmt->lock); @@ -343,13 +341,12 @@ static void vmCheckSyncTimeout(SVnodeMgmt *pMgmt) { int32_t numOfVnodes = 0; SVnodeObj **ppVnodes = vmGetVnodeListFromHash(pMgmt, &numOfVnodes); - for (int32_t i = 0; i < numOfVnodes; ++i) { - SVnodeObj *pVnode = ppVnodes[i]; - vnodeSyncCheckTimeout(pVnode->pImpl); - vmReleaseVnode(pMgmt, pVnode); - } - if (ppVnodes != NULL) { + for (int32_t i = 0; i < numOfVnodes; ++i) { + SVnodeObj *pVnode = ppVnodes[i]; + vnodeSyncCheckTimeout(pVnode->pImpl); + vmReleaseVnode(pMgmt, pVnode); + } taosMemoryFree(ppVnodes); } } diff --git a/source/dnode/mgmt/node_util/inc/dmUtil.h b/source/dnode/mgmt/node_util/inc/dmUtil.h index eb83bc9ff8..2609422ccc 100644 --- a/source/dnode/mgmt/node_util/inc/dmUtil.h +++ b/source/dnode/mgmt/node_util/inc/dmUtil.h @@ -108,6 +108,7 @@ typedef struct { bool stopped; SEpSet mnodeEps; SArray *dnodeEps; + SArray *oldDnodeEps; SHashObj *dnodeHash; TdThreadRwlock lock; SMsgCb msgCb; @@ -175,7 +176,7 @@ void dmUpdateEps(SDnodeData *pData, SArray *pDnodeEps); void dmGetMnodeEpSet(SDnodeData *pData, SEpSet *pEpSet); void dmGetMnodeEpSetForRedirect(SDnodeData *pData, SRpcMsg *pMsg, SEpSet *pEpSet); void dmSetMnodeEpSet(SDnodeData *pData, SEpSet *pEpSet); -int32_t dmUpdateDnodeInfo(void *pData, int32_t *dnodeId, int64_t *clusterId, char *fqdn, uint16_t *port); +void dmUpdateDnodeInfo(void *pData, int32_t *dnodeId, int64_t *clusterId, char *fqdn, uint16_t *port); #ifdef __cplusplus } diff --git a/source/dnode/mgmt/node_util/src/dmEps.c b/source/dnode/mgmt/node_util/src/dmEps.c index 7bae703753..4285eb5c07 100644 --- a/source/dnode/mgmt/node_util/src/dmEps.c +++ b/source/dnode/mgmt/node_util/src/dmEps.c @@ -332,40 +332,48 @@ void dmSetMnodeEpSet(SDnodeData *pData, SEpSet *pEpSet) { } } -int32_t dmUpdateDnodeInfo(void *data, int32_t *dnodeId, int64_t *clusterId, char *fqdn, uint16_t *port) { +void dmUpdateDnodeInfo(void *data, int32_t *did, int64_t *clusterId, char *fqdn, uint16_t *port) { SDnodeData *pData = data; - int32_t ret = -1; + int32_t dnodeId = -1; + if (did != NULL) dnodeId = *did; + taosThreadRwlockRdlock(&pData->lock); - if (*dnodeId <= 0) { - for (int32_t i = 0; i < (int32_t)taosArrayGetSize(pData->dnodeEps); ++i) { - SDnodeEp *pDnodeEp = taosArrayGet(pData->dnodeEps, i); + + if (pData->oldDnodeEps != NULL) { + int32_t size = (int32_t)taosArrayGetSize(pData->oldDnodeEps); + for (int32_t i = 0; i < size; ++i) { + SDnodeEp *pDnodeEp = taosArrayGet(pData->oldDnodeEps, i); if (strcmp(pDnodeEp->ep.fqdn, fqdn) == 0 && pDnodeEp->ep.port == *port) { - dInfo("dnode:%s:%u, update dnodeId from %d to %d", fqdn, *port, *dnodeId, pDnodeEp->id); - *dnodeId = pDnodeEp->id; - *clusterId = pData->clusterId; - ret = 0; - } - } - if (ret != 0) { - dInfo("dnode:%s:%u, failed to update dnodeId:%d", fqdn, *port, *dnodeId); - } - } else { - SDnodeEp *pDnodeEp = taosHashGet(pData->dnodeHash, dnodeId, sizeof(int32_t)); - if (pDnodeEp) { - if (strcmp(pDnodeEp->ep.fqdn, fqdn) != 0) { - dInfo("dnode:%d, update port from %s to %s", *dnodeId, fqdn, pDnodeEp->ep.fqdn); + dInfo("dnode:%d, update ep:%s:%u to %s:%u", dnodeId, fqdn, *port, pDnodeEp->ep.fqdn, pDnodeEp->ep.port); tstrncpy(fqdn, pDnodeEp->ep.fqdn, TSDB_FQDN_LEN); - } - if (pDnodeEp->ep.port != *port) { - dInfo("dnode:%d, update port from %u to %u", *dnodeId, *port, pDnodeEp->ep.port); *port = pDnodeEp->ep.port; } - *clusterId = pData->clusterId; - ret = 0; - } else { - dInfo("dnode:%d, failed to update dnode info", *dnodeId); } } + + if (did != NULL && dnodeId <= 0) { + int32_t size 
= (int32_t)taosArrayGetSize(pData->dnodeEps); + for (int32_t i = 0; i < size; ++i) { + SDnodeEp *pDnodeEp = taosArrayGet(pData->dnodeEps, i); + if (strcmp(pDnodeEp->ep.fqdn, fqdn) == 0 && pDnodeEp->ep.port == *port) { + dInfo("dnode:%s:%u, update dnodeId to dnode:%d", fqdn, *port, pDnodeEp->id); + *did = pDnodeEp->id; + if (clusterId != NULL) *clusterId = pData->clusterId; + } + } + } + + if (dnodeId > 0) { + SDnodeEp *pDnodeEp = taosHashGet(pData->dnodeHash, &dnodeId, sizeof(int32_t)); + if (pDnodeEp) { + if (strcmp(pDnodeEp->ep.fqdn, fqdn) != 0 || pDnodeEp->ep.port != *port) { + dInfo("dnode:%d, update ep:%s:%u to %s:%u", dnodeId, fqdn, *port, pDnodeEp->ep.fqdn, pDnodeEp->ep.port); + tstrncpy(fqdn, pDnodeEp->ep.fqdn, TSDB_FQDN_LEN); + *port = pDnodeEp->ep.port; + } + if (clusterId != NULL) *clusterId = pData->clusterId; + } + } + taosThreadRwlockUnlock(&pData->lock); - return ret; } \ No newline at end of file diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index 37e2c35225..3bbf4a4279 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -742,6 +742,7 @@ SSdbRow *mndConsumerActionDecode(SSdbRaw *pRaw) { if (tDecodeSMqConsumerObj(buf, pConsumer) == NULL) { goto CM_DECODE_OVER; } + tmsgUpdateDnodeEpSet(&pConsumer->ep); terrno = TSDB_CODE_SUCCESS; diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 7e5c29d56f..bdfda14a32 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -889,7 +889,7 @@ static int32_t mndProcessGetDbCfgReq(SRpcMsg *pReq) { cfgRsp.numOfRetensions = pDb->cfg.numOfRetensions; cfgRsp.pRetensions = pDb->cfg.pRetensions; cfgRsp.schemaless = pDb->cfg.schemaless; - + cfgRsp.sstTrigger = pDb->cfg.sstTrigger; int32_t contLen = tSerializeSDbCfgRsp(NULL, 0, &cfgRsp); void *pRsp = rpcMallocCont(contLen); if (pRsp == NULL) { diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index ddb54a95ea..f4e6aad7a7 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -180,6 +180,7 @@ static SSdbRow *mndDnodeActionDecode(SSdbRaw *pRaw) { SDB_GET_RESERVE(pRaw, dataPos, TSDB_DNODE_RESERVE_SIZE, _OVER) terrno = 0; + tmsgUpdateDnodeInfo(&pDnode->id, NULL, pDnode->fqdn, &pDnode->port); _OVER: if (terrno != 0) { diff --git a/source/dnode/mnode/impl/src/mndFunc.c b/source/dnode/mnode/impl/src/mndFunc.c index 31f31a15ba..244e6058d4 100644 --- a/source/dnode/mnode/impl/src/mndFunc.c +++ b/source/dnode/mnode/impl/src/mndFunc.c @@ -293,7 +293,7 @@ static int32_t mndProcessCreateFuncReq(SRpcMsg *pReq) { goto _OVER; } - mInfo("func:%s, start to create", createReq.name); + mInfo("func:%s, start to create, size:%d", createReq.name, createReq.codeLen); if (mndCheckOperPrivilege(pMnode, pReq->info.conn.user, MND_OPER_CREATE_FUNC) != 0) { goto _OVER; } diff --git a/source/dnode/mnode/impl/src/mndMnode.c b/source/dnode/mnode/impl/src/mndMnode.c index 9b3934c40c..7dcd287fb7 100644 --- a/source/dnode/mnode/impl/src/mndMnode.c +++ b/source/dnode/mnode/impl/src/mndMnode.c @@ -15,13 +15,13 @@ #define _DEFAULT_SOURCE #include "mndMnode.h" +#include "mndCluster.h" #include "mndDnode.h" #include "mndPrivilege.h" #include "mndShow.h" #include "mndSync.h" #include "mndTrans.h" #include "tmisce.h" -#include "mndCluster.h" #define MNODE_VER_NUMBER 1 #define MNODE_RESERVE_SIZE 64 @@ -181,9 +181,8 @@ _OVER: static int32_t mndMnodeActionInsert(SSdb *pSdb, 
SMnodeObj *pObj) { mTrace("mnode:%d, perform insert action, row:%p", pObj->id, pObj); - pObj->pDnode = sdbAcquire(pSdb, SDB_DNODE, &pObj->id); + pObj->pDnode = sdbAcquireNotReadyObj(pSdb, SDB_DNODE, &pObj->id); if (pObj->pDnode == NULL) { - terrno = TSDB_CODE_MND_DNODE_NOT_EXIST; mError("mnode:%d, failed to perform insert action since %s", pObj->id, terrstr()); return -1; } @@ -748,7 +747,7 @@ static void mndReloadSyncConfig(SMnode *pMnode) { pNode->clusterId = mndGetClusterId(pMnode); pNode->nodePort = pObj->pDnode->port; tstrncpy(pNode->nodeFqdn, pObj->pDnode->fqdn, TSDB_FQDN_LEN); - (void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); + tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); mInfo("vgId:1, ep:%s:%u dnode:%d", pNode->nodeFqdn, pNode->nodePort, pNode->nodeId); if (pObj->pDnode->id == pMnode->selfDnodeId) { cfg.myIndex = cfg.replicaNum; diff --git a/source/dnode/mnode/impl/src/mndSma.c b/source/dnode/mnode/impl/src/mndSma.c index 141bb1df60..fe0dc9e857 100644 --- a/source/dnode/mnode/impl/src/mndSma.c +++ b/source/dnode/mnode/impl/src/mndSma.c @@ -202,11 +202,13 @@ static SSdbRow *mndSmaActionDecode(SSdbRaw *pRaw) { _OVER: if (terrno != 0) { - mError("sma:%s, failed to decode from raw:%p since %s", pSma == NULL ? "null" : pSma->name, pRaw, terrstr()); - taosMemoryFreeClear(pSma->expr); - taosMemoryFreeClear(pSma->tagsFilter); - taosMemoryFreeClear(pSma->sql); - taosMemoryFreeClear(pSma->ast); + if (pSma != NULL) { + mError("sma:%s, failed to decode from raw:%p since %s", pSma->name, pRaw, terrstr()); + taosMemoryFreeClear(pSma->expr); + taosMemoryFreeClear(pSma->tagsFilter); + taosMemoryFreeClear(pSma->sql); + taosMemoryFreeClear(pSma->ast); + } taosMemoryFreeClear(pRow); return NULL; } diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index b8ef185199..153bb8bd04 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -760,6 +760,27 @@ static SSdbRow *mndSubActionDecode(SSdbRaw *pRaw) { goto SUB_DECODE_OVER; } + // update epset saved in mnode + if (pSub->unassignedVgs != NULL) { + int32_t size = (int32_t)taosArrayGetSize(pSub->unassignedVgs); + for (int32_t i = 0; i < size; ++i) { + SMqVgEp *pMqVgEp = taosArrayGet(pSub->unassignedVgs, i); + tmsgUpdateDnodeEpSet(&pMqVgEp->epSet); + } + } + if (pSub->consumerHash != NULL) { + void *pIter = taosHashIterate(pSub->consumerHash, NULL); + while (pIter) { + SMqConsumerEp *pConsumerEp = pIter; + int32_t size = (int32_t)taosArrayGetSize(pConsumerEp->vgs); + for (int32_t i = 0; i < size; ++i) { + SMqVgEp *pMqVgEp = taosArrayGet(pConsumerEp->vgs, i); + tmsgUpdateDnodeEpSet(&pMqVgEp->epSet); + } + pIter = taosHashIterate(pSub->consumerHash, pIter); + } + } + terrno = TSDB_CODE_SUCCESS; SUB_DECODE_OVER: diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 93c9192bed..6b675586e4 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -271,9 +271,11 @@ SSyncFSM *mndSyncMakeFsm(SMnode *pMnode) { int32_t mndInitSync(SMnode *pMnode) { SSyncMgmt *pMgmt = &pMnode->syncMgmt; taosThreadMutexInit(&pMgmt->lock, NULL); + taosThreadMutexLock(&pMgmt->lock); pMgmt->transId = 0; pMgmt->transSec = 0; pMgmt->transSeq = 0; + taosThreadMutexUnlock(&pMgmt->lock); SSyncInfo syncInfo = { .snapshotStrategy = SYNC_STRATEGY_STANDARD_SNAPSHOT, @@ -301,7 +303,7 @@ int32_t mndInitSync(SMnode *pMnode) 
{ pNode->nodeId = pMgmt->replicas[i].id; pNode->nodePort = pMgmt->replicas[i].port; tstrncpy(pNode->nodeFqdn, pMgmt->replicas[i].fqdn, sizeof(pNode->nodeFqdn)); - (void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); + tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); mInfo("vgId:1, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId, pNode->clusterId); } @@ -369,6 +371,7 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) { if (pMgmt->transId != 0) { mError("trans:%d, can't be proposed since trans:%d already waiting for confirm", transId, pMgmt->transId); taosThreadMutexUnlock(&pMgmt->lock); + rpcFreeCont(req.pCont); terrno = TSDB_CODE_MND_LAST_TRANS_NOT_FINISHED; return terrno; } diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 718fc5c73f..dfcd55bcba 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -329,6 +329,7 @@ static SSdbRow *mndTransActionDecode(SSdbRaw *pRaw) { action.pRaw = NULL; } else if (action.actionType == TRANS_ACTION_MSG) { SDB_GET_BINARY(pRaw, dataPos, (void *)&action.epSet, sizeof(SEpSet), _OVER); + tmsgUpdateDnodeEpSet(&action.epSet); SDB_GET_INT16(pRaw, dataPos, &action.msgType, _OVER) SDB_GET_INT8(pRaw, dataPos, &unused /*&action.msgSent*/, _OVER) SDB_GET_INT8(pRaw, dataPos, &unused /*&action.msgReceived*/, _OVER) diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index 2550c68cfb..54ea9e7b24 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -1441,10 +1441,10 @@ static int32_t mndRedistributeVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, { SSdbRaw *pRaw = mndVgroupActionEncode(&newVg); - if (pRaw == NULL) return -1; + if (pRaw == NULL) goto _OVER; if (mndTransAppendCommitlog(pTrans, pRaw) != 0) { sdbFreeRaw(pRaw); - return -1; + goto _OVER; } (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY); } diff --git a/source/dnode/mnode/sdb/inc/sdb.h b/source/dnode/mnode/sdb/inc/sdb.h index e799f08a17..5a44e4279f 100644 --- a/source/dnode/mnode/sdb/inc/sdb.h +++ b/source/dnode/mnode/sdb/inc/sdb.h @@ -291,6 +291,7 @@ int32_t sdbWriteWithoutFree(SSdb *pSdb, SSdbRaw *pRaw); * @return void* The object of the row. */ void *sdbAcquire(SSdb *pSdb, ESdbType type, const void *pKey); +void *sdbAcquireNotReadyObj(SSdb *pSdb, ESdbType type, const void *pKey); /** * @brief Release a row from sdb. 
diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index 9e830b83e6..c2d7a9757a 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -228,11 +228,12 @@ static int32_t sdbReadFileImp(SSdb *pSdb) { int32_t readLen = 0; int64_t ret = 0; char file[PATH_MAX] = {0}; + int32_t bufLen = TSDB_MAX_MSG_SIZE; snprintf(file, sizeof(file), "%s%ssdb.data", pSdb->currDir, TD_DIRSEP); mInfo("start to read sdb file:%s", file); - SSdbRaw *pRaw = taosMemoryMalloc(TSDB_MAX_MSG_SIZE + 100); + SSdbRaw *pRaw = taosMemoryMalloc(bufLen + 100); if (pRaw == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; mError("failed read sdb file since %s", terrstr()); @@ -275,14 +276,15 @@ static int32_t sdbReadFileImp(SSdb *pSdb) { } readLen = pRaw->dataLen + sizeof(int32_t); - if (readLen >= pRaw->dataLen) { - SSdbRaw *pNewRaw = taosMemoryMalloc(pRaw->dataLen + TSDB_MAX_MSG_SIZE); + if (readLen >= bufLen) { + bufLen = pRaw->dataLen * 2; + SSdbRaw *pNewRaw = taosMemoryMalloc(bufLen + 100); if (pNewRaw == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - mError("failed read sdb file since malloc new sdbRaw size:%d failed", pRaw->dataLen + TSDB_MAX_MSG_SIZE); + mError("failed read sdb file since malloc new sdbRaw size:%d failed", bufLen); goto _OVER; } - mInfo("malloc new sdbRaw size:%d, type:%d", pRaw->dataLen + TSDB_MAX_MSG_SIZE, pRaw->type); + mInfo("malloc new sdb raw size:%d, type:%d", bufLen, pRaw->type); memcpy(pNewRaw, pRaw, sizeof(SSdbRaw)); sdbFreeRaw(pRaw); pRaw = pNewRaw; diff --git a/source/dnode/mnode/sdb/src/sdbHash.c b/source/dnode/mnode/sdb/src/sdbHash.c index 32b34ea3a3..505dee3d87 100644 --- a/source/dnode/mnode/sdb/src/sdbHash.c +++ b/source/dnode/mnode/sdb/src/sdbHash.c @@ -270,7 +270,7 @@ int32_t sdbWrite(SSdb *pSdb, SSdbRaw *pRaw) { return code; } -void *sdbAcquire(SSdb *pSdb, ESdbType type, const void *pKey) { +void *sdbAcquireAll(SSdb *pSdb, ESdbType type, const void *pKey, bool onlyReady) { terrno = 0; SHashObj *hash = sdbGetHash(pSdb, type); @@ -306,10 +306,24 @@ void *sdbAcquire(SSdb *pSdb, ESdbType type, const void *pKey) { break; } + if (pRet == NULL) { + if (!onlyReady) { + terrno = 0; + atomic_add_fetch_32(&pRow->refCount, 1); + pRet = pRow->pObj; + sdbPrintOper(pSdb, pRow, "acquire"); + } + } + sdbUnLock(pSdb, type); return pRet; } +void *sdbAcquire(SSdb *pSdb, ESdbType type, const void *pKey) { return sdbAcquireAll(pSdb, type, pKey, true); } +void *sdbAcquireNotReadyObj(SSdb *pSdb, ESdbType type, const void *pKey) { + return sdbAcquireAll(pSdb, type, pKey, false); +} + static void sdbCheckRow(SSdb *pSdb, SSdbRow *pRow) { int32_t type = pRow->type; sdbWriteLock(pSdb, type); diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index ebdc49d967..d6f59b125f 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -153,6 +153,8 @@ typedef struct SMTbCursor SMTbCursor; SMTbCursor *metaOpenTbCursor(SMeta *pMeta); void metaCloseTbCursor(SMTbCursor *pTbCur); int32_t metaTbCursorNext(SMTbCursor *pTbCur); +int32_t metaTbCursorPrev(SMTbCursor *pTbCur); + #endif // tsdb diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 2536ec621f..efa3af5cbd 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -202,6 +202,7 @@ int32_t tsdbCmprColData(SColData *pColData, int8_t cmprAlg, SBlockCol *pBlockCol uint8_t **ppBuf); int32_t tsdbDecmprColData(uint8_t *pIn, SBlockCol *pBlockCol, int8_t cmprAlg, int32_t nVal, SColData *pColData, 
uint8_t **ppBuf); +int32_t tRowInfoCmprFn(const void *p1, const void *p2); // tsdbMemTable ============================================================================================== // SMemTable int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 75367883f1..2501af7f04 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -247,7 +247,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader); int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData); // STsdbSnapWriter ======================================== int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter); -int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData); +int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr); int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter); int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback); // STqSnapshotReader == diff --git a/source/dnode/vnode/src/meta/metaQuery.c b/source/dnode/vnode/src/meta/metaQuery.c index 58d0711389..ce6a8de8c5 100644 --- a/source/dnode/vnode/src/meta/metaQuery.c +++ b/source/dnode/vnode/src/meta/metaQuery.c @@ -311,7 +311,7 @@ void metaCloseTbCursor(SMTbCursor *pTbCur) { } } -int metaTbCursorNext(SMTbCursor *pTbCur) { +int32_t metaTbCursorNext(SMTbCursor *pTbCur) { int ret; void *pBuf; STbCfg tbCfg; @@ -335,6 +335,31 @@ int metaTbCursorNext(SMTbCursor *pTbCur) { return 0; } +int32_t metaTbCursorPrev(SMTbCursor *pTbCur) { + int ret; + void *pBuf; + STbCfg tbCfg; + + for (;;) { + ret = tdbTbcPrev(pTbCur->pDbc, &pTbCur->pKey, &pTbCur->kLen, &pTbCur->pVal, &pTbCur->vLen); + if (ret < 0) { + return -1; + } + + tDecoderClear(&pTbCur->mr.coder); + + metaGetTableEntryByVersion(&pTbCur->mr, ((SUidIdxVal *)pTbCur->pVal)[0].version, *(tb_uid_t *)pTbCur->pKey); + if (pTbCur->mr.me.type == TSDB_SUPER_TABLE) { + continue; + } + + break; + } + + return 0; +} + + SSchemaWrapper *metaGetTableSchema(SMeta *pMeta, tb_uid_t uid, int32_t sver, int lock) { void *pData = NULL; int nData = 0; diff --git a/source/dnode/vnode/src/sma/smaSnapshot.c b/source/dnode/vnode/src/sma/smaSnapshot.c index 34f884f9f9..0a6fac0fe7 100644 --- a/source/dnode/vnode/src/sma/smaSnapshot.c +++ b/source/dnode/vnode/src/sma/smaSnapshot.c @@ -423,10 +423,10 @@ int32_t rsmaSnapWrite(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData) // rsma1/rsma2 if (pHdr->type == SNAP_DATA_RSMA1) { pHdr->type = SNAP_DATA_TSDB; - code = tsdbSnapWrite(pWriter->pDataWriter[0], pData, nData); + code = tsdbSnapWrite(pWriter->pDataWriter[0], pHdr); } else if (pHdr->type == SNAP_DATA_RSMA2) { pHdr->type = SNAP_DATA_TSDB; - code = tsdbSnapWrite(pWriter->pDataWriter[1], pData, nData); + code = tsdbSnapWrite(pWriter->pDataWriter[1], pHdr); } else if (pHdr->type == SNAP_DATA_QTASK) { code = rsmaSnapWriteQTaskInfo(pWriter, pData, nData); } else { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 1d5fae33eb..b195cfafb0 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -521,7 +521,12 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { tqOffsetResetToData(&fetchOffsetNew, 0, 0); } } else { - tqOffsetResetToLog(&fetchOffsetNew, walGetFirstVer(pTq->pVnode->pWal)); + pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef); + if (pHandle->pRef == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return 
-1; + } + tqOffsetResetToLog(&fetchOffsetNew, pHandle->pRef->refVer - 1); } } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { @@ -719,6 +724,8 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen) { SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg; + tqDebug("vgId:%d, delete sub: %s", pTq->pVnode->config.vgId, pReq->subKey); + taosWLockLatch(&pTq->pushLock); int32_t code = taosHashRemove(pTq->pPushMgr, pReq->subKey, strlen(pReq->subKey)); if (code != 0) { diff --git a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c index f05f5d5c88..a837543e62 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c @@ -164,7 +164,7 @@ void* tsdbCacherowsReaderClose(void* pReader) { destroyLastBlockLoadInfo(p->pLoadInfo); - taosMemoryFree((void*) p->idstr); + taosMemoryFree((void*)p->idstr); taosMemoryFree(pReader); return NULL; } @@ -241,7 +241,11 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 taosArrayPush(pLastCols, &p); } - tsdbTakeReadSnap(pr->pVnode->pTsdb, &pr->pReadSnap, "cache-l"); + code = tsdbTakeReadSnap(pr->pVnode->pTsdb, &pr->pReadSnap, "cache-l"); + if (code != TSDB_CODE_SUCCESS) { + goto _end; + } + pr->pDataFReader = NULL; pr->pDataFReaderLast = NULL; @@ -252,7 +256,7 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 code = doExtractCacheRow(pr, lruCache, pKeyInfo->uid, &pRow, &h); if (code != TSDB_CODE_SUCCESS) { - return code; + goto _end; } if (h == NULL) { @@ -321,7 +325,7 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 STableKeyInfo* pKeyInfo = &pr->pTableList[i]; code = doExtractCacheRow(pr, lruCache, pKeyInfo->uid, &pRow, &h); if (code != TSDB_CODE_SUCCESS) { - return code; + goto _end; } if (h == NULL) { diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index 7dc839773f..51fdc69a95 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -458,9 +458,8 @@ static int32_t tsdbMergeFileSet(STsdb *pTsdb, SDFileSet *pSetOld, SDFileSet *pSe taosMemoryFree(pHeadF); } } else { - nRef = pHeadF->nRef; - *pHeadF = *pSetNew->pHeadF; - pHeadF->nRef = nRef; + ASSERT(pHeadF->offset == pSetNew->pHeadF->offset); + ASSERT(pHeadF->size == pSetNew->pHeadF->size); } // data @@ -481,9 +480,7 @@ static int32_t tsdbMergeFileSet(STsdb *pTsdb, SDFileSet *pSetOld, SDFileSet *pSe taosMemoryFree(pDataF); } } else { - nRef = pDataF->nRef; - *pDataF = *pSetNew->pDataF; - pDataF->nRef = nRef; + pDataF->size = pSetNew->pDataF->size; } // sma @@ -504,9 +501,7 @@ static int32_t tsdbMergeFileSet(STsdb *pTsdb, SDFileSet *pSetOld, SDFileSet *pSe taosMemoryFree(pSmaF); } } else { - nRef = pSmaF->nRef; - *pSmaF = *pSetNew->pSmaF; - pSmaF->nRef = nRef; + pSmaF->size = pSetNew->pSmaF->size; } // stt diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 08d52554c6..98c9c0fdda 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -15,274 +15,628 @@ #include "tsdb.h" -// STsdbSnapReader ======================================== -typedef enum { SNAP_DATA_FILE_ITER = 0, SNAP_STT_FILE_ITER } EFIterT; -typedef struct { - SRBTreeNode n; - SRowInfo rInfo; - EFIterT type; - union { - struct { - SArray* aBlockIdx; - 
int32_t iBlockIdx; - SBlockIdx* pBlockIdx; - SMapData mBlock; - int32_t iBlock; - }; // .data file - struct { - int32_t iStt; - SArray* aSttBlk; - int32_t iSttBlk; - }; // .stt file - }; - SBlockData bData; - int32_t iRow; -} SFDataIter; - -struct STsdbSnapReader { - STsdb* pTsdb; - int64_t sver; - int64_t ever; - STsdbFS fs; - int8_t type; - // for data file - int8_t dataDone; - int32_t fid; - SDataFReader* pDataFReader; - SFDataIter* pIter; - SRBTree rbt; - SFDataIter aFDataIter[TSDB_MAX_STT_TRIGGER + 1]; - SBlockData bData; - SSkmInfo skmTable; - // for del file - int8_t delDone; - SDelFReader* pDelFReader; - SArray* aDelIdx; // SArray - int32_t iDelIdx; - SArray* aDelData; // SArray - uint8_t* aBuf[5]; -}; - -extern int32_t tRowInfoCmprFn(const void* p1, const void* p2); extern int32_t tsdbReadDataBlockEx(SDataFReader* pReader, SDataBlk* pDataBlk, SBlockData* pBlockData); extern int32_t tsdbUpdateTableSchema(SMeta* pMeta, int64_t suid, int64_t uid, SSkmInfo* pSkmInfo); +extern int32_t tsdbWriteDataBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SMapData* mDataBlk, int8_t cmprAlg); +extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SArray* aSttBlk, int8_t cmprAlg); -static int32_t tFDataIterCmprFn(const SRBTreeNode* pNode1, const SRBTreeNode* pNode2) { - SFDataIter* pIter1 = (SFDataIter*)(((uint8_t*)pNode1) - offsetof(SFDataIter, n)); - SFDataIter* pIter2 = (SFDataIter*)(((uint8_t*)pNode2) - offsetof(SFDataIter, n)); +// STsdbDataIter2 ======================================== +#define TSDB_MEM_TABLE_DATA_ITER 0 +#define TSDB_DATA_FILE_DATA_ITER 1 +#define TSDB_STT_FILE_DATA_ITER 2 +#define TSDB_TOMB_FILE_DATA_ITER 3 - return tRowInfoCmprFn(&pIter1->rInfo, &pIter2->rInfo); -} +typedef struct STsdbDataIter2 STsdbDataIter2; +typedef struct STsdbFilterInfo STsdbFilterInfo; -static int32_t tsdbSnapReadOpenFile(STsdbSnapReader* pReader) { +typedef struct { + int64_t suid; + int64_t uid; + SDelData delData; +} SDelInfo; + +struct STsdbDataIter2 { + STsdbDataIter2* next; + SRBTreeNode rbtn; + + int32_t type; + SRowInfo rowInfo; + SDelInfo delInfo; + union { + // TSDB_MEM_TABLE_DATA_ITER + struct { + SMemTable* pMemTable; + } mIter; + + // TSDB_DATA_FILE_DATA_ITER + struct { + SDataFReader* pReader; + SArray* aBlockIdx; // SArray + SMapData mDataBlk; + SBlockData bData; + int32_t iBlockIdx; + int32_t iDataBlk; + int32_t iRow; + } dIter; + + // TSDB_STT_FILE_DATA_ITER + struct { + SDataFReader* pReader; + int32_t iStt; + SArray* aSttBlk; + SBlockData bData; + int32_t iSttBlk; + int32_t iRow; + } sIter; + // TSDB_TOMB_FILE_DATA_ITER + struct { + SDelFReader* pReader; + SArray* aDelIdx; + SArray* aDelData; + int32_t iDelIdx; + int32_t iDelData; + } tIter; + }; +}; + +#define TSDB_FILTER_FLAG_BY_VERSION 0x1 +struct STsdbFilterInfo { + int32_t flag; + int64_t sver; + int64_t ever; +}; + +#define TSDB_RBTN_TO_DATA_ITER(pNode) ((STsdbDataIter2*)(((char*)pNode) - offsetof(STsdbDataIter2, rbtn))) + +/* open */ +static int32_t tsdbOpenDataFileDataIter(SDataFReader* pReader, STsdbDataIter2** ppIter) { int32_t code = 0; int32_t lino = 0; - SDFileSet dFileSet = {.fid = pReader->fid}; - SDFileSet* pSet = taosArraySearch(pReader->fs.aDFileSet, &dFileSet, tDFileSetCmprFn, TD_GT); - if (pSet == NULL) return code; - - pReader->fid = pSet->fid; - code = tsdbDataFReaderOpen(&pReader->pDataFReader, pReader->pTsdb, pSet); - TSDB_CHECK_CODE(code, lino, _exit); - - pReader->pIter = NULL; - tRBTreeCreate(&pReader->rbt, tFDataIterCmprFn); - - // .data file - SFDataIter* pIter = 
&pReader->aFDataIter[0]; - pIter->type = SNAP_DATA_FILE_ITER; - - code = tsdbReadBlockIdx(pReader->pDataFReader, pIter->aBlockIdx); - TSDB_CHECK_CODE(code, lino, _exit); - - for (pIter->iBlockIdx = 0; pIter->iBlockIdx < taosArrayGetSize(pIter->aBlockIdx); pIter->iBlockIdx++) { - pIter->pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->aBlockIdx, pIter->iBlockIdx); - - code = tsdbReadDataBlk(pReader->pDataFReader, pIter->pBlockIdx, &pIter->mBlock); + // create handle + STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter)); + if (pIter == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); - - for (pIter->iBlock = 0; pIter->iBlock < pIter->mBlock.nItem; pIter->iBlock++) { - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pIter->mBlock, pIter->iBlock, &dataBlk, tGetDataBlk); - - if (dataBlk.minVer > pReader->ever || dataBlk.maxVer < pReader->sver) continue; - - code = tsdbReadDataBlockEx(pReader->pDataFReader, &dataBlk, &pIter->bData); - TSDB_CHECK_CODE(code, lino, _exit); - - ASSERT(pIter->pBlockIdx->suid == pIter->bData.suid); - ASSERT(pIter->pBlockIdx->uid == pIter->bData.uid); - - for (pIter->iRow = 0; pIter->iRow < pIter->bData.nRow; pIter->iRow++) { - int64_t rowVer = pIter->bData.aVersion[pIter->iRow]; - - if (rowVer >= pReader->sver && rowVer <= pReader->ever) { - pIter->rInfo.suid = pIter->pBlockIdx->suid; - pIter->rInfo.uid = pIter->pBlockIdx->uid; - pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow); - goto _add_iter_and_break; - } - } - } - - continue; - - _add_iter_and_break: - tRBTreePut(&pReader->rbt, (SRBTreeNode*)pIter); - break; } - // .stt file - pIter = &pReader->aFDataIter[1]; - for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { - pIter->type = SNAP_STT_FILE_ITER; - pIter->iStt = iStt; - - code = tsdbReadSttBlk(pReader->pDataFReader, iStt, pIter->aSttBlk); + pIter->type = TSDB_DATA_FILE_DATA_ITER; + pIter->dIter.pReader = pReader; + if ((pIter->dIter.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); + } - for (pIter->iSttBlk = 0; pIter->iSttBlk < taosArrayGetSize(pIter->aSttBlk); pIter->iSttBlk++) { - SSttBlk* pSttBlk = (SSttBlk*)taosArrayGet(pIter->aSttBlk, pIter->iSttBlk); + code = tBlockDataCreate(&pIter->dIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); - if (pSttBlk->minVer > pReader->ever) continue; - if (pSttBlk->maxVer < pReader->sver) continue; + pIter->dIter.iBlockIdx = 0; + pIter->dIter.iDataBlk = 0; + pIter->dIter.iRow = 0; - code = tsdbReadSttBlockEx(pReader->pDataFReader, iStt, pSttBlk, &pIter->bData); - TSDB_CHECK_CODE(code, lino, _exit); + // read data + code = tsdbReadBlockIdx(pReader, pIter->dIter.aBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); - for (pIter->iRow = 0; pIter->iRow < pIter->bData.nRow; pIter->iRow++) { - int64_t rowVer = pIter->bData.aVersion[pIter->iRow]; + if (taosArrayGetSize(pIter->dIter.aBlockIdx) == 0) goto _clear; - if (rowVer >= pReader->sver && rowVer <= pReader->ever) { - pIter->rInfo.suid = pIter->bData.suid; - pIter->rInfo.uid = pIter->bData.uid ? 
pIter->bData.uid : pIter->bData.aUid[pIter->iRow]; - pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow); - goto _add_iter; +_exit: + if (code) { + if (pIter) { + _clear: + tBlockDataDestroy(&pIter->dIter.bData, 1); + taosArrayDestroy(pIter->dIter.aBlockIdx); + taosMemoryFree(pIter); + pIter = NULL; + } + } + *ppIter = pIter; + return code; +} + +static int32_t tsdbOpenSttFileDataIter(SDataFReader* pReader, int32_t iStt, STsdbDataIter2** ppIter) { + int32_t code = 0; + int32_t lino = 0; + + // create handle + STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter)); + if (pIter == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + pIter->type = TSDB_STT_FILE_DATA_ITER; + pIter->sIter.pReader = pReader; + pIter->sIter.iStt = iStt; + pIter->sIter.aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); + if (pIter->sIter.aSttBlk == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tBlockDataCreate(&pIter->sIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->sIter.iSttBlk = 0; + pIter->sIter.iRow = 0; + + // read data + code = tsdbReadSttBlk(pReader, iStt, pIter->sIter.aSttBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(pIter->sIter.aSttBlk) == 0) goto _clear; + +_exit: + if (code) { + if (pIter) { + _clear: + taosArrayDestroy(pIter->sIter.aSttBlk); + tBlockDataDestroy(&pIter->sIter.bData, 1); + taosMemoryFree(pIter); + pIter = NULL; + } + } + *ppIter = pIter; + return code; +} + +static int32_t tsdbOpenTombFileDataIter(SDelFReader* pReader, STsdbDataIter2** ppIter) { + int32_t code = 0; + int32_t lino = 0; + + STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter)); + if (pIter == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + pIter->type = TSDB_TOMB_FILE_DATA_ITER; + + pIter->tIter.pReader = pReader; + if ((pIter->tIter.aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + if ((pIter->tIter.aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbReadDelIdx(pReader, pIter->tIter.aDelIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(pIter->tIter.aDelIdx) == 0) goto _clear; + + pIter->tIter.iDelIdx = 0; + pIter->tIter.iDelData = 0; + +_exit: + if (code) { + if (pIter) { + _clear: + taosArrayDestroy(pIter->tIter.aDelIdx); + taosArrayDestroy(pIter->tIter.aDelData); + taosMemoryFree(pIter); + pIter = NULL; + } + } + *ppIter = pIter; + return code; +} + +/* close */ +static void tsdbCloseDataFileDataIter(STsdbDataIter2* pIter) { + tBlockDataDestroy(&pIter->dIter.bData, 1); + tMapDataClear(&pIter->dIter.mDataBlk); + taosArrayDestroy(pIter->dIter.aBlockIdx); + taosMemoryFree(pIter); +} + +static void tsdbCloseSttFileDataIter(STsdbDataIter2* pIter) { + tBlockDataDestroy(&pIter->sIter.bData, 1); + taosArrayDestroy(pIter->sIter.aSttBlk); + taosMemoryFree(pIter); +} + +static void tsdbCloseTombFileDataIter(STsdbDataIter2* pIter) { + taosArrayDestroy(pIter->tIter.aDelData); + taosArrayDestroy(pIter->tIter.aDelIdx); + taosMemoryFree(pIter); +} + +static void tsdbCloseDataIter2(STsdbDataIter2* pIter) { + if (pIter->type == TSDB_MEM_TABLE_DATA_ITER) { + ASSERT(0); + } else if (pIter->type == TSDB_DATA_FILE_DATA_ITER) { + tsdbCloseDataFileDataIter(pIter); + } else if (pIter->type == TSDB_STT_FILE_DATA_ITER) { + 
tsdbCloseSttFileDataIter(pIter); + } else if (pIter->type == TSDB_TOMB_FILE_DATA_ITER) { + tsdbCloseTombFileDataIter(pIter); + } else { + ASSERT(0); + } +} + +/* cmpr */ +static int32_t tsdbDataIterCmprFn(const SRBTreeNode* pNode1, const SRBTreeNode* pNode2) { + STsdbDataIter2* pIter1 = TSDB_RBTN_TO_DATA_ITER(pNode1); + STsdbDataIter2* pIter2 = TSDB_RBTN_TO_DATA_ITER(pNode2); + return tRowInfoCmprFn(&pIter1->rowInfo, &pIter2->rowInfo); +} + +/* seek */ + +/* iter next */ +static int32_t tsdbDataFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) { + int32_t code = 0; + int32_t lino = 0; + + for (;;) { + while (pIter->dIter.iRow < pIter->dIter.bData.nRow) { + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pIter->dIter.bData.aVersion[pIter->dIter.iRow] < pFilterInfo->sver || + pIter->dIter.bData.aVersion[pIter->dIter.iRow] > pFilterInfo->ever) { + pIter->dIter.iRow++; + continue; + } + } + } + + pIter->rowInfo.suid = pIter->dIter.bData.suid; + pIter->rowInfo.uid = pIter->dIter.bData.uid; + pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->dIter.bData, pIter->dIter.iRow); + pIter->dIter.iRow++; + goto _exit; + } + + for (;;) { + while (pIter->dIter.iDataBlk < pIter->dIter.mDataBlk.nItem) { + SDataBlk dataBlk; + tMapDataGetItemByIdx(&pIter->dIter.mDataBlk, pIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); + + // filter + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pFilterInfo->sver > dataBlk.maxVer || pFilterInfo->ever < dataBlk.minVer) { + pIter->dIter.iDataBlk++; + continue; + } + } + } + + code = tsdbReadDataBlockEx(pIter->dIter.pReader, &dataBlk, &pIter->dIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->dIter.iDataBlk++; + pIter->dIter.iRow = 0; + + break; + } + + if (pIter->dIter.iRow < pIter->dIter.bData.nRow) break; + + for (;;) { + if (pIter->dIter.iBlockIdx < taosArrayGetSize(pIter->dIter.aBlockIdx)) { + SBlockIdx* pBlockIdx = taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx); + + code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->dIter.iBlockIdx++; + pIter->dIter.iDataBlk = 0; + + break; + } else { + pIter->rowInfo = (SRowInfo){0}; + goto _exit; } } } - - continue; - - _add_iter: - tRBTreePut(&pReader->rbt, (SRBTreeNode*)pIter); - pIter++; } _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s", TD_VID(pReader->pTsdb->pVnode), __func__, tstrerror(code)); - } else { - tsdbInfo("vgId:%d, %s done, path:%s, fid:%d", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->pTsdb->path, - pReader->fid); + tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code)); } return code; } -static int32_t tsdbSnapNextRow(STsdbSnapReader* pReader) { +static int32_t tsdbSttFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) { int32_t code = 0; + int32_t lino = 0; - if (pReader->pIter) { - SFDataIter* pIter = NULL; - while (true) { - _find_row: - pIter = pReader->pIter; - for (pIter->iRow++; pIter->iRow < pIter->bData.nRow; pIter->iRow++) { - int64_t rowVer = pIter->bData.aVersion[pIter->iRow]; - - if (rowVer >= pReader->sver && rowVer <= pReader->ever) { - pIter->rInfo.suid = pIter->bData.suid; - pIter->rInfo.uid = pIter->bData.uid ? 
pIter->bData.uid : pIter->bData.aUid[pIter->iRow]; - pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow); - goto _out; - } - } - - if (pIter->type == SNAP_DATA_FILE_ITER) { - while (true) { - for (pIter->iBlock++; pIter->iBlock < pIter->mBlock.nItem; pIter->iBlock++) { - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pIter->mBlock, pIter->iBlock, &dataBlk, tGetDataBlk); - - if (dataBlk.minVer > pReader->ever || dataBlk.maxVer < pReader->sver) continue; - - code = tsdbReadDataBlockEx(pReader->pDataFReader, &dataBlk, &pIter->bData); - if (code) goto _err; - - pIter->iRow = -1; - goto _find_row; + for (;;) { + while (pIter->sIter.iRow < pIter->sIter.bData.nRow) { + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pFilterInfo->sver > pIter->sIter.bData.aVersion[pIter->sIter.iRow] || + pFilterInfo->ever < pIter->sIter.bData.aVersion[pIter->sIter.iRow]) { + pIter->sIter.iRow++; + continue; } - - pIter->iBlockIdx++; - if (pIter->iBlockIdx >= taosArrayGetSize(pIter->aBlockIdx)) break; - - pIter->pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->aBlockIdx, pIter->iBlockIdx); - code = tsdbReadDataBlk(pReader->pDataFReader, pIter->pBlockIdx, &pIter->mBlock); - if (code) goto _err; - pIter->iBlock = -1; } - - pReader->pIter = NULL; - break; - } else if (pIter->type == SNAP_STT_FILE_ITER) { - for (pIter->iSttBlk++; pIter->iSttBlk < taosArrayGetSize(pIter->aSttBlk); pIter->iSttBlk++) { - SSttBlk* pSttBlk = (SSttBlk*)taosArrayGet(pIter->aSttBlk, pIter->iSttBlk); - - if (pSttBlk->minVer > pReader->ever || pSttBlk->maxVer < pReader->sver) continue; - - code = tsdbReadSttBlockEx(pReader->pDataFReader, pIter->iStt, pSttBlk, &pIter->bData); - if (code) goto _err; - - pIter->iRow = -1; - goto _find_row; - } - - pReader->pIter = NULL; - break; - } else { - ASSERT(0); } + + pIter->rowInfo.suid = pIter->sIter.bData.suid; + pIter->rowInfo.uid = pIter->sIter.bData.uid ? 
pIter->sIter.bData.uid : pIter->sIter.bData.aUid[pIter->sIter.iRow]; + pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->sIter.bData, pIter->sIter.iRow); + pIter->sIter.iRow++; + goto _exit; } - _out: - pIter = (SFDataIter*)tRBTreeMin(&pReader->rbt); - if (pReader->pIter && pIter) { - int32_t c = tRowInfoCmprFn(&pReader->pIter->rInfo, &pIter->rInfo); - if (c > 0) { - tRBTreePut(&pReader->rbt, (SRBTreeNode*)pReader->pIter); - pReader->pIter = NULL; + for (;;) { + if (pIter->sIter.iSttBlk < taosArrayGetSize(pIter->sIter.aSttBlk)) { + SSttBlk* pSttBlk = taosArrayGet(pIter->sIter.aSttBlk, pIter->sIter.iSttBlk); + + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pFilterInfo->sver > pSttBlk->maxVer || pFilterInfo->ever < pSttBlk->minVer) { + pIter->sIter.iSttBlk++; + continue; + } + } + } + + code = tsdbReadSttBlockEx(pIter->sIter.pReader, pIter->sIter.iStt, pSttBlk, &pIter->sIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->sIter.iRow = 0; + pIter->sIter.iSttBlk++; + break; } else { - ASSERT(c); + pIter->rowInfo = (SRowInfo){0}; + goto _exit; + } + } + } + +_exit: + if (code) { + tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbTombFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) { + int32_t code = 0; + int32_t lino = 0; + + for (;;) { + while (pIter->tIter.iDelData < taosArrayGetSize(pIter->tIter.aDelData)) { + SDelData* pDelData = taosArrayGet(pIter->tIter.aDelData, pIter->tIter.iDelData); + + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pFilterInfo->sver > pDelData->version || pFilterInfo->ever < pDelData->version) { + pIter->tIter.iDelData++; + continue; + } + } + } + + pIter->delInfo.delData = *pDelData; + pIter->tIter.iDelData++; + goto _exit; + } + + for (;;) { + if (pIter->tIter.iDelIdx < taosArrayGetSize(pIter->tIter.aDelIdx)) { + SDelIdx* pDelIdx = taosArrayGet(pIter->tIter.aDelIdx, pIter->tIter.iDelIdx); + + code = tsdbReadDelData(pIter->tIter.pReader, pDelIdx, pIter->tIter.aDelData); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->delInfo.suid = pDelIdx->suid; + pIter->delInfo.uid = pDelIdx->uid; + pIter->tIter.iDelData = 0; + pIter->tIter.iDelIdx++; + break; + } else { + pIter->delInfo = (SDelInfo){0}; + goto _exit; + } + } + } + +_exit: + if (code) { + tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbDataIterNext2(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) { + int32_t code = 0; + + if (pIter->type == TSDB_MEM_TABLE_DATA_ITER) { + ASSERT(0); + return code; + } else if (pIter->type == TSDB_DATA_FILE_DATA_ITER) { + return tsdbDataFileDataIterNext(pIter, pFilterInfo); + } else if (pIter->type == TSDB_STT_FILE_DATA_ITER) { + return tsdbSttFileDataIterNext(pIter, pFilterInfo); + } else if (pIter->type == TSDB_TOMB_FILE_DATA_ITER) { + return tsdbTombFileDataIterNext(pIter, pFilterInfo); + } else { + ASSERT(0); + return code; + } +} + +/* get */ + +// STsdbSnapReader ======================================== +struct STsdbSnapReader { + STsdb* pTsdb; + int64_t sver; + int64_t ever; + int8_t type; + uint8_t* aBuf[5]; + + STsdbFS fs; + TABLEID tbid; + SSkmInfo skmTable; + + // timeseries data + int8_t dataDone; + int32_t fid; + + SDataFReader* pDataFReader; + STsdbDataIter2* iterList; + STsdbDataIter2* pIter; + SRBTree rbt; + SBlockData bData; + + // tombstone data + int8_t delDone; + SDelFReader* pDelFReader; + STsdbDataIter2* pTIter; + 
SArray* aDelData; +}; + +static int32_t tsdbSnapReadFileDataStart(STsdbSnapReader* pReader) { + int32_t code = 0; + int32_t lino = 0; + + SDFileSet* pSet = taosArraySearch(pReader->fs.aDFileSet, &(SDFileSet){.fid = pReader->fid}, tDFileSetCmprFn, TD_GT); + if (pSet == NULL) { + pReader->fid = INT32_MAX; + goto _exit; + } + + pReader->fid = pSet->fid; + + tRBTreeCreate(&pReader->rbt, tsdbDataIterCmprFn); + + code = tsdbDataFReaderOpen(&pReader->pDataFReader, pReader->pTsdb, pSet); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbOpenDataFileDataIter(pReader->pDataFReader, &pReader->pIter); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pReader->pIter) { + // iter to next with filter info (sver, ever) + code = tsdbDataIterNext2(pReader->pIter, + &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag + .sver = pReader->sver, + .ever = pReader->ever}); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) { + // add to rbtree + tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); + + // add to iterList + pReader->pIter->next = pReader->iterList; + pReader->iterList = pReader->pIter; + } else { + tsdbCloseDataIter2(pReader->pIter); + } + } + + for (int32_t iStt = 0; iStt < pSet->nSttF; ++iStt) { + code = tsdbOpenSttFileDataIter(pReader->pDataFReader, iStt, &pReader->pIter); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pReader->pIter) { + // iter to valid row + code = tsdbDataIterNext2(pReader->pIter, + &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag + .sver = pReader->sver, + .ever = pReader->ever}); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) { + // add to rbtree + tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); + + // add to iterList + pReader->pIter->next = pReader->iterList; + pReader->iterList = pReader->pIter; + } else { + tsdbCloseDataIter2(pReader->pIter); + } + } + } + + pReader->pIter = NULL; + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s done, fid:%d", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->fid); + } + return code; +} + +static void tsdbSnapReadFileDataEnd(STsdbSnapReader* pReader) { + while (pReader->iterList) { + STsdbDataIter2* pIter = pReader->iterList; + pReader->iterList = pIter->next; + tsdbCloseDataIter2(pIter); + } + + tsdbDataFReaderClose(&pReader->pDataFReader); +} + +static int32_t tsdbSnapReadNextRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) { + int32_t code = 0; + int32_t lino = 0; + + if (pReader->pIter) { + code = tsdbDataIterNext2(pReader->pIter, &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag + .sver = pReader->sver, + .ever = pReader->ever}); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pReader->pIter->rowInfo.suid == 0 && pReader->pIter->rowInfo.uid == 0) { + pReader->pIter = NULL; + } else { + SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt); + if (pNode) { + int32_t c = tsdbDataIterCmprFn(&pReader->pIter->rbtn, pNode); + if (c > 0) { + tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); + pReader->pIter = NULL; + } else if (c == 0) { + ASSERT(0); + } } } } if (pReader->pIter == NULL) { - pReader->pIter = (SFDataIter*)tRBTreeMin(&pReader->rbt); - if (pReader->pIter) { - tRBTreeDrop(&pReader->rbt, (SRBTreeNode*)pReader->pIter); + SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt); + if (pNode) { + tRBTreeDrop(&pReader->rbt, pNode); + pReader->pIter = 
TSDB_RBTN_TO_DATA_ITER(pNode); } } - return code; + if (ppRowInfo) { + if (pReader->pIter) { + *ppRowInfo = &pReader->pIter->rowInfo; + } else { + *ppRowInfo = NULL; + } + } -_err: +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } -static SRowInfo* tsdbSnapGetRow(STsdbSnapReader* pReader) { +static int32_t tsdbSnapReadGetRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) { if (pReader->pIter) { - return &pReader->pIter->rInfo; - } else { - tsdbSnapNextRow(pReader); - - if (pReader->pIter) { - return &pReader->pIter->rInfo; - } else { - return NULL; - } + *ppRowInfo = &pReader->pIter->rowInfo; + return 0; } + + return tsdbSnapReadNextRow(pReader, ppRowInfo); } static int32_t tsdbSnapCmprData(STsdbSnapReader* pReader, uint8_t** ppData) { @@ -318,155 +672,213 @@ _exit: return code; } -static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) { +static int32_t tsdbSnapReadTimeSeriesData(STsdbSnapReader* pReader, uint8_t** ppData) { int32_t code = 0; int32_t lino = 0; STsdb* pTsdb = pReader->pTsdb; - while (true) { + tBlockDataReset(&pReader->bData); + + for (;;) { + // start a new file read if need if (pReader->pDataFReader == NULL) { - code = tsdbSnapReadOpenFile(pReader); + code = tsdbSnapReadFileDataStart(pReader); TSDB_CHECK_CODE(code, lino, _exit); } if (pReader->pDataFReader == NULL) break; - SRowInfo* pRowInfo = tsdbSnapGetRow(pReader); + SRowInfo* pRowInfo; + code = tsdbSnapReadGetRow(pReader, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + if (pRowInfo == NULL) { - tsdbDataFReaderClose(&pReader->pDataFReader); + tsdbSnapReadFileDataEnd(pReader); continue; } - TABLEID id = {.suid = pRowInfo->suid, .uid = pRowInfo->uid}; - SBlockData* pBlockData = &pReader->bData; - - code = tsdbUpdateTableSchema(pTsdb->pVnode->pMeta, id.suid, id.uid, &pReader->skmTable); + code = tsdbUpdateTableSchema(pTsdb->pVnode->pMeta, pRowInfo->suid, pRowInfo->uid, &pReader->skmTable); TSDB_CHECK_CODE(code, lino, _exit); - code = tBlockDataInit(pBlockData, &id, pReader->skmTable.pTSchema, NULL, 0); + code = tBlockDataInit(&pReader->bData, (TABLEID*)pRowInfo, pReader->skmTable.pTSchema, NULL, 0); TSDB_CHECK_CODE(code, lino, _exit); - while (pRowInfo->suid == id.suid && pRowInfo->uid == id.uid) { - code = tBlockDataAppendRow(pBlockData, &pRowInfo->row, NULL, pRowInfo->uid); - TSDB_CHECK_CODE(code, lino, _exit); + do { + if (!TABLE_SAME_SCHEMA(pReader->bData.suid, pReader->bData.uid, pRowInfo->suid, pRowInfo->uid)) break; - code = tsdbSnapNextRow(pReader); - TSDB_CHECK_CODE(code, lino, _exit); + if (pReader->bData.uid && pReader->bData.uid != pRowInfo->uid) { + code = tRealloc((uint8_t**)&pReader->bData.aUid, sizeof(int64_t) * (pReader->bData.nRow + 1)); + TSDB_CHECK_CODE(code, lino, _exit); - pRowInfo = tsdbSnapGetRow(pReader); - if (pRowInfo == NULL) { - tsdbDataFReaderClose(&pReader->pDataFReader); - break; + for (int32_t iRow = 0; iRow < pReader->bData.nRow; ++iRow) { + pReader->bData.aUid[iRow] = pReader->bData.uid; + } + pReader->bData.uid = 0; } - if (pBlockData->nRow >= 4096) break; - } + code = tBlockDataAppendRow(&pReader->bData, &pRowInfo->row, NULL, pRowInfo->uid); + TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbSnapCmprData(pReader, ppData); - TSDB_CHECK_CODE(code, lino, _exit); + code = tsdbSnapReadNextRow(pReader, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pReader->bData.nRow >= 4096) break; + } while (pRowInfo); + + ASSERT(pReader->bData.nRow > 0); 
break; } + if (pReader->bData.nRow > 0) { + code = tsdbSnapCmprData(pReader, ppData); + TSDB_CHECK_CODE(code, lino, _exit); + } + _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code), pTsdb->path); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); } return code; } -static int32_t tsdbSnapReadDel(STsdbSnapReader* pReader, uint8_t** ppData) { +static int32_t tsdbSnapCmprTombData(STsdbSnapReader* pReader, uint8_t** ppData) { int32_t code = 0; int32_t lino = 0; - STsdb* pTsdb = pReader->pTsdb; - SDelFile* pDelFile = pReader->fs.pDelFile; - - if (pReader->pDelFReader == NULL) { - if (pDelFile == NULL) { - goto _exit; - } - - // open - code = tsdbDelFReaderOpen(&pReader->pDelFReader, pDelFile, pTsdb); - TSDB_CHECK_CODE(code, lino, _exit); - - // read index - code = tsdbReadDelIdx(pReader->pDelFReader, pReader->aDelIdx); - TSDB_CHECK_CODE(code, lino, _exit); - - pReader->iDelIdx = 0; + int64_t size = sizeof(TABLEID); + for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) { + size += tPutDelData(NULL, taosArrayGet(pReader->aDelData, iDelData)); } - while (true) { - if (pReader->iDelIdx >= taosArrayGetSize(pReader->aDelIdx)) { - tsdbDelFReaderClose(&pReader->pDelFReader); - break; - } - - SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pReader->aDelIdx, pReader->iDelIdx); - - pReader->iDelIdx++; - - code = tsdbReadDelData(pReader->pDelFReader, pDelIdx, pReader->aDelData); + uint8_t* pData = (uint8_t*)taosMemoryMalloc(sizeof(SSnapDataHdr) + size); + if (pData == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); + } - int32_t size = 0; - for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); iDelData++) { - SDelData* pDelData = (SDelData*)taosArrayGet(pReader->aDelData, iDelData); + SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; + pHdr->type = SNAP_DATA_DEL; + pHdr->size = size; - if (pDelData->version >= pReader->sver && pDelData->version <= pReader->ever) { - size += tPutDelData(NULL, pDelData); - } - } - if (size == 0) continue; + TABLEID* pId = (TABLEID*)(pData + sizeof(SSnapDataHdr)); + *pId = pReader->tbid; - // org data - size = sizeof(TABLEID) + size; - *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + size); - if (*ppData == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - - SSnapDataHdr* pHdr = (SSnapDataHdr*)(*ppData); - pHdr->type = SNAP_DATA_DEL; - pHdr->size = size; - - TABLEID* pId = (TABLEID*)(&pHdr[1]); - pId->suid = pDelIdx->suid; - pId->uid = pDelIdx->uid; - int32_t n = sizeof(SSnapDataHdr) + sizeof(TABLEID); - for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); iDelData++) { - SDelData* pDelData = (SDelData*)taosArrayGet(pReader->aDelData, iDelData); - - if (pDelData->version < pReader->sver) continue; - if (pDelData->version > pReader->ever) continue; - - n += tPutDelData((*ppData) + n, pDelData); - } - - tsdbInfo("vgId:%d, vnode snapshot tsdb read del data for %s, suid:%" PRId64 " uid:%" PRId64 " size:%d", - TD_VID(pTsdb->pVnode), pTsdb->path, pDelIdx->suid, pDelIdx->uid, size); - - break; + size = sizeof(SSnapDataHdr) + sizeof(TABLEID); + for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) { + size += tPutDelData(pData + size, taosArrayGet(pReader->aDelData, iDelData)); } _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pTsdb->pVnode), __func__, 
tstrerror(code), pTsdb->path); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + if (pData) { + taosMemoryFree(pData); + pData = NULL; + } + } + *ppData = pData; + return code; +} + +static void tsdbSnapReadGetTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) { + if (pReader->pTIter == NULL || (pReader->pTIter->delInfo.suid == 0 && pReader->pTIter->delInfo.uid == 0)) { + *ppDelInfo = NULL; + } else { + *ppDelInfo = &pReader->pTIter->delInfo; + } +} + +static int32_t tsdbSnapReadNextTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbDataIterNext2( + pReader->pTIter, + &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, .sver = pReader->sver, .ever = pReader->ever}); + TSDB_CHECK_CODE(code, lino, _exit); + + if (ppDelInfo) { + tsdbSnapReadGetTombData(pReader, ppDelInfo); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapReadTombData(STsdbSnapReader* pReader, uint8_t** ppData) { + int32_t code = 0; + int32_t lino = 0; + + STsdb* pTsdb = pReader->pTsdb; + + // open tombstone data iter if need + if (pReader->pDelFReader == NULL) { + if (pReader->fs.pDelFile == NULL) goto _exit; + + // open + code = tsdbDelFReaderOpen(&pReader->pDelFReader, pReader->fs.pDelFile, pTsdb); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbOpenTombFileDataIter(pReader->pDelFReader, &pReader->pTIter); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pReader->pTIter) { + code = tsdbSnapReadNextTombData(pReader, NULL); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + // loop to get tombstone data + SDelInfo* pDelInfo; + tsdbSnapReadGetTombData(pReader, &pDelInfo); + + if (pDelInfo == NULL) goto _exit; + + pReader->tbid = *(TABLEID*)pDelInfo; + + if (pReader->aDelData) { + taosArrayClear(pReader->aDelData); + } else if ((pReader->aDelData = taosArrayInit(16, sizeof(SDelData))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + while (pDelInfo && pDelInfo->suid == pReader->tbid.suid && pDelInfo->uid == pReader->tbid.uid) { + if (taosArrayPush(pReader->aDelData, &pDelInfo->delData) < 0) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbSnapReadNextTombData(pReader, &pDelInfo); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // encode tombstone data + if (taosArrayGetSize(pReader->aDelData) > 0) { + code = tsdbSnapCmprTombData(pReader, ppData); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); } return code; } int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, STsdbSnapReader** ppReader) { - int32_t code = 0; - int32_t lino = 0; - STsdbSnapReader* pReader = NULL; + int32_t code = 0; + int32_t lino = 0; // alloc - pReader = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*pReader)); + STsdbSnapReader* pReader = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*pReader)); if (pReader == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); @@ -476,118 +888,80 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type pReader->ever = ever; pReader->type = type; - code = 
taosThreadRwlockRdlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - TSDB_CHECK_CODE(code, lino, _exit); - } - + taosThreadRwlockRdlock(&pTsdb->rwLock); code = tsdbFSRef(pTsdb, &pReader->fs); if (code) { taosThreadRwlockUnlock(&pTsdb->rwLock); TSDB_CHECK_CODE(code, lino, _exit); } + taosThreadRwlockUnlock(&pTsdb->rwLock); - code = taosThreadRwlockUnlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - TSDB_CHECK_CODE(code, lino, _exit); - } - - // data + // init pReader->fid = INT32_MIN; - for (int32_t iIter = 0; iIter < sizeof(pReader->aFDataIter) / sizeof(pReader->aFDataIter[0]); iIter++) { - SFDataIter* pIter = &pReader->aFDataIter[iIter]; - - if (iIter == 0) { - pIter->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); - if (pIter->aBlockIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - } else { - pIter->aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); - if (pIter->aSttBlk == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - } - - code = tBlockDataCreate(&pIter->bData); - TSDB_CHECK_CODE(code, lino, _exit); - } code = tBlockDataCreate(&pReader->bData); TSDB_CHECK_CODE(code, lino, _exit); - // del - pReader->aDelIdx = taosArrayInit(0, sizeof(SDelIdx)); - if (pReader->aDelIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pReader->aDelData = taosArrayInit(0, sizeof(SDelData)); - if (pReader->aDelData == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - _exit: if (code) { - tsdbError("vgId:%d, %s failed at line %d since %s, TSDB path: %s", TD_VID(pTsdb->pVnode), __func__, lino, - tstrerror(code), pTsdb->path); - *ppReader = NULL; - + tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode), + __func__, lino, tstrerror(code), sver, ever, type); if (pReader) { - taosArrayDestroy(pReader->aDelData); - taosArrayDestroy(pReader->aDelIdx); tBlockDataDestroy(&pReader->bData, 1); - tsdbFSDestroy(&pReader->fs); + tsdbFSUnref(pTsdb, &pReader->fs); taosMemoryFree(pReader); + pReader = NULL; } } else { - *ppReader = pReader; - tsdbInfo("vgId:%d, vnode snapshot tsdb reader opened for %s", TD_VID(pTsdb->pVnode), pTsdb->path); + tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode), __func__, sver, ever, + type); } + *ppReader = pReader; return code; } int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) { - int32_t code = 0; + int32_t code = 0; + int32_t lino = 0; + STsdbSnapReader* pReader = *ppReader; + STsdb* pTsdb = pReader->pTsdb; - // data - if (pReader->pDataFReader) tsdbDataFReaderClose(&pReader->pDataFReader); - for (int32_t iIter = 0; iIter < sizeof(pReader->aFDataIter) / sizeof(pReader->aFDataIter[0]); iIter++) { - SFDataIter* pIter = &pReader->aFDataIter[iIter]; - - if (iIter == 0) { - taosArrayDestroy(pIter->aBlockIdx); - tMapDataClear(&pIter->mBlock); - } else { - taosArrayDestroy(pIter->aSttBlk); - } - - tBlockDataDestroy(&pIter->bData, 1); + // tombstone + if (pReader->pTIter) { + tsdbCloseDataIter2(pReader->pTIter); + pReader->pTIter = NULL; + } + if (pReader->pDelFReader) { + tsdbDelFReaderClose(&pReader->pDelFReader); } - - tBlockDataDestroy(&pReader->bData, 1); - tDestroyTSchema(pReader->skmTable.pTSchema); - - // del - if (pReader->pDelFReader) tsdbDelFReaderClose(&pReader->pDelFReader); - taosArrayDestroy(pReader->aDelIdx); taosArrayDestroy(pReader->aDelData); + // timeseries + while 
(pReader->iterList) { + STsdbDataIter2* pIter = pReader->iterList; + pReader->iterList = pIter->next; + tsdbCloseDataIter2(pIter); + } + if (pReader->pDataFReader) { + tsdbDataFReaderClose(&pReader->pDataFReader); + } + tBlockDataDestroy(&pReader->bData, 1); + + // other + tDestroyTSchema(pReader->skmTable.pTSchema); tsdbFSUnref(pReader->pTsdb, &pReader->fs); - - tsdbInfo("vgId:%d, vnode snapshot tsdb reader closed for %s", TD_VID(pReader->pTsdb->pVnode), pReader->pTsdb->path); - for (int32_t iBuf = 0; iBuf < sizeof(pReader->aBuf) / sizeof(pReader->aBuf[0]); iBuf++) { tFree(pReader->aBuf[iBuf]); } - taosMemoryFree(pReader); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } *ppReader = NULL; return code; } @@ -600,7 +974,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) { // read data file if (!pReader->dataDone) { - code = tsdbSnapReadData(pReader, ppData); + code = tsdbSnapReadTimeSeriesData(pReader, ppData); TSDB_CHECK_CODE(code, lino, _exit); if (*ppData) { goto _exit; @@ -611,7 +985,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) { // read del file if (!pReader->delDone) { - code = tsdbSnapReadDel(pReader, ppData); + code = tsdbSnapReadTombData(pReader, ppData); TSDB_CHECK_CODE(code, lino, _exit); if (*ppData) { goto _exit; @@ -622,22 +996,18 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) { _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pReader->pTsdb->pVnode), __func__, tstrerror(code), - pReader->pTsdb->path); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); } else { - tsdbDebug("vgId:%d, %s done, path:%s", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->pTsdb->path); + tsdbDebug("vgId:%d %s done", TD_VID(pReader->pTsdb->pVnode), __func__); } return code; } // STsdbSnapWriter ======================================== struct STsdbSnapWriter { - STsdb* pTsdb; - int64_t sver; - int64_t ever; - STsdbFS fs; - - // config + STsdb* pTsdb; + int64_t sver; + int64_t ever; int32_t minutes; int8_t precision; int32_t minRow; @@ -646,641 +1016,816 @@ struct STsdbSnapWriter { int64_t commitID; uint8_t* aBuf[5]; - // for data file - SBlockData bData; - int32_t fid; - TABLEID id; - SSkmInfo skmTable; - struct { - SDataFReader* pReader; - SArray* aBlockIdx; - int32_t iBlockIdx; - SBlockIdx* pBlockIdx; - SMapData mDataBlk; - int32_t iDataBlk; - SBlockData bData; - int32_t iRow; - } dReader; - struct { - SDataFWriter* pWriter; - SArray* aBlockIdx; - SMapData mDataBlk; - SArray* aSttBlk; - SBlockData bData; - SBlockData sData; - } dWriter; + STsdbFS fs; + TABLEID tbid; - // for del file - SDelFReader* pDelFReader; + // time-series data + SBlockData inData; + + int32_t fid; + SSkmInfo skmTable; + + /* reader */ + SDataFReader* pDataFReader; + STsdbDataIter2* iterList; + STsdbDataIter2* pDIter; + STsdbDataIter2* pSIter; + SRBTree rbt; // SRBTree + + /* writer */ + SDataFWriter* pDataFWriter; + SArray* aBlockIdx; + SMapData mDataBlk; // SMapData + SArray* aSttBlk; // SArray + SBlockData bData; + SBlockData sData; + + // tombstone data + /* reader */ + SDelFReader* pDelFReader; + STsdbDataIter2* pTIter; + + /* writer */ SDelFWriter* pDelFWriter; - int32_t iDelIdx; - SArray* aDelIdxR; + SArray* aDelIdx; SArray* aDelData; - SArray* aDelIdxW; }; // SNAP_DATA_TSDB -extern int32_t 
tsdbWriteDataBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SMapData* mDataBlk, int8_t cmprAlg); -extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SArray* aSttBlk, int8_t cmprAlg); - -static int32_t tsdbSnapNextTableData(STsdbSnapWriter* pWriter) { - int32_t code = 0; - - ASSERT(pWriter->dReader.iRow >= pWriter->dReader.bData.nRow); - - if (pWriter->dReader.iBlockIdx < taosArrayGetSize(pWriter->dReader.aBlockIdx)) { - pWriter->dReader.pBlockIdx = (SBlockIdx*)taosArrayGet(pWriter->dReader.aBlockIdx, pWriter->dReader.iBlockIdx); - - code = tsdbReadDataBlk(pWriter->dReader.pReader, pWriter->dReader.pBlockIdx, &pWriter->dReader.mDataBlk); - if (code) goto _exit; - - pWriter->dReader.iBlockIdx++; - } else { - pWriter->dReader.pBlockIdx = NULL; - tMapDataReset(&pWriter->dReader.mDataBlk); - } - pWriter->dReader.iDataBlk = 0; // point to the next one - tBlockDataReset(&pWriter->dReader.bData); - pWriter->dReader.iRow = 0; - -_exit: - return code; -} - -static int32_t tsdbSnapWriteCopyData(STsdbSnapWriter* pWriter, TABLEID* pId) { - int32_t code = 0; - - while (true) { - if (pWriter->dReader.pBlockIdx == NULL) break; - if (tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, pId) >= 0) break; - - SBlockIdx blkIdx = *pWriter->dReader.pBlockIdx; - code = tsdbWriteDataBlk(pWriter->dWriter.pWriter, &pWriter->dReader.mDataBlk, &blkIdx); - if (code) goto _exit; - - if (taosArrayPush(pWriter->dWriter.aBlockIdx, &blkIdx) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; - } - - code = tsdbSnapNextTableData(pWriter); - if (code) goto _exit; - } - -_exit: - return code; -} - static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) { int32_t code = 0; + int32_t lino = 0; - code = tsdbSnapWriteCopyData(pWriter, pId); - if (code) goto _err; + if (pId) { + pWriter->tbid = *pId; + } else { + pWriter->tbid = (TABLEID){INT64_MAX, INT64_MAX}; + } - pWriter->id.suid = pId->suid; - pWriter->id.uid = pId->uid; + if (pWriter->pDIter) { + STsdbDataIter2* pIter = pWriter->pDIter; - code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pId->suid, pId->uid, &pWriter->skmTable); - if (code) goto _err; + // assert last table data end + ASSERT(pIter->dIter.iRow >= pIter->dIter.bData.nRow); + ASSERT(pIter->dIter.iDataBlk >= pIter->dIter.mDataBlk.nItem); - tMapDataReset(&pWriter->dWriter.mDataBlk); - code = tBlockDataInit(&pWriter->dWriter.bData, pId, pWriter->skmTable.pTSchema, NULL, 0); - if (code) goto _err; + for (;;) { + if (pIter->dIter.iBlockIdx >= taosArrayGetSize(pIter->dIter.aBlockIdx)) { + pWriter->pDIter = NULL; + break; + } + SBlockIdx* pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx); + + int32_t c = tTABLEIDCmprFn(pBlockIdx, &pWriter->tbid); + if (c < 0) { + code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + SBlockIdx* pNewBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1); + if (pNewBlockIdx == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + pNewBlockIdx->suid = pBlockIdx->suid; + pNewBlockIdx->uid = pBlockIdx->uid; + + code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pIter->dIter.mDataBlk, pNewBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->dIter.iBlockIdx++; + } else if (c == 0) { + code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->dIter.iDataBlk = 0; + pIter->dIter.iBlockIdx++; + + break; + } else { + 
pIter->dIter.iDataBlk = pIter->dIter.mDataBlk.nItem; + break; + } + } + } + + if (pId) { + code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pId->suid, pId->uid, &pWriter->skmTable); + TSDB_CHECK_CODE(code, lino, _exit); + + tMapDataReset(&pWriter->mDataBlk); + + code = tBlockDataInit(&pWriter->bData, pId, pWriter->skmTable.pTSchema, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (!TABLE_SAME_SCHEMA(pWriter->tbid.suid, pWriter->tbid.uid, pWriter->sData.suid, pWriter->sData.uid)) { + if ((pWriter->sData.nRow > 0)) { + code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (pId) { + TABLEID id = {.suid = pWriter->tbid.suid, .uid = pWriter->tbid.suid ? 0 : pWriter->tbid.uid}; + code = tBlockDataInit(&pWriter->sData, &id, pWriter->skmTable.pTSchema, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, + pWriter->tbid.suid, pWriter->tbid.uid); + } return code; +} -_err: - tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); +static int32_t tsdbSnapWriteTableRowImpl(STsdbSnapWriter* pWriter, TSDBROW* pRow) { + int32_t code = 0; + int32_t lino = 0; + + code = tBlockDataAppendRow(&pWriter->bData, pRow, pWriter->skmTable.pTSchema, pWriter->tbid.uid); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->bData.nRow >= pWriter->maxRow) { + code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { + int32_t code = 0; + int32_t lino = 0; + + TSDBKEY inKey = pRow ? 
TSDBROW_KEY(pRow) : TSDBKEY_MAX; + + if (pWriter->pDIter == NULL || (pWriter->pDIter->dIter.iRow >= pWriter->pDIter->dIter.bData.nRow && + pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem)) { + goto _write_row; + } else { + for (;;) { + while (pWriter->pDIter->dIter.iRow < pWriter->pDIter->dIter.bData.nRow) { + TSDBROW row = tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow); + + int32_t c = tsdbKeyCmprFn(&inKey, &TSDBROW_KEY(&row)); + if (c < 0) { + goto _write_row; + } else if (c > 0) { + code = tsdbSnapWriteTableRowImpl(pWriter, &row); + TSDB_CHECK_CODE(code, lino, _exit); + + pWriter->pDIter->dIter.iRow++; + } else { + ASSERT(0); + } + } + + for (;;) { + if (pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem) goto _write_row; + + // FIXME: Here can be slow, use array instead + SDataBlk dataBlk; + tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); + + int32_t c = tDataBlkCmprFn(&dataBlk, &(SDataBlk){.minKey = inKey, .maxKey = inKey}); + if (c > 0) { + goto _write_row; + } else if (c < 0) { + if (pWriter->bData.nRow > 0) { + code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } + + tMapDataPutItem(&pWriter->pDIter->dIter.mDataBlk, &dataBlk, tPutDataBlk); + pWriter->pDIter->dIter.iDataBlk++; + } else { + code = tsdbReadDataBlockEx(pWriter->pDataFReader, &dataBlk, &pWriter->pDIter->dIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); + + pWriter->pDIter->dIter.iRow = 0; + pWriter->pDIter->dIter.iDataBlk++; + break; + } + } + } + } + +_write_row: + if (pRow) { + code = tsdbSnapWriteTableRowImpl(pWriter, pRow); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { int32_t code = 0; + int32_t lino = 0; - if (pWriter->id.suid == 0 && pWriter->id.uid == 0) return code; + // write a NULL row to end current table data write + code = tsdbSnapWriteTableRow(pWriter, NULL); + TSDB_CHECK_CODE(code, lino, _exit); - int32_t c = 1; - if (pWriter->dReader.pBlockIdx) { - c = tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, &pWriter->id); - ASSERT(c >= 0); - } + if (pWriter->bData.nRow > 0) { + if (pWriter->bData.nRow < pWriter->minRow) { + ASSERT(TABLE_SAME_SCHEMA(pWriter->sData.suid, pWriter->sData.uid, pWriter->tbid.suid, pWriter->tbid.uid)); + for (int32_t iRow = 0; iRow < pWriter->bData.nRow; iRow++) { + code = + tBlockDataAppendRow(&pWriter->sData, &tsdbRowFromBlockData(&pWriter->bData, iRow), NULL, pWriter->tbid.uid); + TSDB_CHECK_CODE(code, lino, _exit); - if (c == 0) { - SBlockData* pBData = &pWriter->dWriter.bData; - - for (; pWriter->dReader.iRow < pWriter->dReader.bData.nRow; pWriter->dReader.iRow++) { - TSDBROW row = tsdbRowFromBlockData(&pWriter->dReader.bData, pWriter->dReader.iRow); - - code = tBlockDataAppendRow(pBData, &row, NULL, pWriter->id.uid); - if (code) goto _err; - - if (pBData->nRow >= pWriter->maxRow) { - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, pBData, &pWriter->dWriter.mDataBlk, pWriter->cmprAlg); - if (code) goto _err; + if (pWriter->sData.nRow >= pWriter->maxRow) { + code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } } + + 
tBlockDataClear(&pWriter->bData); + } else { + code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); } - - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, pBData, &pWriter->dWriter.mDataBlk, pWriter->cmprAlg); - if (code) goto _err; - - for (; pWriter->dReader.iDataBlk < pWriter->dReader.mDataBlk.nItem; pWriter->dReader.iDataBlk++) { - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pWriter->dReader.mDataBlk, pWriter->dReader.iDataBlk, &dataBlk, tGetDataBlk); - - code = tMapDataPutItem(&pWriter->dWriter.mDataBlk, &dataBlk, tPutDataBlk); - if (code) goto _err; - } - - code = tsdbSnapNextTableData(pWriter); - if (code) goto _err; } - if (pWriter->dWriter.mDataBlk.nItem) { - SBlockIdx blockIdx = {.suid = pWriter->id.suid, .uid = pWriter->id.uid}; - code = tsdbWriteDataBlk(pWriter->dWriter.pWriter, &pWriter->dWriter.mDataBlk, &blockIdx); - - if (taosArrayPush(pWriter->dWriter.aBlockIdx, &blockIdx) == NULL) { + if (pWriter->mDataBlk.nItem) { + SBlockIdx* pBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1); + if (pBlockIdx == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; + TSDB_CHECK_CODE(code, lino, _exit); } - } - pWriter->id.suid = 0; - pWriter->id.uid = 0; + pBlockIdx->suid = pWriter->tbid.suid; + pBlockIdx->uid = pWriter->tbid.uid; - return code; - -_err: - return code; -} - -static int32_t tsdbSnapWriteOpenFile(STsdbSnapWriter* pWriter, int32_t fid) { - int32_t code = 0; - STsdb* pTsdb = pWriter->pTsdb; - - ASSERT(pWriter->dWriter.pWriter == NULL); - - pWriter->fid = fid; - pWriter->id = (TABLEID){0}; - SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); - - // Reader - if (pSet) { - code = tsdbDataFReaderOpen(&pWriter->dReader.pReader, pWriter->pTsdb, pSet); - if (code) goto _err; - - code = tsdbReadBlockIdx(pWriter->dReader.pReader, pWriter->dReader.aBlockIdx); - if (code) goto _err; - } else { - ASSERT(pWriter->dReader.pReader == NULL); - taosArrayClear(pWriter->dReader.aBlockIdx); - } - pWriter->dReader.iBlockIdx = 0; // point to the next one - code = tsdbSnapNextTableData(pWriter); - if (code) goto _err; - - // Writer - SHeadFile fHead = {.commitID = pWriter->commitID}; - SDataFile fData = {.commitID = pWriter->commitID}; - SSmaFile fSma = {.commitID = pWriter->commitID}; - SSttFile fStt = {.commitID = pWriter->commitID}; - SDFileSet wSet = {.fid = pWriter->fid, .pHeadF = &fHead, .pDataF = &fData, .pSmaF = &fSma}; - if (pSet) { - wSet.diskId = pSet->diskId; - fData = *pSet->pDataF; - fSma = *pSet->pSmaF; - for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { - wSet.aSttF[iStt] = pSet->aSttF[iStt]; - } - wSet.nSttF = pSet->nSttF + 1; // TODO: fix pSet->nSttF == pTsdb->maxFile - } else { - SDiskID did = {0}; - tfsAllocDisk(pTsdb->pVnode->pTfs, 0, &did); - tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, did); - wSet.diskId = did; - wSet.nSttF = 1; - } - wSet.aSttF[wSet.nSttF - 1] = &fStt; - - code = tsdbDataFWriterOpen(&pWriter->dWriter.pWriter, pWriter->pTsdb, &wSet); - if (code) goto _err; - taosArrayClear(pWriter->dWriter.aBlockIdx); - tMapDataReset(&pWriter->dWriter.mDataBlk); - taosArrayClear(pWriter->dWriter.aSttBlk); - tBlockDataReset(&pWriter->dWriter.bData); - tBlockDataReset(&pWriter->dWriter.sData); - - return code; - -_err: - return code; -} - -static int32_t tsdbSnapWriteCloseFile(STsdbSnapWriter* pWriter) { - int32_t code = 0; - - ASSERT(pWriter->dWriter.pWriter); - - code = tsdbSnapWriteTableDataEnd(pWriter); - if (code) 
goto _err; - - // copy remain table data - TABLEID id = {.suid = INT64_MAX, .uid = INT64_MAX}; - code = tsdbSnapWriteCopyData(pWriter, &id); - if (code) goto _err; - - code = - tsdbWriteSttBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.sData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg); - if (code) goto _err; - - // Indices - code = tsdbWriteBlockIdx(pWriter->dWriter.pWriter, pWriter->dWriter.aBlockIdx); - if (code) goto _err; - - code = tsdbWriteSttBlk(pWriter->dWriter.pWriter, pWriter->dWriter.aSttBlk); - if (code) goto _err; - - code = tsdbUpdateDFileSetHeader(pWriter->dWriter.pWriter); - if (code) goto _err; - - code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->dWriter.pWriter->wSet); - if (code) goto _err; - - code = tsdbDataFWriterClose(&pWriter->dWriter.pWriter, 1); - if (code) goto _err; - - if (pWriter->dReader.pReader) { - code = tsdbDataFReaderClose(&pWriter->dReader.pReader); - if (code) goto _err; + code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pWriter->mDataBlk, pBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); } _exit: - return code; - -_err: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } -static int32_t tsdbSnapWriteToDataFile(STsdbSnapWriter* pWriter, int32_t iRow, int8_t* done) { +static int32_t tsdbSnapWriteFileDataStart(STsdbSnapWriter* pWriter, int32_t fid) { int32_t code = 0; + int32_t lino = 0; - SBlockData* pBData = &pWriter->bData; - TABLEID id = {.suid = pBData->suid, .uid = pBData->uid ? pBData->uid : pBData->aUid[iRow]}; - TSDBROW row = tsdbRowFromBlockData(pBData, iRow); - TSDBKEY key = TSDBROW_KEY(&row); + ASSERT(pWriter->pDataFWriter == NULL && pWriter->fid < fid); - *done = 0; - while (pWriter->dReader.iRow < pWriter->dReader.bData.nRow || - pWriter->dReader.iDataBlk < pWriter->dReader.mDataBlk.nItem) { - // Merge row by row - for (; pWriter->dReader.iRow < pWriter->dReader.bData.nRow; pWriter->dReader.iRow++) { - TSDBROW trow = tsdbRowFromBlockData(&pWriter->dReader.bData, pWriter->dReader.iRow); - TSDBKEY tKey = TSDBROW_KEY(&trow); + STsdb* pTsdb = pWriter->pTsdb; - ASSERT(pWriter->dReader.bData.suid == id.suid && pWriter->dReader.bData.uid == id.uid); + pWriter->fid = fid; + pWriter->tbid = (TABLEID){0}; + SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); - int32_t c = tsdbKeyCmprFn(&key, &tKey); - if (c < 0) { - code = tBlockDataAppendRow(&pWriter->dWriter.bData, &row, NULL, id.uid); - if (code) goto _err; - } else if (c > 0) { - code = tBlockDataAppendRow(&pWriter->dWriter.bData, &trow, NULL, id.uid); - if (code) goto _err; - } else { - ASSERT(0); - } + // open reader + pWriter->pDataFReader = NULL; + pWriter->iterList = NULL; + pWriter->pDIter = NULL; + pWriter->pSIter = NULL; + tRBTreeCreate(&pWriter->rbt, tsdbDataIterCmprFn); + if (pSet) { + code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet); + TSDB_CHECK_CODE(code, lino, _exit); - if (pWriter->dWriter.bData.nRow >= pWriter->maxRow) { - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk, - pWriter->cmprAlg); - if (code) goto _err; - } + code = tsdbOpenDataFileDataIter(pWriter->pDataFReader, &pWriter->pDIter); + TSDB_CHECK_CODE(code, lino, _exit); + if (pWriter->pDIter) { + pWriter->pDIter->next = pWriter->iterList; + pWriter->iterList = pWriter->pDIter; + } - if (c < 0) { - *done = 1; - goto _exit; + for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { + code = 
tsdbOpenSttFileDataIter(pWriter->pDataFReader, iStt, &pWriter->pSIter); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->pSIter) { + code = tsdbSttFileDataIterNext(pWriter->pSIter, NULL); + TSDB_CHECK_CODE(code, lino, _exit); + + // add to tree + tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn); + + // add to list + pWriter->pSIter->next = pWriter->iterList; + pWriter->iterList = pWriter->pSIter; } } - // Merge row by block - SDataBlk tDataBlk = {.minKey = key, .maxKey = key}; - for (; pWriter->dReader.iDataBlk < pWriter->dReader.mDataBlk.nItem; pWriter->dReader.iDataBlk++) { - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pWriter->dReader.mDataBlk, pWriter->dReader.iDataBlk, &dataBlk, tGetDataBlk); + pWriter->pSIter = NULL; + } - int32_t c = tDataBlkCmprFn(&dataBlk, &tDataBlk); + // open writer + SDiskID diskId; + if (pSet) { + diskId = pSet->diskId; + } else { + tfsAllocDisk(pTsdb->pVnode->pTfs, 0 /*TODO*/, &diskId); + tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, diskId); + } + SDFileSet wSet = {.diskId = diskId, + .fid = fid, + .pHeadF = &(SHeadFile){.commitID = pWriter->commitID}, + .pDataF = (pSet) ? pSet->pDataF : &(SDataFile){.commitID = pWriter->commitID}, + .pSmaF = (pSet) ? pSet->pSmaF : &(SSmaFile){.commitID = pWriter->commitID}, + .nSttF = 1, + .aSttF = {&(SSttFile){.commitID = pWriter->commitID}}}; + code = tsdbDataFWriterOpen(&pWriter->pDataFWriter, pTsdb, &wSet); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->aBlockIdx) { + taosArrayClear(pWriter->aBlockIdx); + } else if ((pWriter->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + tMapDataReset(&pWriter->mDataBlk); + + if (pWriter->aSttBlk) { + taosArrayClear(pWriter->aSttBlk); + } else if ((pWriter->aSttBlk = taosArrayInit(0, sizeof(SSttBlk))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + tBlockDataReset(&pWriter->bData); + tBlockDataReset(&pWriter->sData); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s, fid:%d", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code), + fid); + } else { + tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(pTsdb->pVnode), __func__, fid); + } + return code; +} + +static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowInfo) { + int32_t code = 0; + int32_t lino = 0; + + // switch to new table if need + if (pRowInfo == NULL || pRowInfo->uid != pWriter->tbid.uid) { + if (pWriter->tbid.uid) { + code = tsdbSnapWriteTableDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbSnapWriteTableDataStart(pWriter, (TABLEID*)pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (pRowInfo == NULL) goto _exit; + + code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapWriteNextRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) { + int32_t code = 0; + int32_t lino = 0; + + if (pWriter->pSIter) { + code = tsdbDataIterNext2(pWriter->pSIter, NULL); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->pSIter->rowInfo.suid == 0 && pWriter->pSIter->rowInfo.uid == 0) { + pWriter->pSIter = NULL; + } else { + SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt); + if (pNode) { + int32_t c = tsdbDataIterCmprFn(&pWriter->pSIter->rbtn, pNode); + if (c > 0) { + 
tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn); + pWriter->pSIter = NULL; + } else if (c == 0) { + ASSERT(0); + } + } + } + } + + if (pWriter->pSIter == NULL) { + SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt); + if (pNode) { + tRBTreeDrop(&pWriter->rbt, pNode); + pWriter->pSIter = TSDB_RBTN_TO_DATA_ITER(pNode); + } + } + + if (ppRowInfo) { + if (pWriter->pSIter) { + *ppRowInfo = &pWriter->pSIter->rowInfo; + } else { + *ppRowInfo = NULL; + } + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapWriteGetRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) { + int32_t code = 0; + int32_t lino = 0; + + if (pWriter->pSIter) { + *ppRowInfo = &pWriter->pSIter->rowInfo; + goto _exit; + } + + code = tsdbSnapWriteNextRow(pWriter, ppRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(pWriter->pDataFWriter); + + // consume remain data and end with a NULL table row + SRowInfo* pRowInfo; + code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + for (;;) { + code = tsdbSnapWriteTableData(pWriter, pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pRowInfo == NULL) break; + + code = tsdbSnapWriteNextRow(pWriter, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // do file-level updates + code = tsdbWriteSttBlk(pWriter->pDataFWriter, pWriter->aSttBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbUpdateDFileSetHeader(pWriter->pDataFWriter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->pDataFReader) { + code = tsdbDataFReaderClose(&pWriter->pDataFReader); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // clear sources + while (pWriter->iterList) { + STsdbDataIter2* pIter = pWriter->iterList; + pWriter->iterList = pIter->next; + tsdbCloseDataIter2(pIter); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s is done", TD_VID(pWriter->pTsdb->pVnode), __func__); + } + return code; +} + +static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { + int32_t code = 0; + int32_t lino = 0; + + code = tDecmprBlockData(pHdr->data, pHdr->size, &pWriter->inData, pWriter->aBuf); + TSDB_CHECK_CODE(code, lino, _exit); + + ASSERT(pWriter->inData.nRow > 0); + + // switch to new data file if need + int32_t fid = tsdbKeyFid(pWriter->inData.aTSKEY[0], pWriter->minutes, pWriter->precision); + if (pWriter->fid != fid) { + if (pWriter->pDataFWriter) { + code = tsdbSnapWriteFileDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbSnapWriteFileDataStart(pWriter, fid); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // loop write each row + SRowInfo* pRowInfo; + code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + for (int32_t iRow = 0; iRow < 
pWriter->inData.nRow; ++iRow) { + SRowInfo rInfo = {.suid = pWriter->inData.suid, + .uid = pWriter->inData.uid ? pWriter->inData.uid : pWriter->inData.aUid[iRow], + .row = tsdbRowFromBlockData(&pWriter->inData, iRow)}; + + for (;;) { + if (pRowInfo == NULL) { + code = tsdbSnapWriteTableData(pWriter, &rInfo); + TSDB_CHECK_CODE(code, lino, _exit); + break; + } else { + int32_t c = tRowInfoCmprFn(&rInfo, pRowInfo); + if (c < 0) { + code = tsdbSnapWriteTableData(pWriter, &rInfo); + TSDB_CHECK_CODE(code, lino, _exit); + break; + } else if (c > 0) { + code = tsdbSnapWriteTableData(pWriter, pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSnapWriteNextRow(pWriter, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + ASSERT(0); + } + } + } + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64 " nRow:%d", TD_VID(pWriter->pTsdb->pVnode), __func__, + pWriter->inData.suid, pWriter->inData.uid, pWriter->inData.nRow); + } + return code; +} + +// SNAP_DATA_DEL +static int32_t tsdbSnapWriteDelTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) { + int32_t code = 0; + int32_t lino = 0; + + if (pId) { + pWriter->tbid = *pId; + } else { + pWriter->tbid = (TABLEID){.suid = INT64_MAX, .uid = INT64_MAX}; + } + + taosArrayClear(pWriter->aDelData); + + if (pWriter->pTIter) { + while (pWriter->pTIter->tIter.iDelIdx < taosArrayGetSize(pWriter->pTIter->tIter.aDelIdx)) { + SDelIdx* pDelIdx = taosArrayGet(pWriter->pTIter->tIter.aDelIdx, pWriter->pTIter->tIter.iDelIdx); + + int32_t c = tTABLEIDCmprFn(pDelIdx, &pWriter->tbid); if (c < 0) { - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk, - pWriter->cmprAlg); - if (code) goto _err; + code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->pTIter->tIter.aDelData); + TSDB_CHECK_CODE(code, lino, _exit); - code = tMapDataPutItem(&pWriter->dWriter.mDataBlk, &dataBlk, tPutDataBlk); - if (code) goto _err; - } else if (c > 0) { - code = tBlockDataAppendRow(&pWriter->dWriter.bData, &row, NULL, id.uid); - if (code) goto _err; - - if (pWriter->dWriter.bData.nRow >= pWriter->maxRow) { - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk, - pWriter->cmprAlg); - if (code) goto _err; + SDelIdx* pDelIdxNew = taosArrayReserve(pWriter->aDelIdx, 1); + if (pDelIdxNew == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); } - *done = 1; - goto _exit; - } else { - code = tsdbReadDataBlockEx(pWriter->dReader.pReader, &dataBlk, &pWriter->dReader.bData); - if (code) goto _err; - pWriter->dReader.iRow = 0; + pDelIdxNew->suid = pDelIdx->suid; + pDelIdxNew->uid = pDelIdx->uid; - pWriter->dReader.iDataBlk++; + code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->pTIter->tIter.aDelData, pDelIdxNew); + TSDB_CHECK_CODE(code, lino, _exit); + + pWriter->pTIter->tIter.iDelIdx++; + } else if (c == 0) { + code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData); + TSDB_CHECK_CODE(code, lino, _exit); + + pWriter->pTIter->tIter.iDelIdx++; + break; + } else { break; } } } _exit: - return code; - -_err: - tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); - return code; -} - -static int32_t tsdbSnapWriteToSttFile(STsdbSnapWriter* pWriter, int32_t iRow) { - int32_t code = 0; - - TABLEID id = {.suid = 
pWriter->bData.suid, - .uid = pWriter->bData.uid ? pWriter->bData.uid : pWriter->bData.aUid[iRow]}; - TSDBROW row = tsdbRowFromBlockData(&pWriter->bData, iRow); - SBlockData* pBData = &pWriter->dWriter.sData; - - if (pBData->suid || pBData->uid) { - if (!TABLE_SAME_SCHEMA(pBData->suid, pBData->uid, id.suid, id.uid)) { - code = tsdbWriteSttBlock(pWriter->dWriter.pWriter, pBData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg); - if (code) goto _err; - - pBData->suid = 0; - pBData->uid = 0; - } - } - - if (pBData->suid == 0 && pBData->uid == 0) { - code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pWriter->id.suid, pWriter->id.uid, &pWriter->skmTable); - if (code) goto _err; - - TABLEID tid = {.suid = pWriter->id.suid, .uid = pWriter->id.suid ? 0 : pWriter->id.uid}; - code = tBlockDataInit(pBData, &tid, pWriter->skmTable.pTSchema, NULL, 0); - if (code) goto _err; - } - - code = tBlockDataAppendRow(pBData, &row, NULL, id.uid); - if (code) goto _err; - - if (pBData->nRow >= pWriter->maxRow) { - code = tsdbWriteSttBlock(pWriter->dWriter.pWriter, pBData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg); - if (code) goto _err; - } - -_exit: - return code; - -_err: - return code; -} - -static int32_t tsdbSnapWriteRowData(STsdbSnapWriter* pWriter, int32_t iRow) { - int32_t code = 0; - - SBlockData* pBlockData = &pWriter->bData; - TABLEID id = {.suid = pBlockData->suid, .uid = pBlockData->uid ? pBlockData->uid : pBlockData->aUid[iRow]}; - - // End last table data write if need - if (tTABLEIDCmprFn(&pWriter->id, &id) != 0) { - code = tsdbSnapWriteTableDataEnd(pWriter); - if (code) goto _err; - } - - // Start new table data write if need - if (pWriter->id.suid == 0 && pWriter->id.uid == 0) { - code = tsdbSnapWriteTableDataStart(pWriter, &id); - if (code) goto _err; - } - - // Merge with .data file data - int8_t done = 0; - if (pWriter->dReader.pBlockIdx && tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, &id) == 0) { - code = tsdbSnapWriteToDataFile(pWriter, iRow, &done); - if (code) goto _err; - } - - // Append to the .stt data block (todo: check if need to set/reload sst block) - if (!done) { - code = tsdbSnapWriteToSttFile(pWriter, iRow); - if (code) goto _err; - } - -_exit: - return code; - -_err: - tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); - return code; -} - -static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { - int32_t code = 0; - STsdb* pTsdb = pWriter->pTsdb; - SBlockData* pBlockData = &pWriter->bData; - - // Decode data - SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; - code = tDecmprBlockData(pHdr->data, pHdr->size, pBlockData, pWriter->aBuf); - if (code) goto _err; - - ASSERT(pBlockData->nRow > 0); - - // Loop to handle each row - for (int32_t iRow = 0; iRow < pBlockData->nRow; iRow++) { - TSKEY ts = pBlockData->aTSKEY[iRow]; - int32_t fid = tsdbKeyFid(ts, pWriter->minutes, pWriter->precision); - - if (pWriter->dWriter.pWriter == NULL || pWriter->fid != fid) { - if (pWriter->dWriter.pWriter) { - // ASSERT(fid > pWriter->fid); - - code = tsdbSnapWriteCloseFile(pWriter); - if (code) goto _err; - } - - code = tsdbSnapWriteOpenFile(pWriter, fid); - if (code) goto _err; - } - - code = tsdbSnapWriteRowData(pWriter, iRow); - if (code) goto _err; - } - - return code; - -_err: - tsdbError("vgId:%d, vnode snapshot tsdb write data for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path, - tstrerror(code)); - return code; -} - -// SNAP_DATA_DEL -static int32_t tsdbSnapMoveWriteDelData(STsdbSnapWriter* 
pWriter, TABLEID* pId) { - int32_t code = 0; - - while (true) { - if (pWriter->iDelIdx >= taosArrayGetSize(pWriter->aDelIdxR)) break; - - SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx); - - if (tTABLEIDCmprFn(pDelIdx, pId) >= 0) break; - - code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData); - if (code) goto _exit; - - SDelIdx delIdx = *pDelIdx; - code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, &delIdx); - if (code) goto _exit; - - if (taosArrayPush(pWriter->aDelIdxW, &delIdx) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; - } - - pWriter->iDelIdx++; - } - -_exit: - return code; -} - -static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { - int32_t code = 0; - STsdb* pTsdb = pWriter->pTsdb; - - // Open del file if not opened yet - if (pWriter->pDelFWriter == NULL) { - SDelFile* pDelFile = pWriter->fs.pDelFile; - - // reader - if (pDelFile) { - code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb); - if (code) goto _err; - - code = tsdbReadDelIdx(pWriter->pDelFReader, pWriter->aDelIdxR); - if (code) goto _err; - } else { - taosArrayClear(pWriter->aDelIdxR); - } - pWriter->iDelIdx = 0; - - // writer - SDelFile delFile = {.commitID = pWriter->commitID}; - code = tsdbDelFWriterOpen(&pWriter->pDelFWriter, &delFile, pTsdb); - if (code) goto _err; - taosArrayClear(pWriter->aDelIdxW); - } - - SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; - TABLEID id = *(TABLEID*)pHdr->data; - - ASSERT(pHdr->size + sizeof(SSnapDataHdr) == nData); - - // Move write data < id - code = tsdbSnapMoveWriteDelData(pWriter, &id); - if (code) goto _err; - - // Merge incoming data with current - if (pWriter->iDelIdx < taosArrayGetSize(pWriter->aDelIdxR) && - tTABLEIDCmprFn(taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx), &id) == 0) { - SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx); - - code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData); - if (code) goto _err; - - pWriter->iDelIdx++; + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); } else { - taosArrayClear(pWriter->aDelData); + tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, + pWriter->tbid.suid, pWriter->tbid.uid); + } + return code; +} + +static int32_t tsdbSnapWriteDelTableDataEnd(STsdbSnapWriter* pWriter) { + int32_t code = 0; + int32_t lino = 0; + + if (taosArrayGetSize(pWriter->aDelData) > 0) { + SDelIdx* pDelIdx = taosArrayReserve(pWriter->aDelIdx, 1); + if (pDelIdx == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + pDelIdx->suid = pWriter->tbid.suid; + pDelIdx->uid = pWriter->tbid.uid; + + code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, pDelIdx); + TSDB_CHECK_CODE(code, lino, _exit); } - int64_t n = sizeof(SSnapDataHdr) + sizeof(TABLEID); - while (n < nData) { - SDelData delData; +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbTrace("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__); + } + return code; +} +static int32_t tsdbSnapWriteDelTableData(STsdbSnapWriter* pWriter, TABLEID* pId, uint8_t* pData, int64_t size) { + int32_t code = 0; + int32_t lino = 0; + + if (pId == NULL || pId->uid != pWriter->tbid.uid) { + if (pWriter->tbid.uid) { + code = 
tsdbSnapWriteDelTableDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbSnapWriteDelTableDataStart(pWriter, pId); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (pId == NULL) goto _exit; + + int64_t n = 0; + while (n < size) { + SDelData delData; n += tGetDelData(pData + n, &delData); - if (taosArrayPush(pWriter->aDelData, &delData) == NULL) { + if (taosArrayPush(pWriter->aDelData, &delData) < 0) { code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; + TSDB_CHECK_CODE(code, lino, _exit); } } + ASSERT(n == size); - SDelIdx delIdx = {.suid = id.suid, .uid = id.uid}; - code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, &delIdx); - if (code) goto _err; - - if (taosArrayPush(pWriter->aDelIdxW, &delIdx) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); } - - return code; - -_err: - tsdbError("vgId:%d, vnode snapshot tsdb write del for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path, - tstrerror(code)); return code; } -static int32_t tsdbSnapWriteDelEnd(STsdbSnapWriter* pWriter) { +static int32_t tsdbSnapWriteDelDataStart(STsdbSnapWriter* pWriter) { int32_t code = 0; - STsdb* pTsdb = pWriter->pTsdb; + int32_t lino = 0; - if (pWriter->pDelFWriter == NULL) return code; + STsdb* pTsdb = pWriter->pTsdb; + SDelFile* pDelFile = pWriter->fs.pDelFile; - TABLEID id = {.suid = INT64_MAX, .uid = INT64_MAX}; - code = tsdbSnapMoveWriteDelData(pWriter, &id); - if (code) goto _err; + pWriter->tbid = (TABLEID){0}; - code = tsdbWriteDelIdx(pWriter->pDelFWriter, pWriter->aDelIdxW); - if (code) goto _err; + // reader + if (pDelFile) { + code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbOpenTombFileDataIter(pWriter->pDelFReader, &pWriter->pTIter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // writer + code = tsdbDelFWriterOpen(&pWriter->pDelFWriter, &(SDelFile){.commitID = pWriter->commitID}, pTsdb); + TSDB_CHECK_CODE(code, lino, _exit); + + if ((pWriter->aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + if ((pWriter->aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } + return code; +} + +static int32_t tsdbSnapWriteDelDataEnd(STsdbSnapWriter* pWriter) { + int32_t code = 0; + int32_t lino = 0; + + STsdb* pTsdb = pWriter->pTsdb; + + // end remaining table with NULL data + code = tsdbSnapWriteDelTableData(pWriter, NULL, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); + + // update file-level info + code = tsdbWriteDelIdx(pWriter->pDelFWriter, pWriter->aDelIdx); + TSDB_CHECK_CODE(code, lino, _exit); code = tsdbUpdateDelFileHdr(pWriter->pDelFWriter); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); code = tsdbFSUpsertDelFile(&pWriter->fs, &pWriter->pDelFWriter->fDel); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); code = tsdbDelFWriterClose(&pWriter->pDelFWriter, 1); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); if (pWriter->pDelFReader) { code = tsdbDelFReaderClose(&pWriter->pDelFReader); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); 
} - tsdbInfo("vgId:%d, vnode snapshot tsdb write del for %s end", TD_VID(pTsdb->pVnode), pTsdb->path); - return code; + if (pWriter->pTIter) { + tsdbCloseDataIter2(pWriter->pTIter); + pWriter->pTIter = NULL; + } -_err: - tsdbError("vgId:%d, vnode snapshot tsdb write del end for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path, - tstrerror(code)); +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } + return code; +} + +static int32_t tsdbSnapWriteDelData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { + int32_t code = 0; + int32_t lino = 0; + + STsdb* pTsdb = pWriter->pTsdb; + + // start to write del data if need + if (pWriter->pDelFWriter == NULL) { + code = tsdbSnapWriteDelDataStart(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // do write del data + code = tsdbSnapWriteDelTableData(pWriter, (TABLEID*)pHdr->data, pHdr->data + sizeof(TABLEID), + pHdr->size - sizeof(TABLEID)); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed since %s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code)); + } else { + tsdbTrace("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } return code; } // APIs int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter) { - int32_t code = 0; - int32_t lino = 0; - STsdbSnapWriter* pWriter = NULL; + int32_t code = 0; + int32_t lino = 0; // alloc - pWriter = (STsdbSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); + STsdbSnapWriter* pWriter = (STsdbSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); if (pWriter == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); @@ -1288,11 +1833,6 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr pWriter->pTsdb = pTsdb; pWriter->sver = sver; pWriter->ever = ever; - - code = tsdbFSCopy(pTsdb, &pWriter->fs); - TSDB_CHECK_CODE(code, lino, _exit); - - // config pWriter->minutes = pTsdb->keepCfg.days; pWriter->precision = pTsdb->keepCfg.precision; pWriter->minRow = pTsdb->pVnode->config.tsdbCfg.minRows; @@ -1300,102 +1840,70 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr pWriter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; pWriter->commitID = pTsdb->pVnode->state.commitID; + code = tsdbFSCopy(pTsdb, &pWriter->fs); + TSDB_CHECK_CODE(code, lino, _exit); + // SNAP_DATA_TSDB - code = tBlockDataCreate(&pWriter->bData); + code = tBlockDataCreate(&pWriter->inData); TSDB_CHECK_CODE(code, lino, _exit); pWriter->fid = INT32_MIN; - pWriter->id = (TABLEID){0}; - // Reader - pWriter->dReader.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); - if (pWriter->dReader.aBlockIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - code = tBlockDataCreate(&pWriter->dReader.bData); + + code = tBlockDataCreate(&pWriter->bData); TSDB_CHECK_CODE(code, lino, _exit); - // Writer - pWriter->dWriter.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); - if (pWriter->dWriter.aBlockIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pWriter->dWriter.aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); - if (pWriter->dWriter.aSttBlk == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - code = tBlockDataCreate(&pWriter->dWriter.bData); - TSDB_CHECK_CODE(code, lino, _exit); - code = 
tBlockDataCreate(&pWriter->dWriter.sData); + code = tBlockDataCreate(&pWriter->sData); TSDB_CHECK_CODE(code, lino, _exit); // SNAP_DATA_DEL - pWriter->aDelIdxR = taosArrayInit(0, sizeof(SDelIdx)); - if (pWriter->aDelIdxR == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pWriter->aDelData = taosArrayInit(0, sizeof(SDelData)); - if (pWriter->aDelData == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pWriter->aDelIdxW = taosArrayInit(0, sizeof(SDelIdx)); - if (pWriter->aDelIdxW == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } _exit: if (code) { - tsdbError("vgId:%d, %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); - *ppWriter = NULL; - + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); if (pWriter) { - if (pWriter->aDelIdxW) taosArrayDestroy(pWriter->aDelIdxW); - if (pWriter->aDelData) taosArrayDestroy(pWriter->aDelData); - if (pWriter->aDelIdxR) taosArrayDestroy(pWriter->aDelIdxR); - tBlockDataDestroy(&pWriter->dWriter.sData, 1); - tBlockDataDestroy(&pWriter->dWriter.bData, 1); - if (pWriter->dWriter.aSttBlk) taosArrayDestroy(pWriter->dWriter.aSttBlk); - if (pWriter->dWriter.aBlockIdx) taosArrayDestroy(pWriter->dWriter.aBlockIdx); - tBlockDataDestroy(&pWriter->dReader.bData, 1); - if (pWriter->dReader.aBlockIdx) taosArrayDestroy(pWriter->dReader.aBlockIdx); + tBlockDataDestroy(&pWriter->sData, 1); tBlockDataDestroy(&pWriter->bData, 1); + tBlockDataDestroy(&pWriter->inData, 1); tsdbFSDestroy(&pWriter->fs); - taosMemoryFree(pWriter); + pWriter = NULL; } } else { - tsdbInfo("vgId:%d, %s done", TD_VID(pTsdb->pVnode), __func__); - *ppWriter = pWriter; + tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64, TD_VID(pTsdb->pVnode), __func__, sver, ever); } + *ppWriter = pWriter; return code; } int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter) { int32_t code = 0; - if (pWriter->dWriter.pWriter) { - code = tsdbSnapWriteCloseFile(pWriter); - if (code) goto _exit; + int32_t lino = 0; + + if (pWriter->pDataFWriter) { + code = tsdbSnapWriteFileDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); } - code = tsdbSnapWriteDelEnd(pWriter); - if (code) goto _exit; + if (pWriter->pDelFWriter) { + code = tsdbSnapWriteDelDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } code = tsdbFSPrepareCommit(pWriter->pTsdb, &pWriter->fs); - if (code) goto _exit; + TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__); } return code; } int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { - int32_t code = 0; + int32_t code = 0; + int32_t lino = 0; + STsdbSnapWriter* pWriter = *ppWriter; STsdb* pTsdb = pWriter->pTsdb; @@ -1408,7 +1916,7 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { code = tsdbFSCommit(pWriter->pTsdb); if (code) { taosThreadRwlockUnlock(&pTsdb->rwLock); - goto _err; + TSDB_CHECK_CODE(code, lino, _exit); } // unlock @@ -1416,72 +1924,60 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { } // SNAP_DATA_DEL - taosArrayDestroy(pWriter->aDelIdxW); taosArrayDestroy(pWriter->aDelData); - 
taosArrayDestroy(pWriter->aDelIdxR); + taosArrayDestroy(pWriter->aDelIdx); // SNAP_DATA_TSDB - - // Writer - tBlockDataDestroy(&pWriter->dWriter.sData, 1); - tBlockDataDestroy(&pWriter->dWriter.bData, 1); - taosArrayDestroy(pWriter->dWriter.aSttBlk); - tMapDataClear(&pWriter->dWriter.mDataBlk); - taosArrayDestroy(pWriter->dWriter.aBlockIdx); - - // Reader - tBlockDataDestroy(&pWriter->dReader.bData, 1); - tMapDataClear(&pWriter->dReader.mDataBlk); - taosArrayDestroy(pWriter->dReader.aBlockIdx); - + tBlockDataDestroy(&pWriter->sData, 1); tBlockDataDestroy(&pWriter->bData, 1); + taosArrayDestroy(pWriter->aSttBlk); + tMapDataClear(&pWriter->mDataBlk); + taosArrayDestroy(pWriter->aBlockIdx); tDestroyTSchema(pWriter->skmTable.pTSchema); + tBlockDataDestroy(&pWriter->inData, 1); for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t*); iBuf++) { tFree(pWriter->aBuf[iBuf]); } - tsdbInfo("vgId:%d, %s done", TD_VID(pWriter->pTsdb->pVnode), __func__); + tsdbFSDestroy(&pWriter->fs); taosMemoryFree(pWriter); *ppWriter = NULL; - return code; -_err: - tsdbError("vgId:%d, vnode snapshot tsdb writer close for %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), - pWriter->pTsdb->path, tstrerror(code)); - taosMemoryFree(pWriter); - *ppWriter = NULL; +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } return code; } -int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { - int32_t code = 0; - SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; +int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { + int32_t code = 0; + int32_t lino = 0; - // ts data if (pHdr->type == SNAP_DATA_TSDB) { - code = tsdbSnapWriteData(pWriter, pData, nData); - if (code) goto _err; - + code = tsdbSnapWriteTimeSeriesData(pWriter, pHdr); + TSDB_CHECK_CODE(code, lino, _exit); goto _exit; - } else { - if (pWriter->dWriter.pWriter) { - code = tsdbSnapWriteCloseFile(pWriter); - if (code) goto _err; - } + } else if (pWriter->pDataFWriter) { + code = tsdbSnapWriteFileDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); } - // del data if (pHdr->type == SNAP_DATA_DEL) { - code = tsdbSnapWriteDel(pWriter, pData, nData); - if (code) goto _err; + code = tsdbSnapWriteDelData(pWriter, pHdr); + TSDB_CHECK_CODE(code, lino, _exit); + goto _exit; } _exit: - tsdbDebug("vgId:%d, tsdb snapshot write for %s succeed", TD_VID(pWriter->pTsdb->pVnode), pWriter->pTsdb->path); - return code; - -_err: - tsdbError("vgId:%d, tsdb snapshot write for %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), pWriter->pTsdb->path, - tstrerror(code)); + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s, type:%d index:%" PRId64 " size:%" PRId64, + TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code), pHdr->type, pHdr->index, pHdr->size); + } else { + tsdbDebug("vgId:%d %s done, type:%d index:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, + pHdr->type, pHdr->index, pHdr->size); + } return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index 86adc1dc80..e2d4b92836 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -684,7 +684,7 @@ int32_t tRowMergerInit2(SRowMerger *pMerger, STSchema *pResTSchema, TSDBROW *pRo tsdbRowGetColVal(pRow, pTSchema, jCol++, pColVal); if ((!COL_VAL_IS_NONE(pColVal)) && (!COL_VAL_IS_NULL(pColVal)) 
&& IS_VAR_DATA_TYPE(pColVal->type)) { uint8_t *pVal = pColVal->value.pData; - + pColVal->value.pData = NULL; code = tRealloc(&pColVal->value.pData, pColVal->value.nData); if (code) goto _exit; @@ -731,6 +731,7 @@ int32_t tRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { tsdbRowGetColVal(pRow, pTSchema, jCol++, pColVal); if (key.version > pMerger->version) { +#if 0 if (!COL_VAL_IS_NONE(pColVal)) { if ((!COL_VAL_IS_NULL(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) { SColVal *tColVal = taosArrayGet(pMerger->pArray, iCol); @@ -746,6 +747,28 @@ int32_t tRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { taosArraySet(pMerger->pArray, iCol, pColVal); } } +#endif + if (!COL_VAL_IS_NONE(pColVal)) { + if (IS_VAR_DATA_TYPE(pColVal->type)) { + SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); + if (!COL_VAL_IS_NULL(pColVal)) { + code = tRealloc(&pTColVal->value.pData, pColVal->value.nData); + if (code) return code; + + pTColVal->value.nData = pColVal->value.nData; + if (pTColVal->value.nData) { + memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); + } + pTColVal->flag = 0; + } else { + tFree(pTColVal->value.pData); + pTColVal->value.pData = NULL; + taosArraySet(pMerger->pArray, iCol, pColVal); + } + } else { + taosArraySet(pMerger->pArray, iCol, pColVal); + } + } } else if (key.version < pMerger->version) { SColVal *tColVal = (SColVal *)taosArrayGet(pMerger->pArray, iCol); if (COL_VAL_IS_NONE(tColVal) && !COL_VAL_IS_NONE(pColVal)) { @@ -753,7 +776,7 @@ int32_t tRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { code = tRealloc(&tColVal->value.pData, pColVal->value.nData); if (code) return code; - tColVal->value.nData = pColVal->value.nData; + tColVal->value.nData = pColVal->value.nData; if (pColVal->value.nData) { memcpy(tColVal->value.pData, pColVal->value.pData, pColVal->value.nData); } @@ -802,7 +825,7 @@ int32_t tRowMergerInit(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); if ((!COL_VAL_IS_NONE(pColVal)) && (!COL_VAL_IS_NULL(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) { uint8_t *pVal = pColVal->value.pData; - + pColVal->value.pData = NULL; code = tRealloc(&pColVal->value.pData, pColVal->value.nData); if (code) goto _exit; @@ -811,7 +834,7 @@ int32_t tRowMergerInit(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { memcpy(pColVal->value.pData, pVal, pColVal->value.nData); } } - + if (taosArrayPush(pMerger->pArray, pColVal) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; @@ -822,7 +845,7 @@ _exit: return code; } -void tRowMergerClear(SRowMerger *pMerger) { +void tRowMergerClear(SRowMerger *pMerger) { for (int32_t iCol = 1; iCol < pMerger->pTSchema->numOfCols; iCol++) { SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); if (IS_VAR_DATA_TYPE(pTColVal->type)) { @@ -830,7 +853,7 @@ void tRowMergerClear(SRowMerger *pMerger) { } } - taosArrayDestroy(pMerger->pArray); + taosArrayDestroy(pMerger->pArray); } int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { @@ -853,7 +876,7 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { pTColVal->value.nData = pColVal->value.nData; if (pTColVal->value.nData) { - memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); + memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); } pTColVal->flag = 0; } else { @@ -875,7 +898,7 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { tColVal->value.nData = pColVal->value.nData; if 
(tColVal->value.nData) { - memcpy(tColVal->value.pData, pColVal->value.pData, tColVal->value.nData); + memcpy(tColVal->value.pData, pColVal->value.pData, tColVal->value.nData); } tColVal->flag = 0; } else { diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 8c07d0cec7..58d9f1a049 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -86,7 +86,7 @@ int32_t vnodeAlter(const char *path, SAlterVnodeReplicaReq *pReq, STfs *pTfs) { pNode->nodeId = pReq->replicas[i].id; pNode->nodePort = pReq->replicas[i].port; tstrncpy(pNode->nodeFqdn, pReq->replicas[i].fqdn, sizeof(pNode->nodeFqdn)); - (void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); + tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); vInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", pReq->vgId, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId); } diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index e75dc24329..43f903dc48 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -455,7 +455,7 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { if (code) goto _err; } - code = tsdbSnapWrite(pWriter->pTsdbSnapWriter, pData, nData); + code = tsdbSnapWrite(pWriter->pTsdbSnapWriter, pHdr); if (code) goto _err; } break; case SNAP_DATA_TQ_HANDLE: { diff --git a/source/libs/catalog/inc/catalogInt.h b/source/libs/catalog/inc/catalogInt.h index b25097b837..41c7eca7f0 100644 --- a/source/libs/catalog/inc/catalogInt.h +++ b/source/libs/catalog/inc/catalogInt.h @@ -805,6 +805,7 @@ int32_t ctgMakeVgArray(SDBVgInfo* dbInfo); int32_t ctgAcquireVgMetaFromCache(SCatalog *pCtg, const char *dbFName, const char *tbName, SCtgDBCache **pDb, SCtgTbCache **pTb); int32_t ctgCopyTbMeta(SCatalog *pCtg, SCtgTbMetaCtx *ctx, SCtgDBCache **pDb, SCtgTbCache **pTb, STableMeta **pTableMeta, char* dbFName); void ctgReleaseVgMetaToCache(SCatalog *pCtg, SCtgDBCache *dbCache, SCtgTbCache *pCache); +void ctgReleaseTbMetaToCache(SCatalog *pCtg, SCtgDBCache *dbCache, SCtgTbCache *pCache); extern SCatalogMgmt gCtgMgmt; extern SCtgDebug gCTGDebug; diff --git a/source/libs/catalog/src/catalog.c b/source/libs/catalog/src/catalog.c index c7af0411be..f9a218835e 100644 --- a/source/libs/catalog/src/catalog.c +++ b/source/libs/catalog/src/catalog.c @@ -598,10 +598,16 @@ int32_t ctgGetCachedTbVgMeta(SCatalog* pCtg, const SName* pTableName, SVgroupInf CTG_ERR_JRET(ctgGetVgInfoFromHashValue(pCtg, dbCache->vgCache.vgInfo, pTableName, pVgroup)); + ctgRUnlockVgInfo(dbCache); + SCtgTbMetaCtx ctx = {0}; ctx.pName = (SName*)pTableName; ctx.flag = CTG_FLAG_UNKNOWN_STB; - CTG_ERR_JRET(ctgCopyTbMeta(pCtg, &ctx, &dbCache, &tbCache, pTableMeta, db)); + code = ctgCopyTbMeta(pCtg, &ctx, &dbCache, &tbCache, pTableMeta, db); + + ctgReleaseTbMetaToCache(pCtg, dbCache, tbCache); + + CTG_RET(code); _return: diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index 9237d77c47..89e92b0cc8 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -999,6 +999,7 @@ int32_t ctgHandleGetTbMetaRsp(SCtgTaskReq* tReq, int32_t reqType, const SDataBuf CTG_ERR_JRET(ctgGetTbMetaFromVnode(pCtg, pConn, pName, &vgInfo, NULL, tReq)); ctgReleaseVgInfoToCache(pCtg, dbCache); + dbCache = NULL; } else { SBuildUseDBInput input = {0}; @@ -1168,6 +1169,7 @@ 
int32_t ctgHandleGetTbMetasRsp(SCtgTaskReq* tReq, int32_t reqType, const SDataBu CTG_ERR_JRET(ctgGetTbMetaFromVnode(pCtg, pConn, pName, &vgInfo, NULL, tReq)); ctgReleaseVgInfoToCache(pCtg, dbCache); + dbCache = NULL; } else { SBuildUseDBInput input = {0}; diff --git a/source/libs/catalog/src/ctgCache.c b/source/libs/catalog/src/ctgCache.c index c41a7e5967..06db2c3268 100644 --- a/source/libs/catalog/src/ctgCache.c +++ b/source/libs/catalog/src/ctgCache.c @@ -2118,7 +2118,7 @@ int32_t ctgOpUpdateEpset(SCtgCacheOperation *operation) { _return: - if (dbCache) { + if (code == TSDB_CODE_SUCCESS && dbCache) { ctgWUnlockVgInfo(dbCache); } diff --git a/source/libs/command/src/command.c b/source/libs/command/src/command.c index a179ec24f9..3f10ed7388 100644 --- a/source/libs/command/src/command.c +++ b/source/libs/command/src/command.c @@ -264,10 +264,10 @@ static void setCreateDBResultIntoDataBlock(SSDataBlock* pBlock, char* dbFName, S len += sprintf( buf2 + VARSTR_HEADER_SIZE, "CREATE DATABASE `%s` BUFFER %d CACHESIZE %d CACHEMODEL '%s' COMP %d DURATION %dm " - "WAL_FSYNC_PERIOD %d MAXROWS %d MINROWS %d KEEP %dm,%dm,%dm PAGES %d PAGESIZE %d PRECISION '%s' REPLICA %d " + "WAL_FSYNC_PERIOD %d MAXROWS %d MINROWS %d STT_TRIGGER %d KEEP %dm,%dm,%dm PAGES %d PAGESIZE %d PRECISION '%s' REPLICA %d " "WAL_LEVEL %d VGROUPS %d SINGLE_STABLE %d", dbFName, pCfg->buffer, pCfg->cacheSize, cacheModelStr(pCfg->cacheLast), pCfg->compression, pCfg->daysPerFile, - pCfg->walFsyncPeriod, pCfg->maxRows, pCfg->minRows, pCfg->daysToKeep0, pCfg->daysToKeep1, pCfg->daysToKeep2, + pCfg->walFsyncPeriod, pCfg->maxRows, pCfg->minRows, pCfg->sstTrigger, pCfg->daysToKeep0, pCfg->daysToKeep1, pCfg->daysToKeep2, pCfg->pages, pCfg->pageSize, prec, pCfg->replications, pCfg->walLevel, pCfg->numOfVgroups, 1 == pCfg->numOfStables); diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index fffe687ff6..c68f7c4697 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -705,7 +705,8 @@ void doBuildResultDatablock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SG bool hasLimitOffsetInfo(SLimitInfo* pLimitInfo); void initLimitInfo(const SNode* pLimit, const SNode* pSLimit, SLimitInfo* pLimitInfo); -void applyLimitOffset(SLimitInfo* pLimitInfo, SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo, SOperatorInfo* pOperator); +void resetLimitInfoForNextGroup(SLimitInfo* pLimitInfo); +bool applyLimitOffset(SLimitInfo* pLimitInfo, SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo, SOperatorInfo* pOperator); void applyAggFunctionOnPartialTuples(SExecTaskInfo* taskInfo, SqlFunctionCtx* pCtx, SColumnInfoData* pTimeWindowData, int32_t offset, int32_t forwardStep, int32_t numOfTotal, int32_t numOfOutput); diff --git a/source/libs/executor/src/exchangeoperator.c b/source/libs/executor/src/exchangeoperator.c index 9873c52006..037b33dc9f 100644 --- a/source/libs/executor/src/exchangeoperator.c +++ b/source/libs/executor/src/exchangeoperator.c @@ -584,7 +584,13 @@ int32_t doExtractResultBlocks(SExchangeInfo* pExchangeInfo, SSourceDataInfo* pDa int32_t index = 0; int32_t code = 0; while (index++ < pRetrieveRsp->numOfBlocks) { - SSDataBlock* pb = createOneDataBlock(pExchangeInfo->pDummyBlock, false); + SSDataBlock* pb = NULL; + if (taosArrayGetSize(pExchangeInfo->pRecycledBlocks) > 0) { + pb = *(SSDataBlock**)taosArrayPop(pExchangeInfo->pRecycledBlocks); + blockDataCleanup(pb); + } else { + pb = createOneDataBlock(pExchangeInfo->pDummyBlock, false); + } code = 
extractDataBlockFromFetchRsp(pb, pStart, NULL, &pStart); if (code != 0) { @@ -732,9 +738,7 @@ int32_t handleLimitOffset(SOperatorInfo* pOperator, SLimitInfo* pLimitInfo, SSDa } // reset the value for a new group data - pLimitInfo->numOfOutputRows = 0; - pLimitInfo->remainOffset = pLimitInfo->limit.offset; - + resetLimitInfoForNextGroup(pLimitInfo); // existing rows that belongs to previous group. if (pBlock->info.rows > 0) { return PROJECT_RETRIEVE_DONE; @@ -760,7 +764,12 @@ int32_t handleLimitOffset(SOperatorInfo* pOperator, SLimitInfo* pLimitInfo, SSDa int32_t keepRows = (int32_t)(pLimitInfo->limit.limit - pLimitInfo->numOfOutputRows); blockDataKeepFirstNRows(pBlock, keepRows); if (pLimitInfo->slimit.limit > 0 && pLimitInfo->slimit.limit <= pLimitInfo->numOfOutputGroups) { - pOperator->status = OP_EXEC_DONE; + setOperatorCompleted(pOperator); + } else { + // current group limitation is reached, and future blocks of this group need to be discarded. + if (pBlock->info.rows == 0) { + return PROJECT_RETRIEVE_CONTINUE; + } } return PROJECT_RETRIEVE_DONE; diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index c55ae08439..e65708326e 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -1789,6 +1789,11 @@ void initLimitInfo(const SNode* pLimit, const SNode* pSLimit, SLimitInfo* pLimit pLimitInfo->remainGroupOffset = slimit.offset; } +void resetLimitInfoForNextGroup(SLimitInfo* pLimitInfo) { + pLimitInfo->numOfOutputRows = 0; + pLimitInfo->remainOffset = pLimitInfo->limit.offset; +} + uint64_t tableListGetSize(const STableListInfo* pTableList) { ASSERT(taosArrayGetSize(pTableList->pTableList) == taosHashGetSize(pTableList->map)); return taosArrayGetSize(pTableList->pTableList); diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 814ead57f0..6c354c3d61 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -24,12 +24,16 @@ static TdThreadOnce initPoolOnce = PTHREAD_ONCE_INIT; int32_t exchangeObjRefPool = -1; -static void initRefPool() { exchangeObjRefPool = taosOpenRef(1024, doDestroyExchangeOperatorInfo); } static void cleanupRefPool() { int32_t ref = atomic_val_compare_exchange_32(&exchangeObjRefPool, exchangeObjRefPool, 0); taosCloseRef(ref); } +static void initRefPool() { + exchangeObjRefPool = taosOpenRef(1024, doDestroyExchangeOperatorInfo); + atexit(cleanupRefPool); +} + static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOfBlocks, int32_t type, char* id) { ASSERT(pOperator != NULL); if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { @@ -442,7 +446,6 @@ int32_t qCreateExecTask(SReadHandle* readHandle, int32_t vgId, uint64_t taskId, SExecTaskInfo** pTask = (SExecTaskInfo**)pTaskInfo; taosThreadOnce(&initPoolOnce, initRefPool); - atexit(cleanupRefPool); qDebug("start to create subplan task, TID:0x%" PRIx64 " QID:0x%" PRIx64, taskId, pSubplan->id.queryId); diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index 5676e19cdf..bf4b9a2599 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -593,8 +593,11 @@ void* getCurrentDataGroupInfo(const SPartitionOperatorInfo* pInfo, SDataGroupInf int32_t pageId = 0; pPage = getNewBufPage(pInfo->pBuf, &pageId); - taosArrayPush(p->pPageList, &pageId); + if (pPage == NULL) { + return pPage; + } + taosArrayPush(p->pPageList, &pageId); *(int32_t*)pPage = 0; } else 
{ int32_t* curId = taosArrayGetLast(p->pPageList); @@ -612,6 +615,11 @@ void* getCurrentDataGroupInfo(const SPartitionOperatorInfo* pInfo, SDataGroupInf // add a new page for current group int32_t pageId = 0; pPage = getNewBufPage(pInfo->pBuf, &pageId); + if (pPage == NULL) { + qError("failed to get new buffer, code:%s", tstrerror(terrno)); + return NULL; + } + taosArrayPush(p->pPageList, &pageId); memset(pPage, 0, getBufPageSize(pInfo->pBuf)); } diff --git a/source/libs/executor/src/projectoperator.c b/source/libs/executor/src/projectoperator.c index 4d38f2c8e9..3e3610827b 100644 --- a/source/libs/executor/src/projectoperator.c +++ b/source/libs/executor/src/projectoperator.c @@ -175,8 +175,7 @@ static int32_t setInfoForNewGroup(SSDataBlock* pBlock, SLimitInfo* pLimitInfo, S // reset the value for a new group data // existing rows that belongs to previous group. - pLimitInfo->numOfOutputRows = 0; - pLimitInfo->remainOffset = pLimitInfo->limit.offset; + resetLimitInfoForNextGroup(pLimitInfo); } return PROJECT_RETRIEVE_DONE; @@ -200,10 +199,18 @@ static int32_t doIngroupLimitOffset(SLimitInfo* pLimitInfo, uint64_t groupId, SS if (pLimitInfo->limit.limit >= 0 && pLimitInfo->numOfOutputRows + pBlock->info.rows >= pLimitInfo->limit.limit) { int32_t keepRows = (int32_t)(pLimitInfo->limit.limit - pLimitInfo->numOfOutputRows); blockDataKeepFirstNRows(pBlock, keepRows); + // TODO: optimize it later when partition by + limit + // all retrieved requirement has been fulfilled, let's finish this if ((pLimitInfo->slimit.limit == -1 && pLimitInfo->currentGroupId == 0) || (pLimitInfo->slimit.limit > 0 && pLimitInfo->slimit.limit <= pLimitInfo->numOfOutputGroups)) { setOperatorCompleted(pOperator); + } else { + // Even current group is done, there may be many vgroups remain existed, and we need to continue to retrieve data + // from next group. So let's continue this retrieve process + if (keepRows == 0) { + return PROJECT_RETRIEVE_CONTINUE; + } } } @@ -357,7 +364,6 @@ SSDataBlock* doProjectOperation(SOperatorInfo* pOperator) { pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; } - // printDataBlock1(p, "project"); return (p->info.rows > 0) ? 
p : NULL; } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index eb38299938..053d7a279c 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -257,7 +257,7 @@ static void doSetTagColumnData(STableScanBase* pTableScanInfo, SSDataBlock* pBlo } // todo handle the slimit info -void applyLimitOffset(SLimitInfo* pLimitInfo, SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo, SOperatorInfo* pOperator) { +bool applyLimitOffset(SLimitInfo* pLimitInfo, SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo, SOperatorInfo* pOperator) { SLimit* pLimit = &pLimitInfo->limit; const char* id = GET_TASKID(pTaskInfo); @@ -266,6 +266,7 @@ void applyLimitOffset(SLimitInfo* pLimitInfo, SSDataBlock* pBlock, SExecTaskInfo pLimitInfo->remainOffset -= pBlock->info.rows; blockDataEmpty(pBlock); qDebug("current block ignore due to offset, current:%" PRId64 ", %s", pLimitInfo->remainOffset, id); + return false; } else { blockDataTrimFirstNRows(pBlock, pLimitInfo->remainOffset); pLimitInfo->remainOffset = 0; @@ -274,13 +275,14 @@ void applyLimitOffset(SLimitInfo* pLimitInfo, SSDataBlock* pBlock, SExecTaskInfo if (pLimit->limit != -1 && pLimit->limit <= (pLimitInfo->numOfOutputRows + pBlock->info.rows)) { // limit the output rows - int32_t overflowRows = pLimitInfo->numOfOutputRows + pBlock->info.rows - pLimit->limit; - int32_t keep = pBlock->info.rows - overflowRows; + int32_t keep = (int32_t)(pLimit->limit - pLimitInfo->numOfOutputRows); blockDataKeepFirstNRows(pBlock, keep); qDebug("output limit %" PRId64 " has reached, %s", pLimit->limit, id); - pOperator->status = OP_EXEC_DONE; + return true; } + + return false; } static int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanBase* pTableScanInfo, SSDataBlock* pBlock, @@ -391,7 +393,10 @@ static int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanBase* pTableSca } } - applyLimitOffset(&pTableScanInfo->limitInfo, pBlock, pTaskInfo, pOperator); + bool limitReached = applyLimitOffset(&pTableScanInfo->limitInfo, pBlock, pTaskInfo, pOperator); + if (limitReached) { // set operator flag is done + setOperatorCompleted(pOperator); + } pCost->totalRows += pBlock->info.rows; pTableScanInfo->limitInfo.numOfOutputRows = pCost->totalRows; @@ -768,8 +773,7 @@ static SSDataBlock* doTableScan(SOperatorInfo* pOperator) { // reset value for the next group data output pOperator->status = OP_OPENED; - pInfo->base.limitInfo.numOfOutputRows = 0; - pInfo->base.limitInfo.remainOffset = pInfo->base.limitInfo.limit.offset; + resetLimitInfoForNextGroup(&pInfo->base.limitInfo); int32_t num = 0; STableKeyInfo* pList = NULL; @@ -2685,9 +2689,12 @@ int32_t stopGroupTableMergeScan(SOperatorInfo* pOperator) { taosArrayDestroy(pInfo->queryConds); pInfo->queryConds = NULL; + resetLimitInfoForNextGroup(&pInfo->limitInfo); return TSDB_CODE_SUCCESS; } +// all data produced by this function only belongs to one group +// slimit/soffset does not need to be concerned here, since this function only deal with data within one group. 
SSDataBlock* getSortedTableMergeScanBlockData(SSortHandle* pHandle, SSDataBlock* pResBlock, int32_t capacity, SOperatorInfo* pOperator) { STableMergeScanInfo* pInfo = pOperator->info; @@ -2707,10 +2714,12 @@ SSDataBlock* getSortedTableMergeScanBlockData(SSortHandle* pHandle, SSDataBlock* } } - qDebug("%s get sorted row blocks, rows:%d", GET_TASKID(pTaskInfo), pResBlock->info.rows); applyLimitOffset(&pInfo->limitInfo, pResBlock, pTaskInfo, pOperator); pInfo->limitInfo.numOfOutputRows += pResBlock->info.rows; + qDebug("%s get sorted row block, rows:%d, limit:%"PRId64, GET_TASKID(pTaskInfo), pResBlock->info.rows, + pInfo->limitInfo.numOfOutputRows); + return (pResBlock->info.rows > 0) ? pResBlock : NULL; } @@ -2749,11 +2758,13 @@ SSDataBlock* doTableMergeScan(SOperatorInfo* pOperator) { pOperator->resultInfo.totalRows += pBlock->info.rows; return pBlock; } else { + // Data of this group are all dumped, let's try the next group stopGroupTableMergeScan(pOperator); if (pInfo->tableEndIndex >= tableListSize - 1) { setOperatorCompleted(pOperator); break; } + pInfo->tableStartIndex = pInfo->tableEndIndex + 1; pInfo->groupId = tableListGetInfo(pTaskInfo->pTableInfoList, pInfo->tableStartIndex)->groupId; startGroupTableMergeScan(pOperator); @@ -3222,7 +3233,9 @@ static void buildVnodeGroupedNtbTableCount(STableCountScanOperatorInfo* pInfo, S uint64_t groupId = calcGroupId(fullStbName, strlen(fullStbName)); pRes->info.id.groupId = groupId; int64_t ntbNum = metaGetNtbNum(pInfo->readHandle.meta); - fillTableCountScanDataBlock(pSupp, dbName, "", ntbNum, pRes); + if (ntbNum != 0) { + fillTableCountScanDataBlock(pSupp, dbName, "", ntbNum, pRes); + } } static void buildVnodeGroupedStbTableCount(STableCountScanOperatorInfo* pInfo, STableCountScanSupp* pSupp, diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index f5dc6cc623..97b4fd9dc4 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -680,11 +680,13 @@ SSDataBlock* getMultiwaySortedBlockData(SSortHandle* pHandle, SSDataBlock* pData break; } + bool limitReached = applyLimitOffset(&pInfo->limitInfo, p, pTaskInfo, pOperator); + if (limitReached) { + resetLimitInfoForNextGroup(&pInfo->limitInfo); + } + if (p->info.rows > 0) { - applyLimitOffset(&pInfo->limitInfo, p, pTaskInfo, pOperator); - if (p->info.rows > 0) { - break; - } + break; } } @@ -698,7 +700,6 @@ SSDataBlock* getMultiwaySortedBlockData(SSortHandle* pHandle, SSDataBlock* pData colDataAssign(pDst, pSrc, p->info.rows, &pDataBlock->info); } - pInfo->limitInfo.numOfOutputRows += p->info.rows; pDataBlock->info.rows = p->info.rows; pDataBlock->info.id.groupId = pInfo->groupId; pDataBlock->info.dataLoad = 1; diff --git a/source/libs/executor/src/sysscanoperator.c b/source/libs/executor/src/sysscanoperator.c index 05570eda2f..b4da3ae710 100644 --- a/source/libs/executor/src/sysscanoperator.c +++ b/source/libs/executor/src/sysscanoperator.c @@ -491,6 +491,7 @@ static SSDataBlock* sysTableScanUserTags(SOperatorInfo* pOperator) { pInfo->pCur = metaOpenTbCursor(pInfo->readHandle.meta); } + bool blockFull = false; while ((ret = metaTbCursorNext(pInfo->pCur)) == 0) { if (pInfo->pCur->mr.me.type != TSDB_CHILD_TABLE) { continue; @@ -512,17 +513,24 @@ static SSDataBlock* sysTableScanUserTags(SOperatorInfo* pOperator) { T_LONG_JMP(pTaskInfo->env, terrno); } - sysTableUserTagsFillOneTableTags(pInfo, &smrSuperTable, &pInfo->pCur->mr, dbname, tableName, &numOfRows, dataBlock); - + if 
((smrSuperTable.me.stbEntry.schemaTag.nCols + numOfRows) > pOperator->resultInfo.capacity) { + metaTbCursorPrev(pInfo->pCur); + blockFull = true; + } else { + sysTableUserTagsFillOneTableTags(pInfo, &smrSuperTable, &pInfo->pCur->mr, dbname, tableName, &numOfRows, dataBlock); + } + metaReaderClear(&smrSuperTable); - if (numOfRows >= pOperator->resultInfo.capacity) { + if (blockFull || numOfRows >= pOperator->resultInfo.capacity) { relocateAndFilterSysTagsScanResult(pInfo, numOfRows, dataBlock, pOperator->exprSupp.pFilterInfo); numOfRows = 0; if (pInfo->pRes->info.rows > 0) { break; } + + blockFull = false; } } diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 4cac29cec8..cbc2f924c6 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -2477,7 +2477,19 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* p pInfo->delKey = key; } int32_t prevEndPos = (forwardRows - 1) * step + startPos; - ASSERT(pSDataBlock->info.window.skey > 0 && pSDataBlock->info.window.ekey > 0); + if (pSDataBlock->info.window.skey <= 0 || pSDataBlock->info.window.ekey <= 0) { + qError("table uid %" PRIu64 " data block timestamp range may not be calculated! minKey %" PRId64 + ",maxKey %" PRId64, + pSDataBlock->info.id.uid, pSDataBlock->info.window.skey, pSDataBlock->info.window.ekey); + blockDataUpdateTsWindow(pSDataBlock, 0); + + // timestamp of the data is incorrect + if (pSDataBlock->info.window.skey <= 0 || pSDataBlock->info.window.ekey <= 0) { + qError("table uid %" PRIu64 " data block timestamp is out of range! minKey %" PRId64 ",maxKey %" PRId64, + pSDataBlock->info.id.uid, pSDataBlock->info.window.skey, pSDataBlock->info.window.ekey); + } + } + if (IS_FINAL_OP(pInfo)) { startPos = getNextQualifiedFinalWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos); } else { diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index b0eb01e6f1..c831c3183b 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -3087,14 +3087,12 @@ static int32_t doSaveTupleData(SSerializeDataHandle* pHandle, const void* pBuf, if (pHandle->currentPage == -1) { pPage = getNewBufPage(pHandle->pBuf, &pHandle->currentPage); if (pPage == NULL) { - terrno = TSDB_CODE_NO_AVAIL_DISK; return terrno; } pPage->num = sizeof(SFilePage); } else { pPage = getBufPage(pHandle->pBuf, pHandle->currentPage); if (pPage == NULL) { - terrno = TSDB_CODE_NO_AVAIL_DISK; return terrno; } if (pPage->num + length > getBufPageSize(pHandle->pBuf)) { @@ -3102,7 +3100,6 @@ static int32_t doSaveTupleData(SSerializeDataHandle* pHandle, const void* pBuf, releaseBufPage(pHandle->pBuf, pPage); pPage = getNewBufPage(pHandle->pBuf, &pHandle->currentPage); if (pPage == NULL) { - terrno = TSDB_CODE_NO_AVAIL_DISK; return terrno; } pPage->num = sizeof(SFilePage); @@ -3149,7 +3146,6 @@ static int32_t doUpdateTupleData(SSerializeDataHandle* pHandle, const void* pBuf if (pHandle->pBuf != NULL) { SFilePage* pPage = getBufPage(pHandle->pBuf, pPos->pageId); if (pPage == NULL) { - terrno = TSDB_CODE_NO_AVAIL_DISK; return terrno; } memcpy(pPage->data + pPos->offset, pBuf, length); diff --git a/source/libs/function/src/tpercentile.c b/source/libs/function/src/tpercentile.c index 272e5bf059..1734b535f1 100644 --- a/source/libs/function/src/tpercentile.c +++ b/source/libs/function/src/tpercentile.c @@ -43,8 +43,8 @@ static SFilePage 
*loadDataFromFilePage(tMemBucket *pMemBucket, int32_t slotIdx) if (pg == NULL) { return NULL; } - memcpy(buffer->data + offset, pg->data, (size_t)(pg->num * pMemBucket->bytes)); + memcpy(buffer->data + offset, pg->data, (size_t)(pg->num * pMemBucket->bytes)); offset += (int32_t)(pg->num * pMemBucket->bytes); } @@ -109,7 +109,7 @@ int32_t findOnlyResult(tMemBucket *pMemBucket, double *result) { int32_t *pageId = taosArrayGet(list, 0); SFilePage *pPage = getBufPage(pMemBucket->pBuffer, *pageId); if (pPage == NULL) { - return TSDB_CODE_NO_AVAIL_DISK; + return terrno; } ASSERT(pPage->num == 1); @@ -276,7 +276,7 @@ tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType, double minval, return NULL; } - int32_t ret = createDiskbasedBuf(&pBucket->pBuffer, pBucket->bufPageSize, pBucket->bufPageSize * 512, "1", tsTempDir); + int32_t ret = createDiskbasedBuf(&pBucket->pBuffer, pBucket->bufPageSize, pBucket->bufPageSize * 1024, "1", tsTempDir); if (ret != 0) { tMemBucketDestroy(pBucket); return NULL; @@ -386,7 +386,7 @@ int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size) { pSlot->info.data = getNewBufPage(pBucket->pBuffer, &pageId); if (pSlot->info.data == NULL) { - return TSDB_CODE_NO_AVAIL_DISK; + return terrno; } pSlot->info.pageId = pageId; taosArrayPush(pPageIdList, &pageId); @@ -480,8 +480,9 @@ int32_t getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction // data in buffer and file are merged together to be processed. SFilePage *buffer = loadDataFromFilePage(pMemBucket, i); if (buffer == NULL) { - return TSDB_CODE_NO_AVAIL_DISK; + return terrno; } + int32_t currentIdx = count - num; char *thisVal = buffer->data + pMemBucket->bytes * currentIdx; @@ -518,7 +519,7 @@ int32_t getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction int32_t *pageId = taosArrayGet(list, f); SFilePage *pg = getBufPage(pMemBucket->pBuffer, *pageId); if (pg == NULL) { - return TSDB_CODE_NO_AVAIL_DISK; + return terrno; } int32_t code = tMemBucketPut(pMemBucket, pg->data, (int32_t)pg->num); diff --git a/source/libs/parser/src/parInsertSql.c b/source/libs/parser/src/parInsertSql.c index 98c6aed829..1e1821842f 100644 --- a/source/libs/parser/src/parInsertSql.c +++ b/source/libs/parser/src/parInsertSql.c @@ -1760,6 +1760,9 @@ static int32_t getTableSchemaFromMetaData(SInsertParseContext* pCxt, const SMeta if (TSDB_CODE_SUCCESS == code && !isStb && TSDB_SUPER_TABLE == pStmt->pTableMeta->tableType) { code = buildInvalidOperationMsg(&pCxt->msg, "insert data into super table is not supported"); } + if (TSDB_CODE_SUCCESS == code && isStb) { + code = storeTableMeta(pCxt, pStmt); + } if (TSDB_CODE_SUCCESS == code) { code = getTableVgroupFromMetaData(pMetaData->pTableHash, pStmt, isStb); } diff --git a/source/libs/parser/src/parInsertStmt.c b/source/libs/parser/src/parInsertStmt.c index 4ed72e6c14..1f437e8a8c 100644 --- a/source/libs/parser/src/parInsertStmt.c +++ b/source/libs/parser/src/parInsertStmt.c @@ -425,6 +425,27 @@ int32_t qCloneStmtDataBlock(void** pDst, void* pSrc) { pBlock->pTableMeta = pNewMeta; } + if (pBlock->boundColumnInfo.boundColumns) { + int32_t size = pBlock->boundColumnInfo.numOfCols * sizeof(col_id_t); + void* tmp = taosMemoryMalloc(size); + memcpy(tmp, pBlock->boundColumnInfo.boundColumns, size); + pBlock->boundColumnInfo.boundColumns = tmp; + } + + if (pBlock->boundColumnInfo.cols) { + int32_t size = pBlock->boundColumnInfo.numOfCols * sizeof(SBoundColumn); + void* tmp = taosMemoryMalloc(size); + memcpy(tmp, pBlock->boundColumnInfo.cols, 
size); + pBlock->boundColumnInfo.cols = tmp; + } + + if (pBlock->boundColumnInfo.colIdxInfo) { + int32_t size = pBlock->boundColumnInfo.numOfBound * sizeof(SBoundIdxInfo); + void* tmp = taosMemoryMalloc(size); + memcpy(tmp, pBlock->boundColumnInfo.colIdxInfo, size); + pBlock->boundColumnInfo.colIdxInfo = tmp; + } + return qResetStmtDataBlock(*pDst, false); } @@ -437,7 +458,7 @@ int32_t qRebuildStmtDataBlock(void** pDst, void* pSrc, uint64_t uid, int32_t vgI STableDataBlocks* pBlock = (STableDataBlocks*)*pDst; pBlock->pData = taosMemoryMalloc(pBlock->nAllocSize); if (NULL == pBlock->pData) { - qFreeStmtDataBlock(pBlock); + qDestroyStmtDataBlock(pBlock); return TSDB_CODE_OUT_OF_MEMORY; } diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 03e0275d7d..e901297f4d 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -1080,29 +1080,29 @@ static bool sortPriKeyOptMayBeOptimized(SLogicNode* pNode) { return false; } SSortLogicNode* pSort = (SSortLogicNode*)pNode; - if (pSort->groupSort || !sortPriKeyOptIsPriKeyOrderBy(pSort->pSortKeys) || 1 != LIST_LENGTH(pSort->node.pChildren)) { + if (!sortPriKeyOptIsPriKeyOrderBy(pSort->pSortKeys) || 1 != LIST_LENGTH(pSort->node.pChildren)) { return false; } return true; } -static int32_t sortPriKeyOptGetSequencingNodesImpl(SLogicNode* pNode, bool* pNotOptimize, +static int32_t sortPriKeyOptGetSequencingNodesImpl(SLogicNode* pNode, bool groupSort, bool* pNotOptimize, SNodeList** pSequencingNodes) { switch (nodeType(pNode)) { case QUERY_NODE_LOGIC_PLAN_SCAN: { SScanLogicNode* pScan = (SScanLogicNode*)pNode; - if (NULL != pScan->pGroupTags || TSDB_SYSTEM_TABLE == pScan->tableType) { + if ((!groupSort && NULL != pScan->pGroupTags) || TSDB_SYSTEM_TABLE == pScan->tableType) { *pNotOptimize = true; return TSDB_CODE_SUCCESS; } return nodesListMakeAppend(pSequencingNodes, (SNode*)pNode); } case QUERY_NODE_LOGIC_PLAN_JOIN: { - int32_t code = sortPriKeyOptGetSequencingNodesImpl((SLogicNode*)nodesListGetNode(pNode->pChildren, 0), + int32_t code = sortPriKeyOptGetSequencingNodesImpl((SLogicNode*)nodesListGetNode(pNode->pChildren, 0), groupSort, pNotOptimize, pSequencingNodes); if (TSDB_CODE_SUCCESS == code) { - code = sortPriKeyOptGetSequencingNodesImpl((SLogicNode*)nodesListGetNode(pNode->pChildren, 1), pNotOptimize, - pSequencingNodes); + code = sortPriKeyOptGetSequencingNodesImpl((SLogicNode*)nodesListGetNode(pNode->pChildren, 1), groupSort, + pNotOptimize, pSequencingNodes); } return code; } @@ -1121,13 +1121,13 @@ static int32_t sortPriKeyOptGetSequencingNodesImpl(SLogicNode* pNode, bool* pNot return TSDB_CODE_SUCCESS; } - return sortPriKeyOptGetSequencingNodesImpl((SLogicNode*)nodesListGetNode(pNode->pChildren, 0), pNotOptimize, - pSequencingNodes); + return sortPriKeyOptGetSequencingNodesImpl((SLogicNode*)nodesListGetNode(pNode->pChildren, 0), groupSort, + pNotOptimize, pSequencingNodes); } -static int32_t sortPriKeyOptGetSequencingNodes(SLogicNode* pNode, SNodeList** pSequencingNodes) { +static int32_t sortPriKeyOptGetSequencingNodes(SLogicNode* pNode, bool groupSort, SNodeList** pSequencingNodes) { bool notOptimize = false; - int32_t code = sortPriKeyOptGetSequencingNodesImpl(pNode, &notOptimize, pSequencingNodes); + int32_t code = sortPriKeyOptGetSequencingNodesImpl(pNode, groupSort, &notOptimize, pSequencingNodes); if (TSDB_CODE_SUCCESS != code || notOptimize) { NODES_CLEAR_LIST(*pSequencingNodes); } @@ -1175,8 +1175,8 @@ static int32_t sortPriKeyOptApply(SOptimizeContext*
pCxt, SLogicSubplan* pLogicS static int32_t sortPrimaryKeyOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan, SSortLogicNode* pSort) { SNodeList* pSequencingNodes = NULL; - int32_t code = - sortPriKeyOptGetSequencingNodes((SLogicNode*)nodesListGetNode(pSort->node.pChildren, 0), &pSequencingNodes); + int32_t code = sortPriKeyOptGetSequencingNodes((SLogicNode*)nodesListGetNode(pSort->node.pChildren, 0), + pSort->groupSort, &pSequencingNodes); if (TSDB_CODE_SUCCESS == code && NULL != pSequencingNodes) { code = sortPriKeyOptApply(pCxt, pLogicSubplan, pSort, pSequencingNodes); } diff --git a/source/libs/qworker/inc/qwInt.h b/source/libs/qworker/inc/qwInt.h index 35b2479a51..bde05d4116 100644 --- a/source/libs/qworker/inc/qwInt.h +++ b/source/libs/qworker/inc/qwInt.h @@ -194,6 +194,8 @@ typedef struct SQWorker { SMsgCb msgCb; SQWStat stat; int32_t *destroyed; + + int8_t nodeStopped; } SQWorker; typedef struct SQWorkerMgmt { @@ -228,9 +230,14 @@ typedef struct SQWorkerMgmt { case QW_PHASE_POST_FETCH: \ ctx->inFetch = 0; \ break; \ - default: \ + case QW_PHASE_PRE_QUERY: \ + case QW_PHASE_POST_QUERY: \ + case QW_PHASE_PRE_CQUERY: \ + case QW_PHASE_POST_CQUERY: \ atomic_store_8(&(ctx)->phase, _value); \ break; \ + default: \ + break; \ } \ } while (0) diff --git a/source/libs/qworker/src/qwUtil.c b/source/libs/qworker/src/qwUtil.c index fdd2775daa..7ee7c50c96 100644 --- a/source/libs/qworker/src/qwUtil.c +++ b/source/libs/qworker/src/qwUtil.c @@ -213,9 +213,15 @@ int32_t qwAcquireTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx **ctx) { QW_SET_QTID(id, qId, tId, eId); *ctx = taosHashAcquire(mgmt->ctxHash, id, sizeof(id)); + int8_t nodeStopped = atomic_load_8(&mgmt->nodeStopped); if (NULL == (*ctx)) { - QW_TASK_DLOG_E("task ctx not exist, may be dropped"); - QW_ERR_RET(TSDB_CODE_QRY_TASK_CTX_NOT_EXIST); + if (!nodeStopped) { + QW_TASK_DLOG_E("task ctx not exist, may be dropped"); + QW_ERR_RET(TSDB_CODE_QRY_TASK_CTX_NOT_EXIST); + } else { + QW_TASK_DLOG_E("node stopped"); + QW_ERR_RET(TSDB_CODE_VND_STOPPED); + } } return TSDB_CODE_SUCCESS; @@ -226,9 +232,16 @@ int32_t qwGetTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx **ctx) { QW_SET_QTID(id, qId, tId, eId); *ctx = taosHashGet(mgmt->ctxHash, id, sizeof(id)); + int8_t nodeStopped = atomic_load_8(&mgmt->nodeStopped); + if (NULL == (*ctx)) { - QW_TASK_DLOG_E("task ctx not exist, may be dropped"); - QW_ERR_RET(TSDB_CODE_QRY_TASK_CTX_NOT_EXIST); + if (!nodeStopped) { + QW_TASK_DLOG_E("task ctx not exist, may be dropped"); + QW_ERR_RET(TSDB_CODE_QRY_TASK_CTX_NOT_EXIST); + } else { + QW_TASK_DLOG_E("node stopped"); + QW_ERR_RET(TSDB_CODE_VND_STOPPED); + } } return TSDB_CODE_SUCCESS; diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index dcb7c02580..fedaa96ed9 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -551,7 +551,9 @@ _return: if (ctx) { QW_UPDATE_RSP_CODE(ctx, code); - QW_SET_PHASE(ctx, phase); + if (QW_PHASE_POST_CQUERY != phase) { + QW_SET_PHASE(ctx, phase); + } QW_UNLOCK(QW_WRITE, &ctx->lock); qwReleaseTaskCtx(mgmt, ctx); @@ -758,7 +760,7 @@ int32_t qwProcessCQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg) { QW_LOCK(QW_WRITE, &ctx->lock); if (qComplete || (queryStop && (0 == atomic_load_8((int8_t *)&ctx->queryContinue))) || code) { // Note: query is not running anymore - QW_SET_PHASE(ctx, 0); + QW_SET_PHASE(ctx, QW_PHASE_POST_CQUERY); QW_UNLOCK(QW_WRITE, &ctx->lock); break; } @@ -1186,6 +1188,9 @@ void qWorkerStopAllTasks(void *qWorkerMgmt) { uint64_t qId, tId, sId; int32_t eId; int64_t rId = 
0; + + atomic_store_8(&mgmt->nodeStopped, 1); + void *pIter = taosHashIterate(mgmt->ctxHash, NULL); while (pIter) { SQWTaskCtx *ctx = (SQWTaskCtx *)pIter; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 6f77769dec..2f991288ff 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -207,6 +207,7 @@ void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { if (ppTask) { SStreamTask* pTask = *ppTask; taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t)); + tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), pMeta->txn); /*if (pTask->timer) { * taosTmrStop(pTask->timer);*/ /*pTask->timer = NULL;*/ diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 6670bf463e..2460da25f4 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -192,7 +192,7 @@ void streamStateClose(SStreamState* pState) { } int32_t streamStateBegin(SStreamState* pState) { - if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, + if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn, NULL, NULL, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { tdbAbort(pState->pTdbState->db, pState->pTdbState->txn); return -1; @@ -208,7 +208,7 @@ int32_t streamStateCommit(SStreamState* pState) { return -1; } - if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, + if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn, NULL, NULL, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { return -1; } @@ -220,7 +220,7 @@ int32_t streamStateAbort(SStreamState* pState) { return -1; } - if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, + if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn, NULL, NULL, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { return -1; } diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index e77a8d4be3..835e5d248e 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -89,45 +89,6 @@ // /\ UNCHANGED <> // -int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) { - ASSERT(false && "deprecated"); - if (ths->state != TAOS_SYNC_STATE_FOLLOWER) { - sNTrace(ths, "can not do follower commit"); - return -1; - } - - // maybe update commit index, leader notice me - if (newCommitIndex > ths->commitIndex) { - // has commit entry in local - if (newCommitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) { - // advance commit index to sanpshot first - SSnapshot snapshot; - ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot); - if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex > ths->commitIndex) { - SyncIndex commitBegin = ths->commitIndex; - SyncIndex commitEnd = snapshot.lastApplyIndex; - ths->commitIndex = snapshot.lastApplyIndex; - sNTrace(ths, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd); - } - - SyncIndex beginIndex = ths->commitIndex + 1; - SyncIndex endIndex = newCommitIndex; - - // update commit index - ths->commitIndex = newCommitIndex; - - // call back Wal - int32_t code = ths->pLogStore->syncLogUpdateCommitIndex(ths->pLogStore, ths->commitIndex); - ASSERT(code == 0); - - code = syncNodeDoCommit(ths, beginIndex, endIndex, ths->state); - ASSERT(code == 0); - } - } - - return 0; -} - SSyncRaftEntry* 
syncBuildRaftEntryFromAppendEntries(const SyncAppendEntries* pMsg) { SSyncRaftEntry* pEntry = taosMemoryMalloc(pMsg->dataLen); if (pEntry == NULL) { @@ -232,256 +193,3 @@ _IGNORE: rpcFreeCont(rpcRsp.pCont); return 0; } - -int32_t syncNodeOnAppendEntriesOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) { - SyncAppendEntries* pMsg = pRpcMsg->pCont; - SRpcMsg rpcRsp = {0}; - - // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) { - syncLogRecvAppendEntries(ths, pMsg, "not in my config"); - goto _IGNORE; - } - - // prepare response msg - int32_t code = syncBuildAppendEntriesReply(&rpcRsp, ths->vgId); - if (code != 0) { - syncLogRecvAppendEntries(ths, pMsg, "build rsp error"); - goto _IGNORE; - } - - SyncAppendEntriesReply* pReply = rpcRsp.pCont; - pReply->srcId = ths->myRaftId; - pReply->destId = pMsg->srcId; - pReply->term = ths->raftStore.currentTerm; - pReply->success = false; - // pReply->matchIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); - pReply->matchIndex = SYNC_INDEX_INVALID; - pReply->lastSendIndex = pMsg->prevLogIndex + 1; - pReply->startTime = ths->startTime; - - if (pMsg->term < ths->raftStore.currentTerm) { - syncLogRecvAppendEntries(ths, pMsg, "reject, small term"); - goto _SEND_RESPONSE; - } - - if (pMsg->term > ths->raftStore.currentTerm) { - pReply->term = pMsg->term; - } - - syncNodeStepDown(ths, pMsg->term); - syncNodeResetElectTimer(ths); - - SyncIndex startIndex = ths->pLogStore->syncLogBeginIndex(ths->pLogStore); - SyncIndex lastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); - - if (pMsg->prevLogIndex > lastIndex) { - syncLogRecvAppendEntries(ths, pMsg, "reject, index not match"); - goto _SEND_RESPONSE; - } - - if (pMsg->prevLogIndex >= startIndex) { - SyncTerm myPreLogTerm = syncNodeGetPreTerm(ths, pMsg->prevLogIndex + 1); - // ASSERT(myPreLogTerm != SYNC_TERM_INVALID); - if (myPreLogTerm == SYNC_TERM_INVALID) { - syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term invalid"); - goto _SEND_RESPONSE; - } - - if (myPreLogTerm != pMsg->prevLogTerm) { - syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term not match"); - goto _SEND_RESPONSE; - } - } - - // accept - pReply->success = true; - bool hasAppendEntries = pMsg->dataLen > 0; - if (hasAppendEntries) { - SSyncRaftEntry* pAppendEntry = syncEntryBuildFromAppendEntries(pMsg); - ASSERT(pAppendEntry != NULL); - - SyncIndex appendIndex = pMsg->prevLogIndex + 1; - - LRUHandle* hLocal = NULL; - LRUHandle* hAppend = NULL; - - int32_t code = 0; - SSyncRaftEntry* pLocalEntry = NULL; - SLRUCache* pCache = ths->pLogStore->pCache; - hLocal = taosLRUCacheLookup(pCache, &appendIndex, sizeof(appendIndex)); - if (hLocal) { - pLocalEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, hLocal); - code = 0; - - ths->pLogStore->cacheHit++; - sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", appendIndex, pLocalEntry->bytes, pLocalEntry); - - } else { - ths->pLogStore->cacheMiss++; - sNTrace(ths, "miss cache index:%" PRId64, appendIndex); - - code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, appendIndex, &pLocalEntry); - } - - if (code == 0) { - // get local entry success - - if (pLocalEntry->term == pAppendEntry->term) { - // do nothing - sNTrace(ths, "log match, do nothing, index:%" PRId64, appendIndex); - - } else { - // truncate - code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex); - if (code != 0) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, truncate error, append-index:%" PRId64, appendIndex); - syncLogRecvAppendEntries(ths, pMsg, 
logBuf); - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - goto _IGNORE; - } - - ASSERT(pAppendEntry->index == appendIndex); - - // append - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); - if (code != 0) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, append error, append-index:%" PRId64, appendIndex); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - goto _IGNORE; - } - - syncCacheEntry(ths->pLogStore, pAppendEntry, &hAppend); - } - - } else { - if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { - // log not exist - - // truncate - code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex); - if (code != 0) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, truncate error, append-index:%" PRId64, appendIndex); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - - syncEntryDestroy(pLocalEntry); - syncEntryDestroy(pAppendEntry); - goto _IGNORE; - } - - // append - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); - if (code != 0) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, append error, append-index:%" PRId64, appendIndex); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - goto _IGNORE; - } - - syncCacheEntry(ths->pLogStore, pAppendEntry, &hAppend); - - } else { - // get local entry success - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, get local entry error, append-index:%" PRId64 " err:%d", appendIndex, - terrno); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - goto _IGNORE; - } - } - - // update match index - pReply->matchIndex = pAppendEntry->index; - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - } else { - // no append entries, do nothing - // maybe has extra entries, no harm - - // update match index - pReply->matchIndex = pMsg->prevLogIndex; - } - - // maybe update commit index, leader notice me - syncNodeFollowerCommit(ths, pMsg->commitIndex); - - syncLogRecvAppendEntries(ths, pMsg, "accept"); - goto _SEND_RESPONSE; - -_IGNORE: - rpcFreeCont(rpcRsp.pCont); - return 0; - -_SEND_RESPONSE: - // msg event log - syncLogSendAppendEntriesReply(ths, pReply, ""); - - // send response - syncNodeSendMsgById(&pReply->destId, ths, &rpcRsp); - return 0; -} diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c 
index 8157a5a14f..44a29da3ea 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -89,63 +89,3 @@ int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, const SRpcMsg* pRpcMsg) { } return 0; } - -int32_t syncNodeOnAppendEntriesReplyOld(SSyncNode* ths, SyncAppendEntriesReply* pMsg) { - int32_t ret = 0; - - // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) { - syncLogRecvAppendEntriesReply(ths, pMsg, "not in my config"); - return 0; - } - - // drop stale response - if (pMsg->term < ths->raftStore.currentTerm) { - syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response"); - return 0; - } - - if (ths->state == TAOS_SYNC_STATE_LEADER) { - if (pMsg->term > ths->raftStore.currentTerm) { - syncLogRecvAppendEntriesReply(ths, pMsg, "error term"); - syncNodeStepDown(ths, pMsg->term); - return -1; - } - - ASSERT(pMsg->term == ths->raftStore.currentTerm); - - if (pMsg->success) { - SyncIndex oldMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); - if (pMsg->matchIndex > oldMatchIndex) { - syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex); - syncMaybeAdvanceCommitIndex(ths); - - // maybe update minMatchIndex - ths->minMatchIndex = syncMinMatchIndex(ths); - } - syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1); - - } else { - SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); - if (nextIndex > SYNC_INDEX_BEGIN) { - --nextIndex; - } - syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex); - } - - // send next append entries - SPeerState* pState = syncNodeGetPeerState(ths, &(pMsg->srcId)); - ASSERT(pState != NULL); - - if (pMsg->lastSendIndex == pState->lastSendIndex) { - int64_t timeNow = taosGetTimestampMs(); - int64_t elapsed = timeNow - pState->lastSendTime; - sNTrace(ths, "sync-append-entries rtt elapsed:%" PRId64 ", index:%" PRId64, elapsed, pState->lastSendIndex); - - syncNodeReplicateOne(ths, &(pMsg->srcId), true); - } - } - - syncLogRecvAppendEntriesReply(ths, pMsg, "process"); - return 0; -} diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 286cf4daf5..67ed1e0701 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -43,148 +43,6 @@ // IN commitIndex' = [commitIndex EXCEPT ![i] = newCommitIndex] // /\ UNCHANGED <> // -void syncOneReplicaAdvance(SSyncNode* pSyncNode) { - ASSERT(false && "deprecated"); - if (pSyncNode == NULL) { - sError("pSyncNode is NULL"); - return; - } - - if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { - sNError(pSyncNode, "not leader, can not advance commit index"); - return; - } - - if (pSyncNode->replicaNum != 1) { - sNError(pSyncNode, "not one replica, can not advance commit index"); - return; - } - - // advance commit index to snapshot first - SSnapshot snapshot; - pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); - if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) { - SyncIndex commitBegin = pSyncNode->commitIndex; - SyncIndex commitEnd = snapshot.lastApplyIndex; - pSyncNode->commitIndex = snapshot.lastApplyIndex; - sNTrace(pSyncNode, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd); - } - - // advance commit index as large as possible - SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); - if (lastIndex > pSyncNode->commitIndex) { - sNTrace(pSyncNode, "commit by wal from index:%" PRId64 " to index:%" 
PRId64, pSyncNode->commitIndex + 1, lastIndex); - pSyncNode->commitIndex = lastIndex; - } - - // call back Wal - SyncIndex walCommitVer = logStoreWalCommitVer(pSyncNode->pLogStore); - if (pSyncNode->commitIndex > walCommitVer) { - pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex); - } -} - -void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { - ASSERTS(false, "deprecated"); - if (pSyncNode == NULL) { - sError("pSyncNode is NULL"); - return; - } - - if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { - sNError(pSyncNode, "not leader, can not advance commit index"); - return; - } - - // advance commit index to sanpshot first - SSnapshot snapshot; - pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); - if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) { - SyncIndex commitBegin = pSyncNode->commitIndex; - SyncIndex commitEnd = snapshot.lastApplyIndex; - pSyncNode->commitIndex = snapshot.lastApplyIndex; - sNTrace(pSyncNode, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd); - } - - // update commit index - SyncIndex newCommitIndex = pSyncNode->commitIndex; - for (SyncIndex index = syncNodeGetLastIndex(pSyncNode); index > pSyncNode->commitIndex; --index) { - bool agree = syncAgree(pSyncNode, index); - - if (agree) { - // term - SSyncRaftEntry* pEntry = NULL; - SLRUCache* pCache = pSyncNode->pLogStore->pCache; - LRUHandle* h = taosLRUCacheLookup(pCache, &index, sizeof(index)); - if (h) { - pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h); - - pSyncNode->pLogStore->cacheHit++; - sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", index, pEntry->bytes, pEntry); - - } else { - pSyncNode->pLogStore->cacheMiss++; - sNTrace(pSyncNode, "miss cache index:%" PRId64, index); - - int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry); - if (code != 0) { - sNError(pSyncNode, "advance commit index error, read wal index:%" PRId64, index); - return; - } - } - // cannot commit, even if quorum agree. need check term! 
- if (pEntry->term <= pSyncNode->raftStore.currentTerm) { - // update commit index - newCommitIndex = index; - - if (h) { - taosLRUCacheRelease(pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - break; - } else { - sNTrace(pSyncNode, "can not commit due to term not equal, index:%" PRId64 ", term:%" PRIu64, pEntry->index, - pEntry->term); - } - - if (h) { - taosLRUCacheRelease(pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - } - } - - // advance commit index as large as possible - SyncIndex walCommitVer = logStoreWalCommitVer(pSyncNode->pLogStore); - if (walCommitVer > newCommitIndex) { - newCommitIndex = walCommitVer; - } - - // maybe execute fsm - if (newCommitIndex > pSyncNode->commitIndex) { - SyncIndex beginIndex = pSyncNode->commitIndex + 1; - SyncIndex endIndex = newCommitIndex; - - // update commit index - pSyncNode->commitIndex = newCommitIndex; - - // call back Wal - pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex); - - // execute fsm - if (pSyncNode != NULL && pSyncNode->pFsm != NULL) { - int32_t code = syncNodeDoCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state); - if (code != 0) { - sNError(pSyncNode, "advance commit index error, do commit begin:%" PRId64 ", end:%" PRId64, beginIndex, - endIndex); - return; - } - } - } -} bool syncAgreeIndex(SSyncNode* pSyncNode, SRaftId* pRaftId, SyncIndex index) { // I am leader, I agree @@ -210,83 +68,7 @@ static inline int64_t syncNodeAbs64(int64_t a, int64_t b) { return c; } -int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) { - return pSyncNode->quorum; - -#if 0 - int32_t quorum = 1; // self - - int64_t timeNow = taosGetTimestampMs(); - for (int i = 0; i < pSyncNode->peersNum; ++i) { - int64_t peerStartTime = syncIndexMgrGetStartTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]); - int64_t peerRecvTime = syncIndexMgrGetRecvTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]); - SyncIndex peerMatchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId)[i]); - - int64_t recvTimeDiff = TABS(peerRecvTime - timeNow); - int64_t startTimeDiff = TABS(peerStartTime - pSyncNode->startTime); - int64_t logDiff = TABS(peerMatchIndex - syncNodeGetLastIndex(pSyncNode)); - - /* - int64_t recvTimeDiff = syncNodeAbs64(peerRecvTime, timeNow); - int64_t startTimeDiff = syncNodeAbs64(peerStartTime, pSyncNode->startTime); - int64_t logDiff = syncNodeAbs64(peerMatchIndex, syncNodeGetLastIndex(pSyncNode)); - */ - - int32_t addQuorum = 0; - - if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) { - if (startTimeDiff < SYNC_MAX_START_TIME_RANGE_MS) { - addQuorum = 1; - } else { - if (logDiff < SYNC_ADD_QUORUM_COUNT) { - addQuorum = 1; - } else { - addQuorum = 0; - } - } - } else { - addQuorum = 0; - } - - /* - if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) { - addQuorum = 1; - } else { - addQuorum = 0; - } - - if (startTimeDiff > SYNC_MAX_START_TIME_RANGE_MS) { - addQuorum = 0; - } - */ - - quorum += addQuorum; - } - - ASSERT(quorum <= pSyncNode->replicaNum); - - if (quorum < pSyncNode->quorum) { - quorum = pSyncNode->quorum; - } - - return quorum; -#endif -} - -/* -bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) { - int agreeCount = 0; - for (int i = 0; i < pSyncNode->replicaNum; ++i) { - if (syncAgreeIndex(pSyncNode, &(pSyncNode->replicasId[i]), index)) { - ++agreeCount; - } - if (agreeCount >= syncNodeDynamicQuorum(pSyncNode)) { - return true; - } - } - return false; -} -*/ +int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) { return 
pSyncNode->quorum; } bool syncNodeAgreedUpon(SSyncNode* pNode, SyncIndex index) { int count = 0; diff --git a/source/libs/sync/src/syncElection.c b/source/libs/sync/src/syncElection.c index cd3ffc33e3..682ace83ec 100644 --- a/source/libs/sync/src/syncElection.c +++ b/source/libs/sync/src/syncElection.c @@ -43,7 +43,10 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) { for (int i = 0; i < pNode->peersNum; ++i) { SRpcMsg rpcMsg = {0}; ret = syncBuildRequestVote(&rpcMsg, pNode->vgId); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to build request-vote msg since %s", pNode->vgId, terrstr()); + continue; + } SyncRequestVote* pMsg = rpcMsg.pCont; pMsg->srcId = pNode->myRaftId; @@ -51,13 +54,18 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) { pMsg->term = pNode->raftStore.currentTerm; ret = syncNodeGetLastIndexTerm(pNode, &pMsg->lastLogIndex, &pMsg->lastLogTerm); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to get index and term of last log since %s", pNode->vgId, terrstr()); + continue; + } ret = syncNodeSendMsgById(&pNode->peersId[i], pNode, &rpcMsg); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to send msg to peerId:%" PRId64, pNode->vgId, pNode->peersId[i].addr); + continue; + } } - - return ret; + return 0; } int32_t syncNodeElect(SSyncNode* pSyncNode) { diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index a339cb9857..ea22ac7bb5 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -292,8 +292,6 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) { goto _DEL_WAL; } else { - lastApplyIndex -= SYNC_VNODE_LOG_RETENTION; - SyncIndex beginIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore); SyncIndex endIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore); bool isEmpty = pSyncNode->pLogStore->syncLogIsEmpty(pSyncNode->pLogStore); @@ -308,6 +306,8 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) { if (pSyncNode->replicaNum > 1) { // multi replicas + lastApplyIndex = TMAX(lastApplyIndex - SYNC_VNODE_LOG_RETENTION, beginIndex - 1); + if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { pSyncNode->minMatchIndex = syncMinMatchIndex(pSyncNode); @@ -586,78 +586,6 @@ SSyncState syncGetState(int64_t rid) { return state; } -#if 0 -int32_t syncGetSnapshotByIndex(int64_t rid, SyncIndex index, SSnapshot* pSnapshot) { - if (index < SYNC_INDEX_BEGIN) { - return -1; - } - - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - return -1; - } - ASSERT(rid == pSyncNode->rid); - - SSyncRaftEntry* pEntry = NULL; - int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry); - if (code != 0) { - if (pEntry != NULL) { - syncEntryDestroy(pEntry); - } - syncNodeRelease(pSyncNode); - return -1; - } - ASSERT(pEntry != NULL); - - pSnapshot->data = NULL; - pSnapshot->lastApplyIndex = index; - pSnapshot->lastApplyTerm = pEntry->term; - pSnapshot->lastConfigIndex = syncNodeGetSnapshotConfigIndex(pSyncNode, index); - - syncEntryDestroy(pEntry); - syncNodeRelease(pSyncNode); - return 0; -} - -int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta) { - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - return -1; - } - ASSERT(rid == pSyncNode->rid); - sMeta->lastConfigIndex = pSyncNode->raftCfg.lastConfigIndex; - - sTrace("vgId:%d, get snapshot meta, lastConfigIndex:%" PRId64, pSyncNode->vgId, pSyncNode->raftCfg.lastConfigIndex); - - 
syncNodeRelease(pSyncNode); - return 0; -} - -int32_t syncGetSnapshotMetaByIndex(int64_t rid, SyncIndex snapshotIndex, struct SSnapshotMeta* sMeta) { - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - return -1; - } - ASSERT(rid == pSyncNode->rid); - - ASSERT(pSyncNode->raftCfg.configIndexCount >= 1); - SyncIndex lastIndex = (pSyncNode->raftCfg.configIndexArr)[0]; - - for (int32_t i = 0; i < pSyncNode->raftCfg.configIndexCount; ++i) { - if ((pSyncNode->raftCfg.configIndexArr)[i] > lastIndex && - (pSyncNode->raftCfg.configIndexArr)[i] <= snapshotIndex) { - lastIndex = (pSyncNode->raftCfg.configIndexArr)[i]; - } - } - sMeta->lastConfigIndex = lastIndex; - sTrace("vgId:%d, get snapshot meta by index:%" PRId64 " lcindex:%" PRId64, pSyncNode->vgId, snapshotIndex, - sMeta->lastConfigIndex); - - syncNodeRelease(pSyncNode); - return 0; -} -#endif - SyncIndex syncNodeGetSnapshotConfigIndex(SSyncNode* pSyncNode, SyncIndex snapshotLastApplyIndex) { ASSERT(pSyncNode->raftCfg.configIndexCount >= 1); SyncIndex lastIndex = (pSyncNode->raftCfg.configIndexArr)[0]; @@ -898,7 +826,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { sInfo("vgId:%d, start to open sync node, replica:%d selfIndex:%d", pSyncNode->vgId, pCfg->replicaNum, pCfg->myIndex); for (int32_t i = 0; i < pCfg->replicaNum; ++i) { SNodeInfo* pNode = &pCfg->nodeInfo[i]; - (void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); + tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); sInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pSyncNode->vgId, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId, pNode->clusterId); } @@ -1031,9 +959,12 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { pSyncNode->commitIndex = commitIndex; sInfo("vgId:%d, sync node commitIndex initialized as %" PRId64, pSyncNode->vgId, pSyncNode->commitIndex); + // restore log store on need if (syncNodeLogStoreRestoreOnNeed(pSyncNode) < 0) { + sError("vgId:%d, failed to restore log store since %s.", pSyncNode->vgId, terrstr()); goto _error; } + // timer ms init pSyncNode->pingBaseLine = PING_TIMER_MS; pSyncNode->electBaseLine = tsElectInterval; @@ -1096,10 +1027,16 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { pSyncNode->changing = false; // replication mgr - syncNodeLogReplMgrInit(pSyncNode); + if (syncNodeLogReplMgrInit(pSyncNode) < 0) { + sError("vgId:%d, failed to init repl mgr since %s.", pSyncNode->vgId, terrstr()); + goto _error; + } // peer state - syncNodePeerStateInit(pSyncNode); + if (syncNodePeerStateInit(pSyncNode) < 0) { + sError("vgId:%d, failed to init peer stat since %s.", pSyncNode->vgId, terrstr()); + goto _error; + } // // min match index @@ -1194,27 +1131,10 @@ int32_t syncNodeStart(SSyncNode* pSyncNode) { int32_t ret = 0; ret = syncNodeStartPingTimer(pSyncNode); - ASSERT(ret == 0); - return ret; -} - -void syncNodeStartOld(SSyncNode* pSyncNode) { - // start raft - if (pSyncNode->replicaNum == 1) { - raftStoreNextTerm(pSyncNode); - syncNodeBecomeLeader(pSyncNode, "one replica start"); - - // Raft 3.6.2 Committing entries from previous terms - syncNodeAppendNoop(pSyncNode); - syncMaybeAdvanceCommitIndex(pSyncNode); - - } else { - syncNodeBecomeFollower(pSyncNode, "first start"); + if (ret != 0) { + sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr()); } - - int32_t ret = 0; - ret = syncNodeStartPingTimer(pSyncNode); - ASSERT(ret == 0); + return ret; } int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) { @@ 
-1225,11 +1145,16 @@ int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) { // reset elect timer, long enough int32_t electMS = TIMER_MAX_MS; int32_t ret = syncNodeRestartElectTimer(pSyncNode, electMS); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to restart elect timer since %s", pSyncNode->vgId, terrstr()); + return -1; + } - ret = 0; ret = syncNodeStartPingTimer(pSyncNode); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr()); + return -1; + } return ret; } @@ -1703,8 +1628,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde _END: // log end config change - sNInfo(pSyncNode, "end do config change, from %d to %d", pSyncNode->vgId, oldConfig.replicaNum, - pNewConfig->replicaNum); + sNInfo(pSyncNode, "end do config change, from %d to %d", oldConfig.replicaNum, pNewConfig->replicaNum); } // raft state change -------------- @@ -1819,12 +1743,6 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { pSyncNode->leaderCache = pSyncNode->myRaftId; for (int32_t i = 0; i < pSyncNode->pNextIndex->replicaNum; ++i) { - // maybe overwrite myself, no harm - // just do it! - - // pSyncNode->pNextIndex->index[i] = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore) + 1; - - // maybe wal is deleted SyncIndex lastIndex; SyncTerm lastTerm; int32_t code = syncNodeGetLastIndexTerm(pSyncNode, &lastIndex, &lastTerm); @@ -1886,7 +1804,11 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { void syncNodeCandidate2Leader(SSyncNode* pSyncNode) { ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE); - ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted)); + bool granted = voteGrantedMajority(pSyncNode->pVotesGranted); + if (!granted) { + sError("vgId:%d, not granted by majority.", pSyncNode->vgId); + return; + } syncNodeBecomeLeader(pSyncNode, "candidate to leader"); sNTrace(pSyncNode, "state change syncNodeCandidate2Leader"); @@ -1902,20 +1824,6 @@ void syncNodeCandidate2Leader(SSyncNode* pSyncNode) { pSyncNode->vgId, pSyncNode->raftStore.currentTerm, pSyncNode->commitIndex, lastIndex); } -void syncNodeCandidate2LeaderOld(SSyncNode* pSyncNode) { - ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE); - ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted)); - syncNodeBecomeLeader(pSyncNode, "candidate to leader"); - - // Raft 3.6.2 Committing entries from previous terms - syncNodeAppendNoop(pSyncNode); - syncMaybeAdvanceCommitIndex(pSyncNode); - - if (pSyncNode->replicaNum > 1) { - syncNodeReplicate(pSyncNode); - } -} - bool syncNodeIsMnode(SSyncNode* pSyncNode) { return (pSyncNode->vgId == 1); } int32_t syncNodePeerStateInit(SSyncNode* pSyncNode) { @@ -1961,7 +1869,8 @@ void syncNodeCandidate2Follower(SSyncNode* pSyncNode) { // need assert void syncNodeVoteForTerm(SSyncNode* pSyncNode, SyncTerm term, SRaftId* pRaftId) { ASSERT(term == pSyncNode->raftStore.currentTerm); - ASSERT(!raftStoreHasVoted(pSyncNode)); + bool voted = raftStoreHasVoted(pSyncNode); + ASSERT(!voted); raftStoreVote(pSyncNode, pRaftId); } @@ -2478,7 +2387,7 @@ static int32_t syncNodeAppendNoopOld(SSyncNode* ths) { LRUHandle* h = NULL; if (ths->state == TAOS_SYNC_STATE_LEADER) { - int32_t code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry); + int32_t code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry, false); if (code != 0) { sError("append noop error"); return -1; @@ -2639,24 +2548,6 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) { 
return 0; } -int32_t syncNodeOnLocalCmdOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) { - ASSERT(false && "deprecated"); - SyncLocalCmd* pMsg = pRpcMsg->pCont; - syncLogRecvLocalCmd(ths, pMsg, ""); - - if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) { - syncNodeStepDown(ths, pMsg->currentTerm); - - } else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) { - syncNodeFollowerCommit(ths, pMsg->commitIndex); - - } else { - sError("error local cmd"); - } - - return 0; -} - // TLA+ Spec // ClientRequest(i, v) == // /\ state[i] = Leader @@ -2701,96 +2592,6 @@ int32_t syncNodeOnClientRequest(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIn } } -int32_t syncNodeOnClientRequestOld(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIndex) { - sNTrace(ths, "on client request"); - - int32_t ret = 0; - int32_t code = 0; - - SyncIndex index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore); - SyncTerm term = ths->raftStore.currentTerm; - SSyncRaftEntry* pEntry; - - if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { - pEntry = syncEntryBuildFromClientRequest(pMsg->pCont, term, index); - } else { - pEntry = syncEntryBuildFromRpcMsg(pMsg, term, index); - } - - LRUHandle* h = NULL; - - if (ths->state == TAOS_SYNC_STATE_LEADER) { - // append entry - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry); - if (code != 0) { - if (ths->replicaNum == 1) { - if (h) { - taosLRUCacheRelease(ths->pLogStore->pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - return -1; - - } else { - // del resp mgr, call FpCommitCb - SFsmCbMeta cbMeta = { - .index = pEntry->index, - .lastConfigIndex = SYNC_INDEX_INVALID, - .isWeak = pEntry->isWeak, - .code = -1, - .state = ths->state, - .seqNum = pEntry->seqNum, - .term = pEntry->term, - .currentTerm = ths->raftStore.currentTerm, - .flag = 0, - }; - ths->pFsm->FpCommitCb(ths->pFsm, pMsg, &cbMeta); - - if (h) { - taosLRUCacheRelease(ths->pLogStore->pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - return -1; - } - } - - syncCacheEntry(ths->pLogStore, pEntry, &h); - - // if mulit replica, start replicate right now - if (ths->replicaNum > 1) { - syncNodeReplicate(ths); - } - - // if only myself, maybe commit right now - if (ths->replicaNum == 1) { - if (syncNodeIsMnode(ths)) { - syncMaybeAdvanceCommitIndex(ths); - } else { - syncOneReplicaAdvance(ths); - } - } - } - - if (pRetIndex != NULL) { - if (ret == 0 && pEntry != NULL) { - *pRetIndex = pEntry->index; - } else { - *pRetIndex = SYNC_INDEX_INVALID; - } - } - - if (h) { - taosLRUCacheRelease(ths->pLogStore->pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - return ret; -} - const char* syncStr(ESyncState state) { switch (state) { case TAOS_SYNC_STATE_FOLLOWER: @@ -2895,129 +2696,6 @@ bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg) { return (ths->replicaNum == 1 && syncUtilUserCommit(pMsg->msgType) && ths->vgId != 1); } -int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) { - ASSERT(false); - if (beginIndex > endIndex) { - return 0; - } - - if (ths == NULL) { - return -1; - } - - if (ths->pFsm != NULL && ths->pFsm->FpGetSnapshotInfo != NULL) { - // advance commit index to sanpshot first - SSnapshot snapshot = {0}; - ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot); - if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex >= beginIndex) { - sNTrace(ths, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, beginIndex, snapshot.lastApplyIndex); - - // update begin index - beginIndex = snapshot.lastApplyIndex + 1; - } 
- } - - int32_t code = 0; - ESyncState state = flag; - - sNTrace(ths, "commit by wal from index:%" PRId64 " to index:%" PRId64, beginIndex, endIndex); - - // execute fsm - if (ths->pFsm != NULL) { - for (SyncIndex i = beginIndex; i <= endIndex; ++i) { - if (i != SYNC_INDEX_INVALID) { - SSyncRaftEntry* pEntry; - SLRUCache* pCache = ths->pLogStore->pCache; - LRUHandle* h = taosLRUCacheLookup(pCache, &i, sizeof(i)); - if (h) { - pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h); - - ths->pLogStore->cacheHit++; - sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", i, pEntry->bytes, pEntry); - - } else { - ths->pLogStore->cacheMiss++; - sNTrace(ths, "miss cache index:%" PRId64, i); - - code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, i, &pEntry); - // ASSERT(code == 0); - // ASSERT(pEntry != NULL); - if (code != 0 || pEntry == NULL) { - sNError(ths, "get log entry error"); - sFatal("vgId:%d, get log entry %" PRId64 " error when commit since %s", ths->vgId, i, terrstr()); - continue; - } - } - - SRpcMsg rpcMsg = {0}; - syncEntry2OriginalRpc(pEntry, &rpcMsg); - - sTrace("do commit index:%" PRId64 ", type:%s", i, TMSG_INFO(pEntry->msgType)); - - // user commit - if ((ths->pFsm->FpCommitCb != NULL) && syncUtilUserCommit(pEntry->originalRpcType)) { - bool internalExecute = true; - if ((ths->replicaNum == 1) && ths->restoreFinish && ths->vgId != 1) { - internalExecute = false; - } - - sNTrace(ths, "user commit index:%" PRId64 ", internal:%d, type:%s", i, internalExecute, - TMSG_INFO(pEntry->msgType)); - - // execute fsm in apply thread, or execute outside syncPropose - if (internalExecute) { - SFsmCbMeta cbMeta = { - .index = pEntry->index, - .lastConfigIndex = syncNodeGetSnapshotConfigIndex(ths, pEntry->index), - .isWeak = pEntry->isWeak, - .code = 0, - .state = ths->state, - .seqNum = pEntry->seqNum, - .term = pEntry->term, - .currentTerm = ths->raftStore.currentTerm, - .flag = flag, - }; - - syncRespMgrGetAndDel(ths->pSyncRespMgr, cbMeta.seqNum, &rpcMsg.info); - ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, &cbMeta); - } - } - -#if 0 - // execute in pre-commit - // leader transfer - if (pEntry->originalRpcType == TDMT_SYNC_LEADER_TRANSFER) { - code = syncDoLeaderTransfer(ths, &rpcMsg, pEntry); - ASSERT(code == 0); - } -#endif - - // restore finish - // if only snapshot, a noop entry will be append, so syncLogLastIndex is always ok - if (pEntry->index == ths->pLogStore->syncLogLastIndex(ths->pLogStore)) { - if (ths->restoreFinish == false) { - if (ths->pFsm->FpRestoreFinishCb != NULL) { - ths->pFsm->FpRestoreFinishCb(ths->pFsm); - } - ths->restoreFinish = true; - - int64_t restoreDelay = taosGetTimestampMs() - ths->leaderTime; - sNTrace(ths, "restore finish, index:%" PRId64 ", elapsed:%" PRId64 " ms", pEntry->index, restoreDelay); - } - } - - rpcFreeCont(rpcMsg.pCont); - if (h) { - taosLRUCacheRelease(pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - } - } - } - return 0; -} - bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId) { for (int32_t i = 0; i < ths->replicaNum; ++i) { if (syncUtilSameId(&((ths->replicasId)[i]), pRaftId)) { diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index b61fc2e90d..6cc517fda0 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -364,7 +364,11 @@ _out: return ret; } -int32_t syncLogStorePersist(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { +static inline bool syncLogStoreNeedFlush(SSyncRaftEntry* pEntry, int32_t replicaNum) { + return (replicaNum > 
1) && (pEntry->originalRpcType == TDMT_VND_COMMIT); +} + +int32_t syncLogStorePersist(SSyncLogStore* pLogStore, SSyncNode* pNode, SSyncRaftEntry* pEntry) { ASSERT(pEntry->index >= 0); SyncIndex lastVer = pLogStore->syncLogLastIndex(pLogStore); if (lastVer >= pEntry->index && pLogStore->syncLogTruncate(pLogStore, pEntry->index) < 0) { @@ -374,7 +378,8 @@ int32_t syncLogStorePersist(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { lastVer = pLogStore->syncLogLastIndex(pLogStore); ASSERT(pEntry->index == lastVer + 1); - if (pLogStore->syncLogAppendEntry(pLogStore, pEntry) < 0) { + bool doFsync = syncLogStoreNeedFlush(pEntry, pNode->replicaNum); + if (pLogStore->syncLogAppendEntry(pLogStore, pEntry, doFsync) < 0) { sError("failed to append sync log entry since %s. index:%" PRId64 ", term:%" PRId64 "", terrstr(), pEntry->index, pEntry->term); return -1; @@ -436,7 +441,7 @@ int64_t syncLogBufferProceed(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncTerm* p (void)syncNodeReplicateWithoutLock(pNode); // persist - if (syncLogStorePersist(pLogStore, pEntry) < 0) { + if (syncLogStorePersist(pLogStore, pNode, pEntry) < 0) { sError("vgId:%d, failed to persist sync log entry from buffer since %s. index:%" PRId64, pNode->vgId, terrstr(), pEntry->index); goto _out; @@ -940,8 +945,11 @@ int32_t syncNodeLogReplMgrInit(SSyncNode* pNode) { for (int i = 0; i < TSDB_MAX_REPLICA; i++) { ASSERT(pNode->logReplMgrs[i] == NULL); pNode->logReplMgrs[i] = syncLogReplMgrCreate(); + if (pNode->logReplMgrs[i] == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } pNode->logReplMgrs[i]->peerId = i; - ASSERTS(pNode->logReplMgrs[i] != NULL, "Out of memory."); } return 0; } diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 03c3fe154d..e6569d9974 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -23,7 +23,7 @@ // public function static int32_t raftLogRestoreFromSnapshot(struct SSyncLogStore* pLogStore, SyncIndex snapshotIndex); -static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry); +static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry, bool forceSync); static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIndex); static bool raftLogExist(struct SSyncLogStore* pLogStore, SyncIndex index); static int32_t raftLogUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index); @@ -192,7 +192,7 @@ SyncTerm raftLogLastTerm(struct SSyncLogStore* pLogStore) { return SYNC_TERM_INVALID; } -static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { +static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry, bool forceSync) { SSyncLogStoreData* pData = pLogStore->data; SWal* pWal = pData->pWal; @@ -219,9 +219,7 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr ASSERT(pEntry->index == index); - if (pEntry->originalRpcType == TDMT_VND_COMMIT) { - walFsync(pWal, true); - } + walFsync(pWal, forceSync); sNTrace(pData->pSyncNode, "write index:%" PRId64 ", type:%s, origin type:%s, elapsed:%" PRId64, pEntry->index, TMSG_INFO(pEntry->msgType), TMSG_INFO(pEntry->originalRpcType), tsElapsed); diff --git a/source/libs/sync/src/syncReplication.c b/source/libs/sync/src/syncReplication.c index 1aa476e84e..3df203221b 100644 --- a/source/libs/sync/src/syncReplication.c +++ b/source/libs/sync/src/syncReplication.c @@ -48,92 +48,6 @@ int32_t 
syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg); -int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId, bool snapshot) { - ASSERT(false && "deprecated"); - // next index - SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId); - - if (snapshot) { - // maybe start snapshot - SyncIndex logStartIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore); - SyncIndex logEndIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore); - if (nextIndex < logStartIndex || nextIndex - 1 > logEndIndex) { - sNTrace(pSyncNode, "maybe start snapshot for next-index:%" PRId64 ", start:%" PRId64 ", end:%" PRId64, nextIndex, - logStartIndex, logEndIndex); - // start snapshot - int32_t code = syncNodeStartSnapshot(pSyncNode, pDestId); - } - } - - // pre index, pre term - SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex); - SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex); - - // prepare entry - SRpcMsg rpcMsg = {0}; - SyncAppendEntries* pMsg = NULL; - - SSyncRaftEntry* pEntry = NULL; - SLRUCache* pCache = pSyncNode->pLogStore->pCache; - LRUHandle* h = taosLRUCacheLookup(pCache, &nextIndex, sizeof(nextIndex)); - int32_t code = 0; - if (h) { - pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h); - code = 0; - - pSyncNode->pLogStore->cacheHit++; - sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", nextIndex, pEntry->bytes, pEntry); - - } else { - pSyncNode->pLogStore->cacheMiss++; - sNTrace(pSyncNode, "miss cache index:%" PRId64, nextIndex); - - code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry); - } - - if (code == 0) { - ASSERT(pEntry != NULL); - - code = syncBuildAppendEntries(&rpcMsg, (int32_t)(pEntry->bytes), pSyncNode->vgId); - ASSERT(code == 0); - - pMsg = rpcMsg.pCont; - memcpy(pMsg->data, pEntry, pEntry->bytes); - } else { - if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { - // no entry in log - code = syncBuildAppendEntries(&rpcMsg, 0, pSyncNode->vgId); - ASSERT(code == 0); - - pMsg = rpcMsg.pCont; - } else { - sNError(pSyncNode, "replicate to dnode:%d error, next-index:%" PRId64, DID(pDestId), nextIndex); - return -1; - } - } - - if (h) { - taosLRUCacheRelease(pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - // prepare msg - ASSERT(pMsg != NULL); - pMsg->srcId = pSyncNode->myRaftId; - pMsg->destId = *pDestId; - pMsg->term = pSyncNode->raftStore.currentTerm; - pMsg->prevLogIndex = preLogIndex; - pMsg->prevLogTerm = preLogTerm; - pMsg->commitIndex = pSyncNode->commitIndex; - pMsg->privateTerm = 0; - // pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId); - - // send msg - syncNodeMaybeSendAppendEntries(pSyncNode, pDestId, &rpcMsg); - return 0; -} - int32_t syncNodeReplicate(SSyncNode* pNode) { SSyncLogBuffer* pBuf = pNode->pLogBuf; taosThreadMutexLock(&pBuf->mutex); @@ -156,25 +70,6 @@ int32_t syncNodeReplicateWithoutLock(SSyncNode* pNode) { return 0; } -int32_t syncNodeReplicateOld(SSyncNode* pSyncNode) { - if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { - return -1; - } - - sNTrace(pSyncNode, "do replicate"); - - int32_t ret = 0; - for (int i = 0; i < pSyncNode->peersNum; ++i) { - SRaftId* pDestId = &(pSyncNode->peersId[i]); - ret = syncNodeReplicateOne(pSyncNode, pDestId, true); - if (ret != 0) { - sError("vgId:%d, do append entries error for dnode:%d", pSyncNode->vgId, DID(pDestId)); - } - } - - return 0; -} - int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* 
destRaftId, SRpcMsg* pRpcMsg) { SyncAppendEntries* pMsg = pRpcMsg->pCont; pMsg->destId = *destRaftId; @@ -182,39 +77,6 @@ int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftI return 0; } -int32_t syncNodeSendAppendEntriesOld(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) { - int32_t ret = 0; - SyncAppendEntries* pMsg = pRpcMsg->pCont; - if (pMsg == NULL) { - sError("vgId:%d, sync-append-entries msg is NULL", pSyncNode->vgId); - return 0; - } - - SPeerState* pState = syncNodeGetPeerState(pSyncNode, destRaftId); - if (pState == NULL) { - sError("vgId:%d, replica maybe dropped", pSyncNode->vgId); - return 0; - } - - // save index, otherwise pMsg will be free by rpc - SyncIndex saveLastSendIndex = pState->lastSendIndex; - bool update = false; - if (pMsg->dataLen > 0) { - saveLastSendIndex = pMsg->prevLogIndex + 1; - update = true; - } - - syncLogSendAppendEntries(pSyncNode, pMsg, ""); - syncNodeSendMsgById(destRaftId, pSyncNode, pRpcMsg); - - if (update) { - pState->lastSendIndex = saveLastSendIndex; - pState->lastSendTime = taosGetTimestampMs(); - } - - return ret; -} - int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) { int32_t ret = 0; SyncAppendEntries* pMsg = pRpcMsg->pCont; diff --git a/source/libs/tdb/src/db/tdbBtree.c b/source/libs/tdb/src/db/tdbBtree.c index 79d37b7674..4f0682a617 100644 --- a/source/libs/tdb/src/db/tdbBtree.c +++ b/source/libs/tdb/src/db/tdbBtree.c @@ -1063,11 +1063,11 @@ static int tdbBtreeEncodePayload(SPage *pPage, SCell *pCell, int nHeader, const } else { int nLeftKey = kLen; // pack partial key and nextPgno - memcpy(pCell + nHeader, pKey, nLocal - 4); - nLeft -= nLocal - 4; - nLeftKey -= nLocal - 4; + memcpy(pCell + nHeader, pKey, nLocal - nHeader - sizeof(pgno)); + nLeft -= nLocal - nHeader - sizeof(pgno); + nLeftKey -= nLocal - nHeader - sizeof(pgno); - memcpy(pCell + nHeader + nLocal - 4, &pgno, sizeof(pgno)); + memcpy(pCell + nLocal - sizeof(pgno), &pgno, sizeof(pgno)); int lastKeyPageSpace = 0; // pack left key & val to ovpages @@ -1087,9 +1087,12 @@ static int tdbBtreeEncodePayload(SPage *pPage, SCell *pCell, int nHeader, const if (lastKeyPage) { if (lastKeyPageSpace >= vLen) { - memcpy(pBuf + kLen - nLeftKey, pVal, vLen); + if (vLen > 0) { + memcpy(pBuf + kLen - nLeftKey, pVal, vLen); + + nLeft -= vLen; + } - nLeft -= vLen; pgno = 0; } else { memcpy(pBuf + kLen - nLeftKey, pVal, lastKeyPageSpace); @@ -1111,7 +1114,7 @@ static int tdbBtreeEncodePayload(SPage *pPage, SCell *pCell, int nHeader, const } } - memcpy(pBuf + kLen - nLeft, &pgno, sizeof(pgno)); + memcpy(pBuf + bytes, &pgno, sizeof(pgno)); ret = tdbPageInsertCell(ofp, 0, pBuf, bytes + sizeof(pgno), 0); if (ret < 0) { @@ -1313,11 +1316,11 @@ static int tdbBtreeDecodePayload(SPage *pPage, const SCell *pCell, int nHeader, } TDB_CELLDECODER_SET_FREE_KEY(pDecoder); - memcpy(pDecoder->pKey, pCell + nHeader, nLocal - 4); - nLeft -= nLocal - 4; - nLeftKey -= nLocal - 4; + memcpy(pDecoder->pKey, pCell + nHeader, nLocal - nHeader - sizeof(pgno)); + nLeft -= nLocal - nHeader - sizeof(pgno); + nLeftKey -= nLocal - nHeader - sizeof(pgno); - memcpy(&pgno, pCell + nHeader + nLocal - 4, sizeof(pgno)); + memcpy(&pgno, pCell + nLocal - sizeof(pgno), sizeof(pgno)); int lastKeyPageSpace = 0; // load left key & val to ovpages @@ -1343,9 +1346,11 @@ static int tdbBtreeDecodePayload(SPage *pPage, const SCell *pCell, int nHeader, if (lastKeyPage) { if (lastKeyPageSpace >= vLen) { - pDecoder->pVal = ofpCell + kLen - 
nLeftKey; + if (vLen > 0) { + pDecoder->pVal = ofpCell + kLen - nLeftKey; - nLeft -= vLen; + nLeft -= vLen; + } pgno = 0; } else { // read partial val to local diff --git a/source/libs/transport/src/tmsgcb.c b/source/libs/transport/src/tmsgcb.c index 4131619ed9..af2528bc92 100644 --- a/source/libs/transport/src/tmsgcb.c +++ b/source/libs/transport/src/tmsgcb.c @@ -59,6 +59,12 @@ void tmsgReleaseHandle(SRpcHandleInfo* pHandle, int8_t type) { (*defaultMsgCb.re void tmsgReportStartup(const char* name, const char* desc) { (*defaultMsgCb.reportStartupFp)(name, desc); } -int32_t tmsgUpdateDnodeInfo(int32_t* dnodeId, int64_t* clusterId, char* fqdn, uint16_t* port) { - return (*defaultMsgCb.updateDnodeInfoFp)(defaultMsgCb.data, dnodeId, clusterId, fqdn, port); +void tmsgUpdateDnodeInfo(int32_t* dnodeId, int64_t* clusterId, char* fqdn, uint16_t* port) { + (*defaultMsgCb.updateDnodeInfoFp)(defaultMsgCb.data, dnodeId, clusterId, fqdn, port); } + +void tmsgUpdateDnodeEpSet(SEpSet* epset) { + for (int32_t i = 0; i < epset->numOfEps; ++i) { + tmsgUpdateDnodeInfo(NULL, NULL, epset->eps[i].fqdn, &epset->eps[i].port); + } +} \ No newline at end of file diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index 44e88a4dcc..a547378967 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -325,6 +325,35 @@ bool walLogEntriesComplete(const SWal* pWal) { return complete; } +int walTrimIdxFile(SWal* pWal, int32_t fileIdx) { + SWalFileInfo* pFileInfo = taosArrayGet(pWal->fileInfoSet, fileIdx); + ASSERT(pFileInfo != NULL); + char fnameStr[WAL_FILE_LEN]; + walBuildIdxName(pWal, pFileInfo->firstVer, fnameStr); + + int64_t fileSize = 0; + taosStatFile(fnameStr, &fileSize, NULL); + int64_t records = TMAX(0, pFileInfo->lastVer - pFileInfo->firstVer + 1); + int64_t lastEndOffset = records * sizeof(SWalIdxEntry); + + if (fileSize <= lastEndOffset) { + return 0; + } + + TdFilePtr pFile = taosOpenFile(fnameStr, TD_FILE_READ | TD_FILE_WRITE); + if (pFile == NULL) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + wInfo("vgId:%d, trim idx file. file: %s, size: %" PRId64 ", offset: %" PRId64, pWal->cfg.vgId, fnameStr, fileSize, + lastEndOffset); + + taosFtruncateFile(pFile, lastEndOffset); + taosCloseFile(&pFile); + return 0; +} + int walCheckAndRepairMeta(SWal* pWal) { // load log files, get first/snapshot/last version info const char* logPattern = "^[0-9]+.log$"; @@ -402,6 +431,8 @@ int walCheckAndRepairMeta(SWal* pWal) { } updateMeta = true; + (void)walTrimIdxFile(pWal, fileIdx); + int64_t lastVer = walScanLogGetLastVer(pWal, fileIdx); if (lastVer < 0) { if (terrno != TSDB_CODE_WAL_LOG_NOT_EXIST) { @@ -567,6 +598,7 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) { goto _err; } + int64_t count = 0; while (idxEntry.ver < pFileInfo->lastVer) { ASSERT(idxEntry.ver == ckHead.head.version); @@ -578,11 +610,11 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) { idxEntry.offset, fLogNameStr); goto _err; } - wWarn("vgId:%d, wal idx append new entry %" PRId64 " %" PRId64, pWal->cfg.vgId, idxEntry.ver, idxEntry.offset); if (taosWriteFile(pIdxFile, &idxEntry, sizeof(SWalIdxEntry)) < 0) { wError("vgId:%d, failed to append file since %s. 
file:%s", pWal->cfg.vgId, terrstr(), fnameStr); goto _err; } + count++; } if (taosFsyncFile(pIdxFile) < 0) { @@ -590,6 +622,11 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) { goto _err; } + if (count > 0) { + wInfo("vgId:%d, rebuilt %" PRId64 " wal idx entries until lastVer: %" PRId64, pWal->cfg.vgId, count, + pFileInfo->lastVer); + } + (void)taosCloseFile(&pLogFile); (void)taosCloseFile(&pIdxFile); return 0; diff --git a/source/libs/wal/src/walRef.c b/source/libs/wal/src/walRef.c index e86111109c..43470f4c82 100644 --- a/source/libs/wal/src/walRef.c +++ b/source/libs/wal/src/walRef.c @@ -77,14 +77,41 @@ void walUnrefVer(SWalRef *pRef) { } #endif -SWalRef *walRefCommittedVer(SWal *pWal) { - SWalRef *pRef = walOpenRef(pWal); +SWalRef *walRefFirstVer(SWal *pWal, SWalRef *pRef) { if (pRef == NULL) { - return NULL; + pRef = walOpenRef(pWal); + if (pRef == NULL) { + return NULL; + } } taosThreadMutexLock(&pWal->mutex); - int64_t ver = walGetCommittedVer(pWal); + int64_t ver = walGetFirstVer(pWal); + + wDebug("vgId:%d, wal ref version %" PRId64 " for first", pWal->cfg.vgId, ver); + + pRef->refVer = ver; + // bsearch in fileSet + SWalFileInfo tmpInfo; + tmpInfo.firstVer = ver; + SWalFileInfo *pRet = taosArraySearch(pWal->fileInfoSet, &tmpInfo, compareWalFileInfo, TD_LE); + ASSERT(pRet != NULL); + pRef->refFile = pRet->firstVer; + + taosThreadMutexUnlock(&pWal->mutex); + return pRef; +} + +SWalRef *walRefCommittedVer(SWal *pWal) { + SWalRef *pRef = walOpenRef(pWal); + if (pRef == NULL) { + return NULL; + } + taosThreadMutexLock(&pWal->mutex); + + int64_t ver = walGetCommittedVer(pWal); + + wDebug("vgId:%d, wal ref version %" PRId64 " for committed", pWal->cfg.vgId, ver); pRef->refVer = ver; // bsearch in fileSet diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c index db31692da9..d4ea526b78 100644 --- a/source/libs/wal/src/walWrite.c +++ b/source/libs/wal/src/walWrite.c @@ -637,11 +637,6 @@ int32_t walWrite(SWal *pWal, int64_t index, tmsg_t msgType, const void *body, in void walFsync(SWal *pWal, bool forceFsync) { taosThreadMutexLock(&pWal->mutex); if (forceFsync || (pWal->cfg.level == TAOS_WAL_FSYNC && pWal->cfg.fsyncPeriod == 0)) { - wTrace("vgId:%d, fileId:%" PRId64 ".idx, do fsync", pWal->cfg.vgId, walGetCurFileFirstVer(pWal)); - if (taosFsyncFile(pWal->pIdxFile) < 0) { - wError("vgId:%d, file:%" PRId64 ".idx, fsync failed since %s", pWal->cfg.vgId, walGetCurFileFirstVer(pWal), - strerror(errno)); - } wTrace("vgId:%d, fileId:%" PRId64 ".log, do fsync", pWal->cfg.vgId, walGetCurFileFirstVer(pWal)); if (taosFsyncFile(pWal->pLogFile) < 0) { wError("vgId:%d, file:%" PRId64 ".log, fsync failed since %s", pWal->cfg.vgId, walGetCurFileFirstVer(pWal), diff --git a/source/os/src/osMath.c b/source/os/src/osMath.c index dddadd5ff6..41c8c9257a 100644 --- a/source/os/src/osMath.c +++ b/source/os/src/osMath.c @@ -32,7 +32,18 @@ void swapStr(char* j, char* J, int width) { } #endif -// todo refactor: 1) move away; 2) use merge sort instead; 3) qsort is not a stable sort actually. -void taosSort(void* arr, int64_t sz, int64_t width, __compar_fn_t compar) { - qsort(arr, sz, width, compar); +int qsortHelper(const void* p1, const void* p2, const void* param) { + __compar_fn_t comparFn = param; + return comparFn(p1, p2); } + +// todo refactor: 1) move away; 2) use merge sort instead; 3) qsort is not a stable sort actually. 
+void taosSort(void* base, int64_t sz, int64_t width, __compar_fn_t compar) { +#ifdef _ALPINE + void* param = compar; + taosqsort(base, width, sz, param, qsortHelper); +#else + qsort(base, sz, width, compar); +#endif +} + diff --git a/source/os/src/osSysinfo.c b/source/os/src/osSysinfo.c index 7521ae4e0f..aeaa4fcafd 100644 --- a/source/os/src/osSysinfo.c +++ b/source/os/src/osSysinfo.c @@ -834,7 +834,11 @@ int32_t taosGetSystemUUID(char *uid, int32_t uidlen) { uuid_generate(uuid); // it's caller's responsibility to make enough space for `uid`, that's 36-char + 1-null uuid_unparse_lower(uuid, buf); - memcpy(uid, buf, uidlen); + int n = snprintf(uid, uidlen, "%.*s", (int)sizeof(buf), buf); // though less performance, much safer + if (n >= uidlen) { + // target buffer is too small + return -1; + } return 0; #else int len = 0; diff --git a/source/os/src/osTime.c b/source/os/src/osTime.c index cd4324a592..685693a709 100644 --- a/source/os/src/osTime.c +++ b/source/os/src/osTime.c @@ -33,6 +33,11 @@ #include //#define TM_YEAR_BASE 1970 //origin #define TM_YEAR_BASE 1900 // slguan + +// This magic number is the number of 100 nanosecond intervals since January 1, 1601 (UTC) +// until 00:00:00 January 1, 1970 +static const uint64_t TIMEEPOCH = ((uint64_t)116444736000000000ULL); + /* * We do not implement alternate representations. However, we always * check whether a given modifier is allowed for a certain conversion. @@ -341,15 +346,17 @@ char *taosStrpTime(const char *buf, const char *fmt, struct tm *tm) { int32_t taosGetTimeOfDay(struct timeval *tv) { #ifdef WINDOWS - time_t t; - t = taosGetTimestampSec(); - SYSTEMTIME st; - GetLocalTime(&st); + LARGE_INTEGER t; + FILETIME f; - tv->tv_sec = (long)t; - tv->tv_usec = st.wMilliseconds * 1000; + GetSystemTimeAsFileTime(&f); + t.QuadPart = f.dwHighDateTime; + t.QuadPart <<= 32; + t.QuadPart |= f.dwLowDateTime; - return 0; + t.QuadPart -= TIMEEPOCH; + tv->tv_sec = t.QuadPart / 10000000; + tv->tv_usec = (t.QuadPart % 10000000) / 10; #else return gettimeofday(tv, NULL); #endif @@ -550,37 +557,13 @@ int32_t taosClockGetTime(int clock_id, struct timespec *pTS) { #ifdef WINDOWS LARGE_INTEGER t; FILETIME f; - static FILETIME ff; - static SYSTEMTIME ss; - static LARGE_INTEGER offset; - - static int8_t offsetInit = 0; - static volatile bool offsetInitFinished = false; - int8_t old = atomic_val_compare_exchange_8(&offsetInit, 0, 1); - if (0 == old) { - ss.wYear = 1970; - ss.wMonth = 1; - ss.wDay = 1; - ss.wHour = 0; - ss.wMinute = 0; - ss.wSecond = 0; - ss.wMilliseconds = 0; - SystemTimeToFileTime(&ss, &ff); - offset.QuadPart = ff.dwHighDateTime; - offset.QuadPart <<= 32; - offset.QuadPart |= ff.dwLowDateTime; - offsetInitFinished = true; - } else { - while (!offsetInitFinished) - ; // Ensure initialization is completed. 
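The block being removed here computed the 1601-to-1970 FILETIME offset at runtime with SystemTimeToFileTime plus a once-only initialization guard; the patch replaces it with the fixed TIMEEPOCH constant now shared by taosGetTimeOfDay and taosClockGetTime. For reference, a standalone sketch of the arithmetic follows; kEpoch1601To1970Ticks and fileTimeTicksToUnix are illustrative names and the snippet is not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* FILETIME counts 100 ns ticks since 1601-01-01 UTC. The 369 years up to
 * 1970-01-01 contain 89 leap days: (369 * 365 + 89) * 86400 = 11644473600 s,
 * i.e. 11644473600 * 10^7 = 116444736000000000 ticks -- the TIMEEPOCH value. */
static const uint64_t kEpoch1601To1970Ticks = 116444736000000000ULL;

static void fileTimeTicksToUnix(uint64_t ticks, int64_t *sec, int64_t *usec) {
  uint64_t t = ticks - kEpoch1601To1970Ticks;
  *sec = (int64_t)(t / 10000000ULL);         /* 10^7 ticks per second        */
  *usec = (int64_t)((t % 10000000ULL) / 10); /* 100 ns ticks -> microseconds */
}

int main(void) {
  int64_t sec = 0, usec = 0;
  fileTimeTicksToUnix(kEpoch1601To1970Ticks + 15000000ULL, &sec, &usec); /* 1970-01-01 00:00:01.5 UTC */
  printf("%lld.%06lld\n", (long long)sec, (long long)usec);              /* 1.500000 */
  return 0;
}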
- } GetSystemTimeAsFileTime(&f); t.QuadPart = f.dwHighDateTime; t.QuadPart <<= 32; t.QuadPart |= f.dwLowDateTime; - t.QuadPart -= offset.QuadPart; + t.QuadPart -= TIMEEPOCH; pTS->tv_sec = t.QuadPart / 10000000; pTS->tv_nsec = (t.QuadPart % 10000000) * 100; return (0); diff --git a/source/util/src/talgo.c b/source/util/src/talgo.c index d9319485b7..a06aac6afe 100644 --- a/source/util/src/talgo.c +++ b/source/util/src/talgo.c @@ -41,12 +41,6 @@ static void median(void *src, int64_t size, int64_t s, int64_t e, const void *pa ASSERT(comparFn(elePtrAt(src, size, mid), elePtrAt(src, size, s), param) <= 0 && comparFn(elePtrAt(src, size, s), elePtrAt(src, size, e), param) <= 0); - -#ifdef _DEBUG_VIEW -// tTagsPrints(src[s], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); -// tTagsPrints(src[mid], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); -// tTagsPrints(src[e], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); -#endif } static void tInsertSort(void *src, int64_t size, int32_t s, int32_t e, const void *param, __ext_compar_fn_t comparFn, @@ -278,14 +272,4 @@ void taosheapsort(void *base, int32_t size, int32_t len, const void *parcompar, } taosMemoryFree(buf); - /* - char *buf = taosMemoryCalloc(1, size); - - for (i = len - 1; i > 0; i--) { - doswap(elePtrAt(base, size, 0), elePtrAt(base, size, i)); - taosheapadjust(base, size, 0, i - 1, parcompar, compar, parswap, swap, maxroot); - } - - taosMemoryFreeClear(buf); - */ } diff --git a/source/util/src/tarray.c b/source/util/src/tarray.c index 0eec9fd48f..8da1a1ec4a 100644 --- a/source/util/src/tarray.c +++ b/source/util/src/tarray.c @@ -20,7 +20,10 @@ // todo refactor API SArray* taosArrayInit(size_t size, size_t elemSize) { - assert(elemSize > 0); + if (elemSize == 0) { + terrno = TSDB_CODE_INVALID_PARA; + return NULL; + } if (size < TARRAY_MIN_SIZE) { size = TARRAY_MIN_SIZE; @@ -116,8 +119,6 @@ void* taosArrayAddBatch(SArray* pArray, const void* pData, int32_t nEles) { } void taosArrayRemoveDuplicate(SArray* pArray, __compar_fn_t comparFn, void (*fp)(void*)) { - assert(pArray); - size_t size = pArray->size; if (size <= 1) { return; @@ -156,8 +157,6 @@ void taosArrayRemoveDuplicate(SArray* pArray, __compar_fn_t comparFn, void (*fp) } void taosArrayRemoveDuplicateP(SArray* pArray, __compar_fn_t comparFn, void (*fp)(void*)) { - assert(pArray); - size_t size = pArray->size; if (size <= 1) { return; @@ -215,11 +214,10 @@ void* taosArrayReserve(SArray* pArray, int32_t num) { } void* taosArrayPop(SArray* pArray) { - assert(pArray != NULL); - if (pArray->size == 0) { return NULL; } + pArray->size -= 1; return TARRAY_GET_ELEM(pArray, pArray->size); } @@ -228,16 +226,21 @@ void* taosArrayGet(const SArray* pArray, size_t index) { if (NULL == pArray) { return NULL; } - assert(index < pArray->size); + + if (index >= pArray->size) { + uError("index is out of range, current:%"PRIzu" max:%d", index, pArray->capacity); + return NULL; + } + return TARRAY_GET_ELEM(pArray, index); } void* taosArrayGetP(const SArray* pArray, size_t index) { - assert(index < pArray->size); - - void* d = TARRAY_GET_ELEM(pArray, index); - - return *(void**)d; + void** p = taosArrayGet(pArray, index); + if (p == NULL) { + return NULL; + } + return *p; } void* taosArrayGetLast(const SArray* pArray) { @@ -322,9 +325,12 @@ void taosArrayRemove(SArray* pArray, size_t index) { } SArray* taosArrayFromList(const void* src, size_t size, size_t elemSize) { - assert(src != NULL && elemSize > 0); - SArray* pDst = taosArrayInit(size, elemSize); + if (elemSize <= 0) { + terrno = 
TSDB_CODE_INVALID_PARA; + return NULL; + } + SArray* pDst = taosArrayInit(size, elemSize); memcpy(pDst->pData, src, elemSize * size); pDst->size = size; @@ -332,8 +338,6 @@ SArray* taosArrayFromList(const void* src, size_t size, size_t elemSize) { } SArray* taosArrayDup(const SArray* pSrc, __array_item_dup_fn_t fn) { - assert(pSrc != NULL); - if (pSrc->size == 0) { // empty array list return taosArrayInit(8, pSrc->elemSize); } @@ -425,14 +429,10 @@ void taosArrayDestroyEx(SArray* pArray, FDelete fp) { } void taosArraySort(SArray* pArray, __compar_fn_t compar) { - ASSERT(pArray != NULL && compar != NULL); taosSort(pArray->pData, pArray->size, pArray->elemSize, compar); } void* taosArraySearch(const SArray* pArray, const void* key, __compar_fn_t comparFn, int32_t flags) { - assert(pArray != NULL && comparFn != NULL); - assert(key != NULL); - return taosbsearch(key, pArray->pData, pArray->size, pArray->elemSize, comparFn, flags); } diff --git a/source/util/src/tcache.c b/source/util/src/tcache.c index 7d1686ef80..761da6986b 100644 --- a/source/util/src/tcache.c +++ b/source/util/src/tcache.c @@ -921,7 +921,7 @@ void taosCacheRefresh(SCacheObj *pCacheObj, __cache_trav_fn_t fp, void *param1) void taosStopCacheRefreshWorker(void) { stopRefreshWorker = true; TdThreadOnce tmp = PTHREAD_ONCE_INIT; - if (memcmp(&cacheRefreshWorker, &tmp, sizeof(TdThreadOnce)) != 0) taosThreadJoin(cacheRefreshWorker, NULL); + if (memcmp(&cacheThreadInit, &tmp, sizeof(TdThreadOnce)) != 0) taosThreadJoin(cacheRefreshWorker, NULL); taosArrayDestroy(pCacheArrayList); } diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 34ad9ae6bc..62f074db5b 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -897,6 +897,7 @@ void taosLogCrashInfo(char* nodeType, char* pMsg, int64_t msgLen, int signum, vo pFile = taosOpenFile(filepath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); if (pFile == NULL) { + terrno = TAOS_SYSTEM_ERROR(errno); taosPrintLog(flags, level, dflag, "failed to open file:%s since %s", filepath, terrstr()); goto _return; } diff --git a/source/util/src/tpagedbuf.c b/source/util/src/tpagedbuf.c index 1ddb934668..76431d7836 100644 --- a/source/util/src/tpagedbuf.c +++ b/source/util/src/tpagedbuf.c @@ -5,7 +5,10 @@ #include "thash.h" #include "tlog.h" -#define GET_DATA_PAYLOAD(_p) ((char*)(_p)->pData + POINTER_BYTES) +#define GET_PAYLOAD_DATA(_p) ((char*)(_p)->pData + POINTER_BYTES) +#define BUF_PAGE_IN_MEM(_p) ((_p)->pData != NULL) +#define CLEAR_BUF_PAGE_IN_MEM_FLAG(_p) ((_p)->pData = NULL) +#define HAS_DATA_IN_DISK(_p) ((_p)->offset >= 0) #define NO_IN_MEM_AVAILABLE_PAGES(_b) (listNEles((_b)->lruList) >= (_b)->inMemPages) typedef struct SPageDiskInfo { @@ -14,7 +17,7 @@ typedef struct SPageDiskInfo { } SPageDiskInfo, SFreeListItem; struct SPageInfo { - SListNode* pn; // point to list node struct + SListNode* pn; // point to list node struct. 
it is NULL when the page is evicted from the in-memory buffer void* pData; int64_t offset; int32_t pageId; @@ -89,7 +92,7 @@ static char* doDecompressData(void* data, int32_t srcSize, int32_t* dst, SDiskba return data; } -static uint64_t allocatePositionInFile(SDiskbasedBuf* pBuf, size_t size) { +static uint64_t allocateNewPositionInFile(SDiskbasedBuf* pBuf, size_t size) { if (pBuf->pFree == NULL) { return pBuf->nextPos; } else { @@ -112,10 +115,6 @@ static uint64_t allocatePositionInFile(SDiskbasedBuf* pBuf, size_t size) { } } -static void setPageNotInBuf(SPageInfo* pPageInfo) { pPageInfo->pData = NULL; } - -static FORCE_INLINE size_t getAllocPageSize(int32_t pageSize) { return pageSize + POINTER_BYTES + sizeof(SFilePage); } - /** * +--------------------------+-------------------+--------------+ * | PTR to SPageInfo (8bytes)| Payload (PageSize)| 2 Extra Bytes| @@ -124,22 +123,31 @@ static FORCE_INLINE size_t getAllocPageSize(int32_t pageSize) { return pageSize * @param pg * @return */ -static char* doFlushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { - ASSERT(!pg->used && pg->pData != NULL); + +static FORCE_INLINE size_t getAllocPageSize(int32_t pageSize) { return pageSize + POINTER_BYTES + sizeof(SFilePage); } + +static char* doFlushBufPage(SDiskbasedBuf* pBuf, SPageInfo* pg) { + if (pg->pData == NULL || pg->used) { + uError("invalid params in paged buffer process when flushing buf to disk, %s", pBuf->id); + terrno = TSDB_CODE_INVALID_PARA; + return NULL; + } int32_t size = pBuf->pageSize; char* t = NULL; - if (pg->offset == -1 || pg->dirty) { - void* payload = GET_DATA_PAYLOAD(pg); + if ((!HAS_DATA_IN_DISK(pg)) || pg->dirty) { + void* payload = GET_PAYLOAD_DATA(pg); t = doCompressData(payload, pBuf->pageSize, &size, pBuf); + if (size < 0) { + uError("failed to compress data when flushing data to disk, %s", pBuf->id); + return NULL; + } } // this page is flushed to disk for the first time if (pg->dirty) { - if (pg->offset == -1) { - ASSERTS(pg->dirty == true, "pg->dirty is false"); - - pg->offset = allocatePositionInFile(pBuf, size); + if (!HAS_DATA_IN_DISK(pg)) { + pg->offset = allocateNewPositionInFile(pBuf, size); pBuf->nextPos += size; int32_t ret = taosLSeekFile(pBuf->pFile, pg->offset, SEEK_SET); @@ -154,6 +162,7 @@ static char* doFlushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { return NULL; } + // extend the file size if (pBuf->fileSize < pg->offset + size) { pBuf->fileSize = pg->offset + size; } @@ -168,7 +177,7 @@ static char* doFlushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { taosArrayPush(pBuf->pFree, &dinfo); // 2. 
allocate new position, and update the info - pg->offset = allocatePositionInFile(pBuf, size); + pg->offset = allocateNewPositionInFile(pBuf, size); pBuf->nextPos += size; } @@ -196,20 +205,19 @@ static char* doFlushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { size = pg->length; } - ASSERT(size > 0 || (pg->offset == -1 && pg->length == -1)); - char* pDataBuf = pg->pData; memset(pDataBuf, 0, getAllocPageSize(pBuf->pageSize)); + #ifdef BUF_PAGE_DEBUG uDebug("page_flush %p, pageId:%d, offset:%d", pDataBuf, pg->pageId, pg->offset); #endif + pg->length = size; // on disk size return pDataBuf; } -static char* flushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { +static char* flushBufPage(SDiskbasedBuf* pBuf, SPageInfo* pg) { int32_t ret = TSDB_CODE_SUCCESS; - ASSERT(((int64_t)pBuf->numOfPages * pBuf->pageSize) == pBuf->totalBufSize && pBuf->numOfPages >= pBuf->inMemPages); if (pBuf->pFile == NULL) { if ((ret = createDiskFile(pBuf)) != TSDB_CODE_SUCCESS) { @@ -218,22 +226,27 @@ static char* flushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { } } - char* p = doFlushPageToDisk(pBuf, pg); - setPageNotInBuf(pg); - pg->dirty = false; + char* p = doFlushBufPage(pBuf, pg); + CLEAR_BUF_PAGE_IN_MEM_FLAG(pg); + pg->dirty = false; return p; } // load file block data in disk static int32_t loadPageFromDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { + if (pg->offset < 0 || pg->length <= 0) { + uError("failed to load buf page from disk, offset:%"PRId64", length:%d, %s", pg->offset, pg->length, pBuf->id); + return TSDB_CODE_INVALID_PARA; + } + int32_t ret = taosLSeekFile(pBuf->pFile, pg->offset, SEEK_SET); if (ret == -1) { ret = TAOS_SYSTEM_ERROR(errno); return ret; } - void* pPage = (void*)GET_DATA_PAYLOAD(pg); + void* pPage = (void*)GET_PAYLOAD_DATA(pg); ret = (int32_t)taosReadFile(pBuf->pFile, pPage, pg->length); if (ret != pg->length) { ret = TAOS_SYSTEM_ERROR(errno); @@ -248,10 +261,14 @@ static int32_t loadPageFromDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { return 0; } -static SPageInfo* registerPage(SDiskbasedBuf* pBuf, int32_t pageId) { +static SPageInfo* registerNewPageInfo(SDiskbasedBuf* pBuf, int32_t pageId) { pBuf->numOfPages += 1; SPageInfo* ppi = taosMemoryMalloc(sizeof(SPageInfo)); + if (ppi == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } ppi->pageId = pageId; ppi->pData = NULL; @@ -271,48 +288,33 @@ static SListNode* getEldestUnrefedPage(SDiskbasedBuf* pBuf) { SListNode* pn = NULL; while ((pn = tdListNext(&iter)) != NULL) { SPageInfo* pageInfo = *(SPageInfo**)pn->data; - if (pageInfo->pageId < 0 || pageInfo->pn != pn) { - uError("data in consistent in paged buffer, %s", pBuf->id); - return NULL; - } + + SPageInfo* p = *(SPageInfo**)(pageInfo->pData); + ASSERT(pageInfo->pageId >= 0 && pageInfo->pn == pn && p == pageInfo); if (!pageInfo->used) { break; - } else { - // printf("page %d is used, dirty:%d\n", pageInfo->pageId, pageInfo->dirty); } } return pn; } -static char* evacOneDataPage(SDiskbasedBuf* pBuf) { - char* bufPage = NULL; +static char* evictBufPage(SDiskbasedBuf* pBuf) { SListNode* pn = getEldestUnrefedPage(pBuf); - terrno = 0; - - // all pages are referenced by user, try to allocate new space - if (pn == NULL) { - int32_t prev = pBuf->inMemPages; - - // increase by 50% of previous mem pages - pBuf->inMemPages = (int32_t)(pBuf->inMemPages * 1.5f); - - // qWarn("%p in memory buf page not sufficient, expand from %d to %d, page size:%d", pBuf, prev, - // pBuf->inMemPages, pBuf->pageSize); - } else { - tdListPopNode(pBuf->lruList, pn); - - SPageInfo* d = 
*(SPageInfo**)pn->data; - ASSERTS(d->pn == pn, "d->pn not equal pn"); - - d->pn = NULL; - taosMemoryFreeClear(pn); - - bufPage = flushPageToDisk(pBuf, d); + if (pn == NULL) { // no available buffer pages now, return. + return NULL; } - return bufPage; + terrno = 0; + tdListPopNode(pBuf->lruList, pn); + + SPageInfo* d = *(SPageInfo**)pn->data; + + d->pn = NULL; + taosMemoryFreeClear(pn); + + return flushBufPage(pBuf, d); } static void lruListPushFront(SList* pList, SPageInfo* pi) { @@ -339,13 +341,12 @@ int32_t createDiskbasedBuf(SDiskbasedBuf** pBuf, int32_t pagesize, int32_t inMem SDiskbasedBuf* pPBuf = *pBuf; if (pPBuf == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; + goto _error; } pPBuf->pageSize = pagesize; pPBuf->numOfPages = 0; // all pages are in buffer in the first place pPBuf->totalBufSize = 0; - pPBuf->inMemPages = inMemBufSize / pagesize; // maximum allowed pages, it is a soft limit. pPBuf->allocateId = -1; pPBuf->pFile = NULL; pPBuf->id = strdup(id); @@ -354,32 +355,69 @@ int32_t createDiskbasedBuf(SDiskbasedBuf** pBuf, int32_t pagesize, int32_t inMem pPBuf->freePgList = tdListNew(POINTER_BYTES); // at least more than 2 pages must be in memory - if (pPBuf->inMemPages < 2) { - pPBuf->inMemPages = 2; + if (inMemBufSize < pagesize * 2) { + inMemBufSize = pagesize * 2; } + pPBuf->inMemPages = inMemBufSize / pagesize; // maximum allowed pages, it is a soft limit. pPBuf->lruList = tdListNew(POINTER_BYTES); + if (pPBuf->lruList == NULL) { + goto _error; + } // init id hash table _hash_fn_t fn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT); pPBuf->pIdList = taosArrayInit(4, POINTER_BYTES); - pPBuf->all = taosHashInit(10, fn, true, false); - pPBuf->prefix = (char*) dir; + if (pPBuf->pIdList == NULL) { + goto _error; + } + pPBuf->assistBuf = taosMemoryMalloc(pPBuf->pageSize + 2); // EXTRA BYTES + if (pPBuf->assistBuf == NULL) { + goto _error; + } + + pPBuf->all = taosHashInit(10, fn, true, false); + if (pPBuf->all == NULL) { + goto _error; + } + + pPBuf->prefix = (char*) dir; pPBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t)); // qDebug("QInfo:0x%"PRIx64" create resBuf for output, page size:%d, inmem buf pages:%d, file:%s", qId, // pPBuf->pageSize, pPBuf->inMemPages, pPBuf->path); return TSDB_CODE_SUCCESS; + _error: + destroyDiskbasedBuf(pPBuf); + return TSDB_CODE_OUT_OF_MEMORY; +} + +static char* doExtractPage(SDiskbasedBuf* pBuf) { + char* availablePage = NULL; + if (NO_IN_MEM_AVAILABLE_PAGES(pBuf)) { + availablePage = evictBufPage(pBuf); + if (availablePage == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + uWarn("no available buf pages, current:%d, max:%d", listNEles(pBuf->lruList), pBuf->inMemPages) + } + } else { + availablePage = taosMemoryCalloc(1, getAllocPageSize(pBuf->pageSize)); // add extract bytes in case of zipped buffer increased. 
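/* Explanatory note, not part of the patch: this branch runs while the buffer
 * is still below its in-memory page quota, so a fresh zeroed block is
 * allocated; the branch above instead reclaims the memory of the eldest
 * un-referenced page by flushing it to disk (evictBufPage). getAllocPageSize()
 * sizes the block as POINTER_BYTES for the back-pointer to SPageInfo, the
 * payload page itself, plus some slack for the case noted on this line where
 * the compressed form ends up slightly larger than the raw payload. */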
+ if (availablePage == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + } + } + + return availablePage; } void* getNewBufPage(SDiskbasedBuf* pBuf, int32_t* pageId) { pBuf->statis.getPages += 1; - char* availablePage = NULL; - if (NO_IN_MEM_AVAILABLE_PAGES(pBuf)) { - availablePage = evacOneDataPage(pBuf); + char* availablePage = doExtractPage(pBuf); + if (availablePage == NULL) { + return NULL; } SPageInfo* pi = NULL; @@ -394,7 +432,10 @@ void* getNewBufPage(SDiskbasedBuf* pBuf, int32_t* pageId) { *pageId = (++pBuf->allocateId); // register page id info - pi = registerPage(pBuf, *pageId); + pi = registerNewPageInfo(pBuf, *pageId); + if (pi == NULL) { + return NULL; + } // add to hash map taosHashPut(pBuf->all, pageId, sizeof(int32_t), &pi, POINTER_BYTES); @@ -402,26 +443,21 @@ void* getNewBufPage(SDiskbasedBuf* pBuf, int32_t* pageId) { } // add to LRU list - ASSERT(listNEles(pBuf->lruList) < pBuf->inMemPages && pBuf->inMemPages > 0); lruListPushFront(pBuf->lruList, pi); - - // allocate buf - if (availablePage == NULL) { - // add extract bytes in case of zipped buffer increased. - pi->pData = taosMemoryMalloc(getAllocPageSize(pBuf->pageSize)); - } else { - pi->pData = availablePage; - } + pi->pData = availablePage; ((void**)pi->pData)[0] = pi; #ifdef BUF_PAGE_DEBUG uDebug("page_getNewBufPage , pi->pData:%p, pageId:%d, offset:%" PRId64, pi->pData, pi->pageId, pi->offset); #endif - return (void*)(GET_DATA_PAYLOAD(pi)); + + return (void*)(GET_PAYLOAD_DATA(pi)); } void* getBufPage(SDiskbasedBuf* pBuf, int32_t id) { if (id < 0) { + terrno = TSDB_CODE_INVALID_PARA; + uError("invalid page id:%d, %s", id, pBuf->id); return NULL; } @@ -429,45 +465,40 @@ void* getBufPage(SDiskbasedBuf* pBuf, int32_t id) { SPageInfo** pi = taosHashGet(pBuf->all, &id, sizeof(int32_t)); if (pi == NULL || *pi == NULL) { - uError("no pages exist, id:%d, %s", id, pBuf->id); + uError("failed to locate the buffer page:%d, %s", id, pBuf->id); + terrno = TSDB_CODE_INVALID_PARA; return NULL; } - if ((*pi)->pData != NULL) { // it is in memory + if (BUF_PAGE_IN_MEM(*pi)) { // it is in memory // no need to update the LRU list if only one page exists if (pBuf->numOfPages == 1) { (*pi)->used = true; - return (void*)(GET_DATA_PAYLOAD(*pi)); + return (void*)(GET_PAYLOAD_DATA(*pi)); } SPageInfo** pInfo = (SPageInfo**)((*pi)->pn->data); if (*pInfo != *pi) { - uError("data inconsistent in paged buf, %s", pBuf->id); + uError("inconsistently data in paged buffer, pInfo:%p, pi:%p, %s", *pInfo, *pi, pBuf->id); return NULL; } lruListMoveToFront(pBuf->lruList, (*pi)); (*pi)->used = true; + #ifdef BUF_PAGE_DEBUG uDebug("page_getBufPage1 pageId:%d, offset:%" PRId64, (*pi)->pageId, (*pi)->offset); #endif - return (void*)(GET_DATA_PAYLOAD(*pi)); + return (void*)(GET_PAYLOAD_DATA(*pi)); } else { // not in memory - ASSERT((*pi)->pData == NULL && (*pi)->pn == NULL && + ASSERT((!BUF_PAGE_IN_MEM(*pi)) && (*pi)->pn == NULL && (((*pi)->length >= 0 && (*pi)->offset >= 0) || ((*pi)->length == -1 && (*pi)->offset == -1))); - char* availablePage = NULL; - if (NO_IN_MEM_AVAILABLE_PAGES(pBuf)) { - availablePage = evacOneDataPage(pBuf); - if (availablePage == NULL) { - return NULL; - } - } + (*pi)->pData = doExtractPage(pBuf); - if (availablePage == NULL) { - (*pi)->pData = taosMemoryCalloc(1, getAllocPageSize(pBuf->pageSize)); - } else { - (*pi)->pData = availablePage; + // failed to evict buffer page, return with error code. 
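/* Explanatory note, not part of the patch: when doExtractPage() fails it has
 * already set terrno (TSDB_CODE_OUT_OF_MEMORY), so the caller can surface the
 * cause via terrstr(); when it succeeds, any content this page previously
 * flushed to disk is reloaded a few lines below through loadPageFromDisk(). */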
+ if ((*pi)->pData == NULL) { + return NULL; } // set the ptr to the new SPageInfo @@ -477,20 +508,25 @@ void* getBufPage(SDiskbasedBuf* pBuf, int32_t id) { (*pi)->used = true; // some data has been flushed to disk, and needs to be loaded into buffer again. - if ((*pi)->length > 0 && (*pi)->offset >= 0) { + if (HAS_DATA_IN_DISK(*pi)) { int32_t code = loadPageFromDisk(pBuf, *pi); if (code != 0) { + terrno = code; return NULL; } } #ifdef BUF_PAGE_DEBUG uDebug("page_getBufPage2 pageId:%d, offset:%" PRId64, (*pi)->pageId, (*pi)->offset); #endif - return (void*)(GET_DATA_PAYLOAD(*pi)); + return (void*)(GET_PAYLOAD_DATA(*pi)); } } void releaseBufPage(SDiskbasedBuf* pBuf, void* page) { + if (page == NULL) { + return; + } + SPageInfo* ppi = getPageInfoFromPayload(page); releaseBufPageInfo(pBuf, ppi); } @@ -499,7 +535,13 @@ void releaseBufPageInfo(SDiskbasedBuf* pBuf, SPageInfo* pi) { #ifdef BUF_PAGE_DEBUG uDebug("page_releaseBufPageInfo pageId:%d, used:%d, offset:%" PRId64, pi->pageId, pi->used, pi->offset); #endif + + if (pi == NULL) { + return; + } + if (pi->pData == NULL) { + uError("pi->pData (page data) is null"); return; } diff --git a/source/util/src/tworker.c b/source/util/src/tworker.c index a9a84c1860..5581931178 100644 --- a/source/util/src/tworker.c +++ b/source/util/src/tworker.c @@ -227,6 +227,7 @@ STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem uError("worker:%s:%d failed to create", pool->name, curWorkerNum); taosMemoryFree(worker); taosCloseQueue(queue); + taosThreadMutexUnlock(&pool->mutex); terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } diff --git a/tests/develop-test/2-query/show_create_db.py b/tests/develop-test/2-query/show_create_db.py new file mode 100644 index 0000000000..e5a79074ef --- /dev/null +++ b/tests/develop-test/2-query/show_create_db.py @@ -0,0 +1,82 @@ +import sys +from util.log import * +from util.cases import * +from util.sql import * +from util.dnodes import tdDnodes +from math import inf + +class TDTestCase: + def caseDescription(self): + ''' + case1: [TD-11204]Difference improvement that can ignore negative + ''' + return + + def init(self, conn, logSql, replicaVer=1): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), False) + self._conn = conn + + def restartTaosd(self, index=1, dbname="db"): + tdDnodes.stop(index) + tdDnodes.startWithoutSleep(index) + tdSql.execute(f"use scd") + + def run(self): + print("running {}".format(__file__)) + tdSql.execute("drop database if exists scd") + tdSql.execute("create database if not exists scd") + tdSql.execute('use scd') + tdSql.execute('create table stb1 (ts timestamp, c1 bool, c2 tinyint, c3 smallint, c4 int, c5 bigint, c6 float, c7 double, c8 binary(10), c9 nchar(10), c10 tinyint unsigned, c11 smallint unsigned, c12 int unsigned, c13 bigint unsigned) TAGS(t1 int, t2 binary(10), t3 double);') + + tdSql.execute("create table tb1 using stb1 tags(1,'1',1.0);") + + tdSql.execute("create table tb2 using stb1 tags(2,'2',2.0);") + + tdSql.execute("create table tb3 using stb1 tags(3,'3',3.0);") + + tdSql.execute('create database scd2 stt_trigger 3;') + + tdSql.execute('create database scd4 stt_trigger 13;') + + tdSql.query('show create database scd;') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 'scd') + tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 1 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 
VGROUPS 2 SINGLE_STABLE 0") + + tdSql.query('show create database scd2;') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 'scd2') + tdSql.checkData(0, 1, "CREATE DATABASE `scd2` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 3 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0") + + tdSql.query('show create database scd4') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 'scd4') + tdSql.checkData(0, 1, "CREATE DATABASE `scd4` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 13 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0") + + + self.restartTaosd(1, dbname='scd') + + tdSql.query('show create database scd;') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 'scd') + tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 1 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0") + + tdSql.query('show create database scd2;') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 'scd2') + tdSql.checkData(0, 1, "CREATE DATABASE `scd2` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 3 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0") + + tdSql.query('show create database scd4') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 'scd4') + tdSql.checkData(0, 1, "CREATE DATABASE `scd4` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 13 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0") + + + tdSql.execute('drop database scd') + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) diff --git a/tests/develop-test/2-query/table_count_scan.py b/tests/develop-test/2-query/table_count_scan.py new file mode 100644 index 0000000000..1ef65bfc67 --- /dev/null +++ b/tests/develop-test/2-query/table_count_scan.py @@ -0,0 +1,238 @@ +import sys +from util.log import * +from util.cases import * +from util.sql import * +from util.dnodes import tdDnodes +from math import inf + +class TDTestCase: + def caseDescription(self): + ''' + case1: [TD-21890] table count scan test case + ''' + return + + def init(self, conn, logSql, replicaVer=1): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), False) + self._conn = conn + + def restartTaosd(self, index=1, dbname="db"): + tdDnodes.stop(index) + tdDnodes.startWithoutSleep(index) + tdSql.execute(f"use tbl_count") + + def run(self): + print("running {}".format(__file__)) + tdSql.execute("drop database if exists tbl_count") + tdSql.execute("create database if not exists tbl_count") + tdSql.execute('use tbl_count') + tdSql.execute('create table stb1 (ts timestamp, c1 bool, c2 tinyint, c3 smallint, c4 int, c5 bigint, c6 float, c7 double, c8 binary(10), c9 nchar(10), c10 tinyint unsigned, c11 smallint unsigned, c12 int unsigned, c13 bigint unsigned) TAGS(t1 int, t2 binary(10), t3 double);') + + tdSql.execute("create table tb1 
using stb1 tags(1,'1',1.0);") + + tdSql.execute("create table tb2 using stb1 tags(2,'2',2.0);") + + tdSql.execute("create table tb3 using stb1 tags(3,'3',3.0);") + + tdSql.execute('insert into tb1 values (\'2021-11-11 09:00:00\',true,1,1,1,1,1,1,"123","1234",1,1,1,1);') + + tdSql.execute("insert into tb1 values ('2021-11-11 09:00:01',true,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);") + + tdSql.execute('insert into tb1 values (\'2021-11-11 09:00:02\',true,2,NULL,2,NULL,2,NULL,"234",NULL,2,NULL,2,NULL);') + + tdSql.execute('insert into tb1 values (\'2021-11-11 09:00:03\',false,NULL,3,NULL,3,NULL,3,NULL,"3456",NULL,3,NULL,3);') + + tdSql.execute('insert into tb1 values (\'2021-11-11 09:00:04\',true,4,4,4,4,4,4,"456","4567",4,4,4,4);') + + tdSql.execute('insert into tb1 values (\'2021-11-11 09:00:05\',true,127,32767,2147483647,9223372036854775807,3.402823466e+38,1.79769e+308,"567","5678",254,65534,4294967294,9223372036854775807);') + + tdSql.execute('insert into tb1 values (\'2021-11-11 09:00:06\',true,-127,-32767,-2147483647,-9223372036854775807,-3.402823466e+38,-1.79769e+308,"678","6789",0,0,0,0);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:00\',true,1,1,1,1,1,1,"111","1111",1,1,1,1);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:01\',true,2,2,2,2,2,2,"222","2222",2,2,2,2);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:02\',true,3,3,2,3,3,3,"333","3333",3,3,3,3);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:03\',false,4,4,4,4,4,4,"444","4444",4,4,4,4);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:04\',true,5,5,5,5,5,5,"555","5555",5,5,5,5);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:05\',true,6,6,6,6,6,6,"666","6666",6,6,6,6);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:06\',true,7,7,7,7,7,7,"777","7777",7,7,7,7);') + + tdSql.query('select count(*),db_name, stable_name from information_schema.ins_tables group by db_name, stable_name;') + tdSql.checkRows(3) + tdSql.checkData(0, 0, 23) + tdSql.checkData(0, 1, 'information_schema') + tdSql.checkData(0, 2, None) + tdSql.checkData(1, 0, 3) + tdSql.checkData(1, 1, 'tbl_count') + tdSql.checkData(1, 2, 'stb1') + tdSql.checkData(2, 0, 5) + tdSql.checkData(2, 1, 'performance_schema') + tdSql.checkData(2, 2, None) + + tdSql.query('select count(1),db_name, stable_name from information_schema.ins_tables group by db_name, stable_name;') + tdSql.checkRows(3) + tdSql.checkData(0, 0, 23) + tdSql.checkData(0, 1, 'information_schema') + tdSql.checkData(0, 2, None) + tdSql.checkData(1, 0, 5) + tdSql.checkData(1, 1, 'performance_schema') + tdSql.checkData(1, 2, None) + tdSql.checkData(2, 0, 3) + tdSql.checkData(2, 1, 'tbl_count') + tdSql.checkData(2, 2, 'stb1') + + tdSql.query('select count(1),db_name from information_schema.ins_tables group by db_name') + tdSql.checkRows(3) + tdSql.checkData(0, 0, 5) + tdSql.checkData(0, 1, 'performance_schema') + tdSql.checkData(1, 0, 3) + tdSql.checkData(1, 1, 'tbl_count') + tdSql.checkData(2, 0, 23) + tdSql.checkData(2, 1, 'information_schema') + + tdSql.query("select count(*) from information_schema.ins_tables where db_name='tbl_count'") + tdSql.checkRows(1) + tdSql.checkData(0, 0, 3) + + tdSql.query('select count(*) from information_schema.ins_tables where db_name=\'tbl_count\' and stable_name="stb1";') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 3) + + tdSql.query('select count(*) from information_schema.ins_tables') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 31) + + + 
tdSql.execute('create table stba (ts timestamp, c1 bool, c2 tinyint, c3 smallint, c4 int, c5 bigint, c6 float, c7 double, c8 binary(10), c9 nchar(10), c10 tinyint unsigned, c11 smallint unsigned, c12 int unsigned, c13 bigint unsigned) TAGS(t1 int, t2 binary(10), t3 double);') + + tdSql.execute("create table tba1 using stba tags(1,'1',1.0);") + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:00\',true, 1,1,1,1,1,1,"111","1111",1,1,1,1);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:01\',true, 2,2,2,2,2,2,"222","2222",2,2,2,2);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:02\',true, 3,3,2,3,3,3,"333","3333",3,3,3,3);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:03\',false,4,4,4,4,4,4,"444","4444",4,4,4,4);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:04\',true, 5,5,5,5,5,5,"555","5555",5,5,5,5);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:05\',true, 6,6,6,6,6,6,"666","6666",6,6,6,6);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:06\',true, 7,7,7,7,7,7,"777","7777",7,7,7,7);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:07\',true, 8,8,8,8,8,8,"888","8888",8,8,8,8);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:08\',true, 9,9,9,9,9,9,"999","9999",9,9,9,9);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:09\',true, 0,0,0,0,0,0,"000","0000",0,0,0,0);') + + self.restartTaosd(1, dbname='tbl_count') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:10\',true, 1,1,1,1,1,1,"111","1111",1,1,1,1);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:11\',true, 2,2,2,2,2,2,"222","2222",2,2,2,2);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:12\',true, 3,3,2,3,3,3,"333","3333",3,3,3,3);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:13\',false,4,4,4,4,4,4,"444","4444",4,4,4,4);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:14\',true, 5,5,5,5,5,5,"555","5555",5,5,5,5);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:15\',true, 6,6,6,6,6,6,"666","6666",6,6,6,6);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:16\',true, 7,7,7,7,7,7,"777","7777",7,7,7,7);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:17\',true, 8,8,8,8,8,8,"888","8888",8,8,8,8);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:18\',true, 9,9,9,9,9,9,"999","9999",9,9,9,9);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:19\',true, 0,0,0,0,0,0,"000","0000",0,0,0,0);') + + self.restartTaosd(1, dbname='tbl_count') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:20\',true, 1,1,1,1,1,1,"111","1111",1,1,1,1);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:21\',true, 2,2,2,2,2,2,"222","2222",2,2,2,2);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:22\',true, 3,3,2,3,3,3,"333","3333",3,3,3,3);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:23\',false,4,4,4,4,4,4,"444","4444",4,4,4,4);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:24\',true, 5,5,5,5,5,5,"555","5555",5,5,5,5);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:25\',true, 6,6,6,6,6,6,"666","6666",6,6,6,6);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:26\',true, 7,7,7,7,7,7,"777","7777",7,7,7,7);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:27\',true, 8,8,8,8,8,8,"888","8888",8,8,8,8);') + + 
tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:28\',true, 9,9,9,9,9,9,"999","9999",9,9,9,9);') + + tdSql.execute('insert into tba1 values (\'2021-11-11 09:00:29\',true, 0,0,0,0,0,0,"000","0000",0,0,0,0);') + + tdSql.query('select count(*),db_name, stable_name from information_schema.ins_tables group by db_name, stable_name;') + tdSql.checkRows(4) + tdSql.checkData(0, 0, 1) + tdSql.checkData(0, 1, 'tbl_count') + tdSql.checkData(0, 2, 'stba') + tdSql.checkData(1, 0, 23) + tdSql.checkData(1, 1, 'information_schema') + tdSql.checkData(1, 2, None) + tdSql.checkData(2, 0, 3) + tdSql.checkData(2, 1, 'tbl_count') + tdSql.checkData(2, 2, 'stb1') + tdSql.checkData(3, 0, 5) + tdSql.checkData(3, 1, 'performance_schema') + tdSql.checkData(3, 2, None) + + tdSql.query('select count(1),db_name, stable_name from information_schema.ins_tables group by db_name, stable_name;') + tdSql.checkRows(4) + tdSql.checkData(0, 0, 23) + tdSql.checkData(0, 1, 'information_schema') + tdSql.checkData(0, 2, None) + tdSql.checkData(1, 0, 5) + tdSql.checkData(1, 1, 'performance_schema') + tdSql.checkData(1, 2, None) + tdSql.checkData(2, 0, 1) + tdSql.checkData(2, 1, 'tbl_count') + tdSql.checkData(2, 2, 'stba') + tdSql.checkData(3, 0, 3) + tdSql.checkData(3, 1, 'tbl_count') + tdSql.checkData(3, 2, 'stb1') + + tdSql.query('select count(1),db_name from information_schema.ins_tables group by db_name') + tdSql.checkRows(3) + tdSql.checkData(0, 0, 5) + tdSql.checkData(0, 1, 'performance_schema') + tdSql.checkData(1, 0, 4) + tdSql.checkData(1, 1, 'tbl_count') + tdSql.checkData(2, 0, 23) + tdSql.checkData(2, 1, 'information_schema') + + tdSql.query("select count(*) from information_schema.ins_tables where db_name='tbl_count'") + tdSql.checkRows(1) + tdSql.checkData(0, 0, 4) + + tdSql.query('select count(*) from information_schema.ins_tables where db_name=\'tbl_count\' and stable_name="stb1";') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 3) + + tdSql.query('select count(*) from information_schema.ins_tables') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 32) + + + tdSql.execute('drop database tbl_count') + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) diff --git a/tests/develop-test/5-taos-tools/taosbenchmark/sml_json_alltypes.py b/tests/develop-test/5-taos-tools/taosbenchmark/sml_json_alltypes.py index 2c6d09b0f5..1b65e38d72 100644 --- a/tests/develop-test/5-taos-tools/taosbenchmark/sml_json_alltypes.py +++ b/tests/develop-test/5-taos-tools/taosbenchmark/sml_json_alltypes.py @@ -19,32 +19,38 @@ from util.dnodes import * class TDTestCase: def caseDescription(self): - ''' + """ [TD-11510] taosBenchmark test cases - ''' - return + """ def init(self, conn, logSql, replicaVar=1): - self.replicaVar = int(replicaVar) tdLog.debug("start to execute %s" % __file__) + self.replicaVar = int(replicaVar) tdSql.init(conn.cursor(), logSql) def getPath(self, tool="taosBenchmark"): selfPath = os.path.dirname(os.path.realpath(__file__)) - if ("community" in selfPath): - projPath = selfPath[:selfPath.find("community")] + if "community" in selfPath: + projPath = selfPath[: selfPath.find("community")] + elif "src" in selfPath: + projPath = selfPath[: selfPath.find("src")] + elif "/tools/" in selfPath: + projPath = selfPath[: selfPath.find("/tools/")] + elif "/tests/" in selfPath: + projPath = selfPath[: selfPath.find("/tests/")] else: - projPath = selfPath[:selfPath.find("tests")] + tdLog.info("cannot found %s in 
path: %s, use system's" % (tool, selfPath)) + projPath = "/usr/local/taos/bin/" paths = [] - for root, dirs, files in os.walk(projPath): - if ((tool) in files): + for root, dummy, files in os.walk(projPath): + if (tool) in files: rootRealPath = os.path.dirname(os.path.realpath(root)) - if ("packaging" not in rootRealPath): + if "packaging" not in rootRealPath: paths.append(os.path.join(root, tool)) break - if (len(paths) == 0): + if len(paths) == 0: tdLog.exit("taosBenchmark not found!") return else: @@ -52,31 +58,45 @@ class TDTestCase: return paths[0] def run(self): + tdSql.query("select client_version()") + client_ver = "".join(tdSql.queryResult[0]) + major_ver = client_ver.split(".")[0] + binPath = self.getPath() - cmd = "%s -f ./5-taos-tools/taosbenchmark/json/sml_json_alltypes.json" %binPath + cmd = "%s -f ./5-taos-tools/taosbenchmark/json/sml_json_alltypes.json" % binPath tdLog.info("%s" % cmd) os.system("%s" % cmd) tdSql.execute("reset query cache") tdSql.query("describe db.stb1") tdSql.checkData(1, 1, "BOOL") tdSql.query("describe db.stb2") - tdSql.checkData(1, 1, "TINYINT") + tdSql.checkData(1, 1, "DOUBLE") tdSql.query("describe db.stb3") - tdSql.checkData(1, 1, "SMALLINT") + tdSql.checkData(1, 1, "DOUBLE") tdSql.query("describe db.stb4") - tdSql.checkData(1, 1, "INT") + tdSql.checkData(1, 1, "DOUBLE") tdSql.query("describe db.stb5") - tdSql.checkData(1, 1, "BIGINT") + tdSql.checkData(1, 1, "DOUBLE") tdSql.query("describe db.stb6") - tdSql.checkData(1, 1, "FLOAT") + tdSql.checkData(1, 1, "DOUBLE") tdSql.query("describe db.stb7") tdSql.checkData(1, 1, "DOUBLE") tdSql.query("describe db.stb8") - tdSql.checkData(1, 1, "VARCHAR") - tdSql.checkData(1, 2, 16) + if major_ver == "3": + tdSql.checkData(1, 1, "NCHAR") + tdSql.checkData(1, 2, 16) + else: + tdSql.checkData(1, 1, "NCHAR") + tdSql.checkData(1, 2, 8) + tdSql.query("describe db.stb9") - tdSql.checkData(1, 1, "NCHAR") - tdSql.checkData(1, 2, 16) + if major_ver == "3": + tdSql.checkData(1, 1, "NCHAR") + tdSql.checkData(1, 2, 16) + else: + tdSql.checkData(1, 1, "NCHAR") + tdSql.checkData(1, 2, 8) + tdSql.query("select count(*) from db.stb1") tdSql.checkData(0, 0, 160) tdSql.query("select count(*) from db.stb2") diff --git a/tests/docs-examples-test/python.sh b/tests/docs-examples-test/python.sh index 140d05395b..ccb391b752 100644 --- a/tests/docs-examples-test/python.sh +++ b/tests/docs-examples-test/python.sh @@ -23,7 +23,7 @@ python3 bind_param_example.py # 4 taos -s "drop database power" -python3 multi_bind_example.py +python3 multi_bind_example.py # 5 python3 query_example.py @@ -44,4 +44,43 @@ taos -s "drop database test" python3 json_protocol_example.py # 10 -# python3 subscribe_demo.py +pip install SQLAlchemy +pip install pandas +taosBenchmark -y -d power -t 10 -n 10 +python3 conn_native_pandas.py +python3 conn_rest_pandas.py +taos -s "drop database if exists power" + +# 11 +taos -s "create database if not exists test" +python3 connect_native_reference.py + +# 12 +python3 connect_rest_examples.py + +# 13 +python3 handle_exception.py + +# 14 +taosBenchmark -y -d power -t 2 -n 10 +python3 rest_client_example.py +taos -s "drop database if exists power" + +# 15 +python3 result_set_examples.py + +# 16 +python3 tmq_example.py + +# 17 +python3 sql_writer.py + +# 18 +python3 mockdatasource.py + +# 19 +python3 fast_write_example.py + +# 20 +pip3 install kafka-python +python3 kafka_example.py diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index d4fe45d42b..df4cc5f468 100644 --- 
a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -145,6 +145,7 @@ ,,y,script,./test.sh -f tsim/parser/precision_ns.sim ,,y,script,./test.sh -f tsim/parser/projection_limit_offset.sim ,,y,script,./test.sh -f tsim/parser/regex.sim +,,y,script,./test.sh -f tsim/parser/regressiontest.sim ,,y,script,./test.sh -f tsim/parser/select_across_vnodes.sim ,,y,script,./test.sh -f tsim/parser/select_distinct_tag.sim ,,y,script,./test.sh -f tsim/parser/select_from_cache_disk.sim @@ -445,6 +446,7 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/database_pre_suf.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/InsertFuturets.py ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/show.py +,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/information_schema.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/abs.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/abs.py -R ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/and_or_for_byte.py @@ -668,7 +670,7 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode3mnodeRestartDnodeInsertDataAsync.py -N 6 -M 3 -n 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode3mnodeAdd1Ddnoe.py -N 7 -M 3 -C 6 ,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode3mnodeAdd1Ddnoe.py -N 7 -M 3 -C 6 -n 3 -,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode3mnodeDrop.py -N 5 +#,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode3mnodeDrop.py -N 5 ,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode3mnodeRecreateMnode.py -N 5 -M 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode3mnodeStopFollowerLeader.py -N 5 -M 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode3mnodeStop2Follower.py -N 5 -M 3 @@ -1049,6 +1051,8 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 99-TDcase/TD-20582.py #develop test +,,n,develop-test,python3 ./test.py -f 2-query/table_count_scan.py +,,n,develop-test,python3 ./test.py -f 2-query/show_create_db.py ,,n,develop-test,python3 ./test.py -f 5-taos-tools/taosbenchmark/auto_create_table_json.py ,,n,develop-test,python3 ./test.py -f 5-taos-tools/taosbenchmark/custom_col_tag.py ,,n,develop-test,python3 ./test.py -f 5-taos-tools/taosbenchmark/default_json.py diff --git a/tests/parallel_test/container_build.sh b/tests/parallel_test/container_build.sh index 5059630a3f..ff854449bb 100755 --- a/tests/parallel_test/container_build.sh +++ b/tests/parallel_test/container_build.sh @@ -37,9 +37,9 @@ if [ -z "$WORKDIR" ]; then usage exit 1 fi -if [ -z "$THREAD_COUNT" ]; then - THREAD_COUNT=1 -fi +# if [ -z "$THREAD_COUNT" ]; then +# THREAD_COUNT=1 +# fi ulimit -c unlimited @@ -55,7 +55,7 @@ fi date docker run \ -v $REP_MOUNT_PARAM \ - --rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true;make -j $THREAD_COUNT || exit 1" + --rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_TAOSX=true;make -j || exit 1" if [[ -d ${WORKDIR}/debugNoSan ]] ;then echo "delete ${WORKDIR}/debugNoSan" @@ -70,7 +70,7 @@ mv ${REP_REAL_PATH}/debug ${WORKDIR}/debugNoSan date docker run \ -v $REP_MOUNT_PARAM \ - --rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. 
-DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_SANITIZER=1 -DTOOLS_SANITIZE=true -DTOOLS_BUILD_TYPE=Debug;make -j $THREAD_COUNT || exit 1 " + --rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_SANITIZER=1 -DTOOLS_SANITIZE=true -DTOOLS_BUILD_TYPE=Debug -DBUILD_TAOSX=true;make -j || exit 1 " mv ${REP_REAL_PATH}/debug ${WORKDIR}/debugSan diff --git a/tests/parallel_test/run.sh b/tests/parallel_test/run.sh index b5d57265be..43533d4f36 100755 --- a/tests/parallel_test/run.sh +++ b/tests/parallel_test/run.sh @@ -184,6 +184,10 @@ function run_thread() { if [ $? -eq 0 ]; then case_file=`echo "$case_cmd"|grep -o ".*\.py"|awk '{print $NF}'` fi + echo "$case_cmd"|grep -q "^./pytest.sh" + if [ $? -eq 0 ]; then + case_file=`echo "$case_cmd"|grep -o ".*\.py"|awk '{print $NF}'` + fi echo "$case_cmd"|grep -q "\.sim" if [ $? -eq 0 ]; then case_file=`echo "$case_cmd"|grep -o ".*\.sim"|awk '{print $NF}'` diff --git a/tests/script/api/batchprepare.c b/tests/script/api/batchprepare.c index 60df188d7b..d1a80d9683 100644 --- a/tests/script/api/batchprepare.c +++ b/tests/script/api/batchprepare.c @@ -2828,7 +2828,7 @@ void runAll(TAOS *taos) { printf("%s Begin\n", gCaseCtrl.caseCatalog); runCaseList(taos); -#if 0 +#if 1 strcpy(gCaseCtrl.caseCatalog, "Micro DB precision Test"); printf("%s Begin\n", gCaseCtrl.caseCatalog); gCaseCtrl.precision = TIME_PRECISION_MICRO; diff --git a/tests/script/sh/checkAsan.sh b/tests/script/sh/checkAsan.sh index 7df17b22da..7225722791 100755 --- a/tests/script/sh/checkAsan.sh +++ b/tests/script/sh/checkAsan.sh @@ -39,7 +39,7 @@ python_error=`cat ${LOG_DIR}/*.info | grep -w "stack" | wc -l` # /root/TDengine/source/libs/scalar/src/sclvector.c:1075:66: runtime error: signed integer overflow: 9223372034707292160 + 1668838476672 cannot be represented in type 'long int' # /root/TDengine/source/common/src/tdataformat.c:1876:7: runtime error: signed integer overflow: 8252423483843671206 + 2406154664059062870 cannot be represented in type 'long int' -runtime_error=`cat ${LOG_DIR}/*.asan | grep "runtime error" | grep -v "trees.c:873" | grep -v "sclfunc.c.*outside the range of representable values of type"| grep -v "signed integer overflow" | wc -l` +runtime_error=`cat ${LOG_DIR}/*.asan | grep "runtime error" | grep -v "trees.c:873" | grep -v "sclfunc.c.*outside the range of representable values of type"| grep -v "signed integer overflow" |grep -v "strerror.c"| grep -v "asan_malloc_linux.cc" |wc -l` echo -e "\033[44;32;1m"asan error_num: $error_num"\033[0m" echo -e "\033[44;32;1m"asan memory_leak: $memory_leak"\033[0m" diff --git a/tests/script/tsim/db/alter_replica_13.sim b/tests/script/tsim/db/alter_replica_13.sim index d75acb50ad..a9dc1741a1 100644 --- a/tests/script/tsim/db/alter_replica_13.sim +++ b/tests/script/tsim/db/alter_replica_13.sim @@ -79,6 +79,7 @@ sql insert into db.ctb6 values(now, 6, "6") sql insert into db.ctb7 values(now, 7, "7") sql insert into db.ctb8 values(now, 8, "8") sql insert into db.ctb9 values(now, 9, "9") +sql flush database db; print =============== step3: create dnodes sql create dnode $hostname port 7300 diff --git a/tests/script/tsim/parser/regressiontest.sim b/tests/script/tsim/parser/regressiontest.sim index 1b127155cb..3ce2b47b44 100644 --- a/tests/script/tsim/parser/regressiontest.sim +++ b/tests/script/tsim/parser/regressiontest.sim @@ -63,4 +63,38 @@ if $rows != 8198 then return -1 endi 
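# Note, not part of the patch: the case added below for TD-22077 / TD-21877
# inserts interleaved rows into two child tables of st1, flushes the database,
# adds a few more rows and flushes again, then runs a super-table DELETE up to
# a timestamp inside the ingested range followed by a full SELECT, exercising
# delete over data that has already been flushed to disk.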
+print ===========================> TD-22077 && TD-21877 +sql drop database if exists $db -x step1 +sql create database $db vgroups 1; + +sql use $db +sql create stable st1 (ts timestamp, c int) tags(a int); +sql create table t1 using st1 tags(1); +sql create table t2 using st1 tags(2); + +$i = 0 +$ts = 1674977959000 +$rowNum = 200 + +$x = 0 +while $x < $rowNum +$xs = $x * $delta +$ts = $ts0 + $xs +sql insert into t1 values ( $ts , $x ) +sql insert into t2 values ( $ts + 1000a, $x ) +$x = $x + 1 +$ts = $ts + 1000 +endw + +sql flush database $db + +sql insert into t1 values('2018-09-17 09:00:26', 26); +sql insert into t2 values('2018-09-17 09:00:25', 25); + +sql insert into t2 values('2018-09-17 09:00:30', 30); +sql flush database reg_db0; + +sql delete from st1 where ts<='2018-9-17 09:00:26'; +sql select * from st1; + system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/tsim/query/sys_tbname.sim b/tests/script/tsim/query/sys_tbname.sim index 045e908a57..4587dcd4f7 100644 --- a/tests/script/tsim/query/sys_tbname.sim +++ b/tests/script/tsim/query/sys_tbname.sim @@ -86,4 +86,23 @@ if $data00 != @ins_tags@ then return -1 endi +sql create stable stb(ts timestamp, f int) tags(t1 int, t2 int, t3 int, t4 int, t5 int); + +$i = 0 +$tbNum = 1000 +$tbPrefix = stb_tb +while $i < $tbNum + $tb = $tbPrefix . $i + sql create table $tb using stb tags( $i , $i , $i , $i , $i ) + + $i = $i + 1 +endw + +sql select tag_value from information_schema.ins_tags where stable_name='stb'; +if $rows != 5000 then + print $rows + return -1 +endi + + #system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/tsim/stream/basic1.sim b/tests/script/tsim/stream/basic1.sim index 7bf10df637..c61c7667f8 100644 --- a/tests/script/tsim/stream/basic1.sim +++ b/tests/script/tsim/stream/basic1.sim @@ -834,4 +834,57 @@ endi print ====== test _wstart end +print insert into ts1 values(-1648791211000,1,2,3) + +sql create database test7 vgroups 1; +sql use test7; +sql create stable st(ts timestamp, a int, b int , c int) tags(ta int,tb int,tc int); +sql create table ts1 using st tags(1,1,1); +sql create stream streams7 trigger at_once into streamt7 as select _wstart, count(*) from ts1 interval(10s) ; + +sql insert into ts1 values(1648791211000,1,2,3); +sql_error insert into ts1 values(-1648791211000,1,2,3); + +loop18: + +sleep 200 +sql select * from streamt7; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 1 then + print =====rows=$rows + goto loop18 +endi + +if $data01 != 1 then + print =====data01=$data01 + goto loop18 +endi + +sql_error insert into ts1 values(-1648791211001,1,2,3) (1648791211001,1,2,3); + +sql select _wstart, count(*) from ts1 interval(10s) ; + +print $data00 $data01 +print $data10 $data11 + +loop19: + +sleep 200 +sql select * from streamt7; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 1 then + print =====rows=$rows + goto loop19 +endi + system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/tsim/stream/triggerInterval0.sim b/tests/script/tsim/stream/triggerInterval0.sim index 7353f026bb..b522dcf035 100644 --- a/tests/script/tsim/stream/triggerInterval0.sim +++ b/tests/script/tsim/stream/triggerInterval0.sim @@ -29,69 +29,119 @@ sql insert into t1 values(1648791223001,2,2,3,1.1); sql insert into t1 values(1648791223002,2,2,3,1.1); sql insert into t1 values(1648791223003,2,2,3,1.1); sql insert into t1 values(1648791223001,2,2,3,1.1); + +print step 0 + +$loop_count = 0 + +loop0: 
sleep 300 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + sql select * from streamt; + if $rows != 1 then print ======$rows - return -1 + goto loop0 endi if $data01 != 1 then print ======$data01 - return -1 + goto loop0 endi sql insert into t1 values(1648791233001,2,2,3,1.1); + +print step 1 + +$loop_count = 0 + +loop1: sleep 300 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + sql select * from streamt; if $rows != 2 then print ======$rows - return -1 + goto loop1 endi if $data01 != 1 then print ======$data01 - return -1 + goto loop1 endi if $data11 != 3 then print ======$data11 - return -1 + goto loop1 endi sql insert into t1 values(1648791223004,2,2,3,1.1); sql insert into t1 values(1648791223004,2,2,3,1.1); sql insert into t1 values(1648791223005,2,2,3,1.1); + +print step 2 + +$loop_count = 0 + +loop2: sleep 300 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + sql select * from streamt; if $rows != 2 then print ======$rows - return -1 + goto loop2 endi + if $data01 != 1 then print ======$data01 - return -1 + goto loop2 endi if $data11 != 5 then print ======$data11 - return -1 + goto loop2 endi sql insert into t1 values(1648791233002,3,2,3,2.1); sql insert into t1 values(1648791213002,4,2,3,3.1) sql insert into t1 values(1648791213002,4,2,3,4.1); + +print step 3 + +$loop_count = 0 + +loop3: sleep 300 -sql select * from streamt; -if $rows != 2 then - print ======$rows - return -1 -endi -if $data01 != 2 then - print ======$data01 - return -1 -endi -if $data11 != 5 then - print ======$data11 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then return -1 endi +sql select * from streamt; +if $rows != 2 then + print ======$rows + goto loop3 +endi +if $data01 != 2 then + print ======$data01 + goto loop3 +endi +if $data11 != 5 then + print ======$data11 + goto loop3 +endi + system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file diff --git a/tests/system-test/0-others/information_schema.py b/tests/system-test/0-others/information_schema.py new file mode 100644 index 0000000000..1b82fa6e64 --- /dev/null +++ b/tests/system-test/0-others/information_schema.py @@ -0,0 +1,113 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. 
+# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + + +from util.log import * +from util.cases import * +from util.sql import * +from util.common import * +from util.sqlset import * + +class TDTestCase: + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor()) + self.setsql = TDSetSql() + self.dbname = 'db' + self.stbname = 'stb' + self.binary_length = 20 # the length of binary for column_dict + self.nchar_length = 20 # the length of nchar for column_dict + self.ts = 1537146000000 + self.column_dict = { + 'ts' : 'timestamp', + 'col1': 'tinyint', + 'col2': 'smallint', + 'col3': 'int', + 'col4': 'bigint', + 'col5': 'tinyint unsigned', + 'col6': 'smallint unsigned', + 'col7': 'int unsigned', + 'col8': 'bigint unsigned', + 'col9': 'float', + 'col10': 'double', + 'col11': 'bool', + 'col12': f'binary({self.binary_length})', + 'col13': f'nchar({self.nchar_length})' + } + self.tbnum = 20 + self.rowNum = 10 + self.tag_dict = { + 't0':'int' + } + self.tag_values = [ + f'1' + ] + self.binary_str = 'taosdata' + self.nchar_str = '涛思数据' + self.ins_list = ['ins_dnodes','ins_mnodes','ins_modules','ins_qnodes','ins_snodes','ins_cluster','ins_databases','ins_functions',\ + 'ins_indexes','ins_stables','ins_tables','ins_tags','ins_users','ins_grants','ins_vgroups','ins_configs','ins_dnode_variables',\ + 'ins_topics','ins_subscriptions','ins_streams','ins_stream_tasks','ins_vnodes','ins_user_privileges'] + self.perf_list = ['perf_connections','perf_queries','perf_consumers','perf_trans','perf_apps'] + def insert_data(self,column_dict,tbname,row_num): + insert_sql = self.setsql.set_insertsql(column_dict,tbname,self.binary_str,self.nchar_str) + for i in range(row_num): + insert_list = [] + self.setsql.insert_values(column_dict,i,insert_sql,insert_list,self.ts) + def prepare_data(self): + tdSql.execute(f"create database if not exists {self.dbname} vgroups 2") + tdSql.execute(f'use {self.dbname}') + tdSql.execute(self.setsql.set_create_stable_sql(self.stbname,self.column_dict,self.tag_dict)) + for i in range(self.tbnum): + tdSql.execute(f"create table {self.stbname}_{i} using {self.stbname} tags({self.tag_values[0]})") + self.insert_data(self.column_dict,f'{self.stbname}_{i}',self.rowNum) + def count_check(self): + tdSql.query('select count(*) from information_schema.ins_tables') + tdSql.checkEqual(tdSql.queryResult[0][0],self.tbnum+len(self.ins_list)+len(self.perf_list)) + tdSql.query(f'select count(*) from information_schema.ins_tables where db_name = "{self.dbname}"') + tdSql.checkEqual(tdSql.queryResult[0][0],self.tbnum) + tdSql.query(f'select count(*) from information_schema.ins_tables where db_name = "{self.dbname}" and stable_name = "{self.stbname}"') + tdSql.checkEqual(tdSql.queryResult[0][0],self.tbnum) + tdSql.execute('create database db1') + tdSql.execute('create table stb1 (ts timestamp,c0 int) tags(t0 int)') + tdSql.execute('create table tb1 using stb1 tags(1)') + tdSql.query(f'select db_name, stable_name, count(*) from information_schema.ins_tables group by db_name, stable_name') + for i in tdSql.queryResult: + if i[0].lower() == 'information_schema': + tdSql.checkEqual(i[2],len(self.ins_list)) + elif i[0].lower() == self.dbname and i[1] == self.stbname: + 
tdSql.checkEqual(i[2],self.tbnum) + elif i[0].lower() == self.dbname and i[1] == 'stb1': + tdSql.checkEqual(i[2],1) + elif i[0].lower() == 'performance_schema': + tdSql.checkEqual(i[2],len(self.perf_list)) + tdSql.execute('create table db1.ntb (ts timestamp,c0 int)') + tdSql.query(f'select db_name, count(*) from information_schema.ins_tables group by db_name') + print(tdSql.queryResult) + for i in tdSql.queryResult: + if i[0].lower() == 'information_schema': + tdSql.checkEqual(i[1],len(self.ins_list)) + elif i[0].lower() == 'performance_schema': + tdSql.checkEqual(i[1],len(self.perf_list)) + elif i[0].lower() == self.dbname: + tdSql.checkEqual(i[1],self.tbnum+1) + def run(self): + self.prepare_data() + self.count_check() + + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) \ No newline at end of file diff --git a/tests/system-test/2-query/insert_null_none.py b/tests/system-test/2-query/insert_null_none.py index cf5636fb1f..4304dee89e 100755 --- a/tests/system-test/2-query/insert_null_none.py +++ b/tests/system-test/2-query/insert_null_none.py @@ -24,7 +24,7 @@ from util.dnodes import tdDnodes from util.dnodes import * class TDTestCase: - updatecfgDict = {'maxSQLLength':1048576,'debugFlag': 143 ,"querySmaOptimize":1} + updatecfgDict = {'maxSQLLength':1048576,'debugFlag': 131 ,"querySmaOptimize":1} def init(self, conn, logSql, replicaVar): tdLog.debug("start to execute %s" % __file__) diff --git a/tests/system-test/2-query/nestedQuery.py b/tests/system-test/2-query/nestedQuery.py index 3d0db9a562..6557aad05f 100755 --- a/tests/system-test/2-query/nestedQuery.py +++ b/tests/system-test/2-query/nestedQuery.py @@ -24,9 +24,9 @@ from util.dnodes import tdDnodes from util.dnodes import * class TDTestCase: - updatecfgDict = {'maxSQLLength':1048576,'debugFlag': 143 ,"cDebugFlag":143,"uDebugFlag":143 ,"rpcDebugFlag":143 , "tmrDebugFlag":143 , - "jniDebugFlag":143 ,"simDebugFlag":143,"dDebugFlag":143, "dDebugFlag":143,"vDebugFlag":143,"mDebugFlag":143,"qDebugFlag":143, - "wDebugFlag":143,"sDebugFlag":143,"tsdbDebugFlag":143,"tqDebugFlag":143 ,"fsDebugFlag":143 ,"fnDebugFlag":143} + updatecfgDict = {'maxSQLLength':1048576,'debugFlag': 131 ,"cDebugFlag":131,"uDebugFlag":131 ,"rpcDebugFlag":131 , "tmrDebugFlag":131 , + "jniDebugFlag":131 ,"simDebugFlag":131,"dDebugFlag":131, "dDebugFlag":131,"vDebugFlag":131,"mDebugFlag":131,"qDebugFlag":131, + "wDebugFlag":131,"sDebugFlag":131,"tsdbDebugFlag":131,"tqDebugFlag":131 ,"fsDebugFlag":131 ,"fnDebugFlag":131} def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) diff --git a/tests/system-test/2-query/stablity.py b/tests/system-test/2-query/stablity.py index ff026bf120..5e4d5dcbaf 100755 --- a/tests/system-test/2-query/stablity.py +++ b/tests/system-test/2-query/stablity.py @@ -24,9 +24,9 @@ from util.dnodes import tdDnodes from util.dnodes import * class TDTestCase: - updatecfgDict = {'maxSQLLength':1048576,'debugFlag': 143 ,"cDebugFlag":143,"uDebugFlag":143 ,"rpcDebugFlag":143 , "tmrDebugFlag":143 , - "jniDebugFlag":143 ,"simDebugFlag":143,"dDebugFlag":143, "dDebugFlag":143,"vDebugFlag":143,"mDebugFlag":143,"qDebugFlag":143, - "wDebugFlag":143,"sDebugFlag":143,"tsdbDebugFlag":143,"tqDebugFlag":143 ,"fsDebugFlag":143 ,"fnDebugFlag":143} + updatecfgDict = {'maxSQLLength':1048576,'debugFlag': 131 ,"cDebugFlag":131,"uDebugFlag":131 ,"rpcDebugFlag":131 , "tmrDebugFlag":131 , + "jniDebugFlag":131 
,"simDebugFlag":131,"dDebugFlag":131, "dDebugFlag":131,"vDebugFlag":131,"mDebugFlag":131,"qDebugFlag":131, + "wDebugFlag":131,"sDebugFlag":131,"tsdbDebugFlag":131,"tqDebugFlag":131 ,"fsDebugFlag":131 ,"fnDebugFlag":131} def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) diff --git a/tests/system-test/6-cluster/5dnode3mnodeDrop.py b/tests/system-test/6-cluster/5dnode3mnodeDrop.py index de9207ddd8..9dd3c56805 100644 --- a/tests/system-test/6-cluster/5dnode3mnodeDrop.py +++ b/tests/system-test/6-cluster/5dnode3mnodeDrop.py @@ -112,7 +112,8 @@ class TDTestCase: dnode_first_port = dnode.cfgDict["firstEp"].split(":")[-1] cmd = f" taos -h {dnode_first_host} -P {dnode_first_port} -s ' create dnode \"{dnode_id} \" ' ;" tdLog.debug(cmd) - os.system(cmd) + if os.system(cmd) != 0: + raise Exception("failed to execute system command. cmd: %s" % cmd) time.sleep(2) tdLog.info(" create cluster with %d dnode done! " %dnodes_nums) @@ -120,7 +121,7 @@ class TDTestCase: def check3mnode(self): count=0 while count < 10: - time.sleep(1) + time.sleep(0.1) tdSql.query("select * from information_schema.ins_mnodes;") if tdSql.checkRows(3) : tdLog.debug("mnode is three nodes") @@ -157,7 +158,7 @@ class TDTestCase: def check3mnode1off(self): count=0 while count < 10: - time.sleep(1) + time.sleep(0.1) tdSql.query("select * from information_schema.ins_mnodes;") if tdSql.checkRows(3) : tdLog.debug("mnode is three nodes") @@ -189,7 +190,7 @@ class TDTestCase: def check3mnode2off(self): count=0 while count < 40: - time.sleep(1) + time.sleep(0.1) tdSql.query("select * from information_schema.ins_mnodes;") if tdSql.checkRows(3) : tdLog.debug("mnode is three nodes") @@ -219,7 +220,7 @@ class TDTestCase: def check3mnode3off(self): count=0 while count < 10: - time.sleep(1) + time.sleep(0.1) tdSql.query("select * from information_schema.ins_mnodes;") if tdSql.checkRows(3) : tdLog.debug("mnode is three nodes") @@ -279,32 +280,47 @@ class TDTestCase: # drop follower of mnode dropcount =0 - while dropcount <= 10: + while dropcount <= 5: for i in range(1,3): tdLog.debug("drop mnode on dnode %d"%(i+1)) tdSql.execute("drop mnode on dnode %d"%(i+1)) tdSql.query("select * from information_schema.ins_mnodes;") count=0 while count<10: - time.sleep(1) + time.sleep(0.1) tdSql.query("select * from information_schema.ins_mnodes;") - if tdSql.checkRows(2): + if tdSql.queryRows == 2: tdLog.debug("drop mnode %d successfully"%(i+1)) break count+=1 + self.wait_for_transactions(100) + tdLog.debug("create mnode on dnode %d"%(i+1)) tdSql.execute("create mnode on dnode %d"%(i+1)) count=0 while count<10: - time.sleep(1) + time.sleep(0.1) tdSql.query("select * from information_schema.ins_mnodes;") - if tdSql.checkRows(3): - tdLog.debug("drop mnode %d successfully"%(i+1)) + if tdSql.queryRows == 3: + tdLog.debug("create mnode %d successfully"%(i+1)) break count+=1 + self.wait_for_transactions(100) dropcount+=1 self.check3mnode() + def wait_for_transactions(self, timeout): + count=0 + while count= timeout: + tdLog.debug("transactions not finished before timeout (%d secs)"%timeout) def getConnection(self, dnode): host = dnode.cfgDict["fqdn"] diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb.py b/tests/system-test/7-tmq/tmqConsFromTsdb.py index 9bb8c4cc0d..8ed4a6df97 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb.py @@ -130,7 +130,7 @@ class TDTestCase: tdLog.info("expect consume rows: %d, act consume rows: %d"%(expectRowsList[0], resultList[0])) tdLog.exit("%d tmq consume 
rows error!"%consumerId) - tmqCom.checkFileContent(consumerId, queryString) + # tmqCom.checkFileContent(consumerId, queryString) time.sleep(10) for i in range(len(topicNameList)): diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb1-1ctb.py b/tests/system-test/7-tmq/tmqConsFromTsdb1-1ctb.py index 009862137f..4dcc0b963f 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb1-1ctb.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb1-1ctb.py @@ -116,7 +116,7 @@ class TDTestCase: topicList = topicNameList[0] ifcheckdata = 1 ifManualCommit = 1 - keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:1000, auto.offset.reset:earliest' + keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:200, auto.offset.reset:earliest' tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) consumerId = 4 @@ -188,7 +188,7 @@ class TDTestCase: topicList = topicNameList[0] ifcheckdata = 1 ifManualCommit = 1 - keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:1000, auto.offset.reset:earliest' + keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:200, auto.offset.reset:earliest' tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) tdLog.info("start consume processor 0") diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg.py b/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg.py index 528b3a8088..da8ac6c57d 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg.py @@ -116,7 +116,7 @@ class TDTestCase: topicList = topicNameList[0] ifcheckdata = 1 ifManualCommit = 1 - keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:1000, auto.offset.reset:earliest' + keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:200, auto.offset.reset:earliest' tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) consumerId = 4 @@ -188,7 +188,7 @@ class TDTestCase: topicList = topicNameList[0] ifcheckdata = 1 ifManualCommit = 1 - keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:1000, auto.offset.reset:earliest' + keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:200, auto.offset.reset:earliest' tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) tdLog.info("start consume processor 0") diff --git a/tests/system-test/7-tmq/tmqUpdate-1ctb.py b/tests/system-test/7-tmq/tmqUpdate-1ctb.py index b974e4a41a..db2ec3285d 100644 --- a/tests/system-test/7-tmq/tmqUpdate-1ctb.py +++ b/tests/system-test/7-tmq/tmqUpdate-1ctb.py @@ -206,7 +206,7 @@ class TDTestCase: paraDict['rowsPerTbl'] = self.rowsPerTbl consumerId = 1 if self.snapshot == 0: - expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2)) + expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1/2)) elif self.snapshot == 1: expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1)) diff --git a/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py b/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py index d5df88cf43..daffff44c1 100644 --- a/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py +++ b/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py @@ -213,9 +213,9 @@ class TDTestCase: paraDict['rowsPerTbl'] = self.rowsPerTbl consumerId = 1 if self.snapshot == 0: - expectrowcnt = int(paraDict["rowsPerTbl"] * 
paraDict["ctbNum"] * (2 + 1/2*1/2*2 + 1/2*1/2)) + expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1/2) * (1/2*3)) elif self.snapshot == 1: - expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2 + 1/2*1/2)) + expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1 + 1/2)) topicList = topicFromStb1 ifcheckdata = 1 diff --git a/tools/shell/inc/shellInt.h b/tools/shell/inc/shellInt.h index af724c1533..e2da695c92 100644 --- a/tools/shell/inc/shellInt.h +++ b/tools/shell/inc/shellInt.h @@ -147,5 +147,6 @@ void shellRunSingleCommandWebsocketImp(char *command); // shellMain.c extern SShellObj shell; +extern void tscWriteCrashInfo(int signum, void *sigInfo, void *context); #endif /*_TD_SHELL_INT_H_*/ diff --git a/tools/shell/src/shellEngine.c b/tools/shell/src/shellEngine.c index 986806fdd8..479c2cf39a 100644 --- a/tools/shell/src/shellEngine.c +++ b/tools/shell/src/shellEngine.c @@ -1136,10 +1136,8 @@ int32_t shellExecute() { taosSetSignal(SIGTERM, shellQueryInterruptHandler); taosSetSignal(SIGHUP, shellQueryInterruptHandler); - taosSetSignal(SIGABRT, shellQueryInterruptHandler); - taosSetSignal(SIGINT, shellQueryInterruptHandler); - + #ifdef WEBSOCKET if (!shell.args.restful && !shell.args.cloud) { #endif diff --git a/tools/shell/src/shellMain.c b/tools/shell/src/shellMain.c index fa3c0f2585..22b8e89959 100644 --- a/tools/shell/src/shellMain.c +++ b/tools/shell/src/shellMain.c @@ -19,6 +19,29 @@ SShellObj shell = {0}; + +void shellCrashHandler(int signum, void *sigInfo, void *context) { + taosIgnSignal(SIGTERM); + taosIgnSignal(SIGHUP); + taosIgnSignal(SIGINT); + taosIgnSignal(SIGBREAK); + +#if !defined(WINDOWS) + taosIgnSignal(SIGBUS); +#endif + taosIgnSignal(SIGABRT); + taosIgnSignal(SIGFPE); + taosIgnSignal(SIGSEGV); + + tscWriteCrashInfo(signum, sigInfo, context); + +#ifdef _TD_DARWIN_64 + exit(signum); +#elif defined(WINDOWS) + exit(signum); +#endif +} + int main(int argc, char *argv[]) { shell.exit = false; #ifdef WEBSOCKET @@ -26,6 +49,13 @@ int main(int argc, char *argv[]) { shell.args.cloud = true; #endif +#if !defined(WINDOWS) + taosSetSignal(SIGBUS, shellCrashHandler); +#endif + taosSetSignal(SIGABRT, shellCrashHandler); + taosSetSignal(SIGFPE, shellCrashHandler); + taosSetSignal(SIGSEGV, shellCrashHandler); + if (shellCheckIntSize() != 0) { return -1; }