ClickHouse/tests/integration/test_s3_zero_copy_replication/test.py

import datetime
import logging
import time

import pytest
from helpers.cluster import ClickHouseCluster

# Configure the root logger for this test module: let INFO-level records
# through and attach a StreamHandler (which writes to sys.stderr by default)
# so that progress messages such as "Starting cluster..." are emitted.
logging.getLogger().setLevel(logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler())


@pytest.fixture(scope="module")
def cluster():
    """Start the two-replica cluster shared by every test in this module.

    Two instances, ``node1`` and ``node2``, are registered with the same
    ``configs/config.d/s3.xml`` config, MinIO and ZooKeeper enabled, and the
    ``replica`` macro set to ``'1'`` and ``'2'`` respectively.

    Yields:
        The started ``ClickHouseCluster``. It is shut down when the module's
        tests finish, or when start-up fails part-way.
    """
    # Build the cluster object *before* entering ``try``: if the constructor
    # raised inside the ``try`` block, ``finally`` would reference an unbound
    # local and replace the real error with an UnboundLocalError.
    ch_cluster = ClickHouseCluster(__file__)
    try:
        ch_cluster.add_instance("node1", main_configs=["configs/config.d/s3.xml"], macros={'replica': '1'},
                                with_minio=True,
                                with_zookeeper=True)
        ch_cluster.add_instance("node2", main_configs=["configs/config.d/s3.xml"], macros={'replica': '2'},
                                with_minio=True,
                                with_zookeeper=True)
        logging.info("Starting cluster...")
        ch_cluster.start()
        logging.info("Cluster started")

        yield ch_cluster
    finally:
        # Runs on normal teardown and when add_instance()/start() fails.
        ch_cluster.shutdown()


def get_large_objects_count(cluster, size=100, folder='data'):
    """Count the objects listed under ``<folder>/`` that are at least ``size`` bytes.

    Args:
        cluster: object exposing ``minio_client`` and ``minio_bucket``.
        size: minimum ``obj.size`` for an object to be counted (inclusive).
        folder: key prefix (without the trailing slash) to list.

    Returns:
        Number of listed objects whose ``size`` attribute is ``>= size``.
    """
    prefix = '{}/'.format(folder)
    listing = cluster.minio_client.list_objects(cluster.minio_bucket, prefix)
    return sum(1 for entry in listing if entry.size >= size)


def wait_for_large_objects_count(cluster, expected, size=100, timeout=30):
    """Poll the object count once per second until it equals ``expected``.

    ``timeout`` is the number of one-second polling rounds, not a strict
    wall-clock limit: the time spent listing objects is not deducted.

    Args:
        cluster: passed through to ``get_large_objects_count``.
        expected: object count to wait for.
        size: minimum object size, passed through to ``get_large_objects_count``.
        timeout: maximum number of polling rounds.

    Raises:
        AssertionError: if the count still differs after the last round.
    """
    rounds_left = timeout
    while rounds_left > 0:
        current = get_large_objects_count(cluster, size=size)
        if current == expected:
            return
        rounds_left -= 1
        time.sleep(1)

    # One last look after the final sleep; this is the check that fails the test.
    final_count = get_large_objects_count(cluster, size=size)
    assert final_count == expected


def wait_for_active_parts(node, num_expected_parts, table_name, timeout=30):
    """Wait until ``table_name`` has exactly ``num_expected_parts`` active parts on ``node``.

    ``system.parts`` is polled every 0.2 seconds until the monotonic deadline
    ``timeout`` seconds from now.

    Args:
        node: object with a ``query(sql)`` method returning the count as text.
        num_expected_parts: number of active parts to wait for.
        table_name: value matched against the ``table`` column of ``system.parts``.
        timeout: seconds to keep polling.

    Raises:
        AssertionError: if the last observed count (0 when no poll was made)
            differs from ``num_expected_parts`` once the deadline has passed.
    """
    count_query = "select count() from system.parts where table = '{}' and active".format(table_name)
    stop_at = time.monotonic() + timeout
    observed = 0

    while time.monotonic() < stop_at:
        observed = int(node.query(count_query).strip())
        if observed == num_expected_parts:
            return
        time.sleep(0.2)

    assert observed == num_expected_parts


# Result of `get_large_objects_count` can be changed in other tests, so run this case at the beginning
@pytest.mark.order(0)
@pytest.mark.parametrize(
    "policy", ["s3"]
)
def test_s3_zero_copy_replication(cluster, policy):
    """Check that both replicas of a table on ``policy`` see the same data and share S3 objects.

    Data is inserted on each node in turn and replicated to the other one.
    The number of objects of 100+ bytes under ``data/`` is checked after every
    step: one after the first insert, two after the second, three right after
    the merge and one again once the old parts are cleaned up.

    The table is dropped in a ``finally`` block so that a failed assertion
    does not leave the table (and its S3 objects) behind on the module-scoped
    cluster.
    """
    node1 = cluster.instances["node1"]
    node2 = cluster.instances["node2"]

    node1.query(
        """
        CREATE TABLE s3_test ON CLUSTER test_cluster (id UInt32, value String)
        ENGINE=ReplicatedMergeTree('/clickhouse/tables/s3_test', '{}')
        ORDER BY id
        SETTINGS storage_policy='{}'
        """
            .format('{replica}', policy)
    )

    try:
        node1.query("INSERT INTO s3_test VALUES (0,'data'),(1,'data')")
        node2.query("SYSTEM SYNC REPLICA s3_test")
        assert node1.query("SELECT * FROM s3_test order by id FORMAT Values") == "(0,'data'),(1,'data')"
        assert node2.query("SELECT * FROM s3_test order by id FORMAT Values") == "(0,'data'),(1,'data')"

        # Based on version 21.x - should be only 1 file with size 100+ (checksums.txt), used by both nodes
        assert get_large_objects_count(cluster) == 1

        node2.query("INSERT INTO s3_test VALUES (2,'data'),(3,'data')")
        node1.query("SYSTEM SYNC REPLICA s3_test")

        assert node2.query("SELECT * FROM s3_test order by id FORMAT Values") == "(0,'data'),(1,'data'),(2,'data'),(3,'data')"
        assert node1.query("SELECT * FROM s3_test order by id FORMAT Values") == "(0,'data'),(1,'data'),(2,'data'),(3,'data')"

        # Based on version 21.x - two parts
        wait_for_large_objects_count(cluster, 2)

        node1.query("OPTIMIZE TABLE s3_test FINAL")

        # Based on version 21.x - after merge, two old parts and one merged
        wait_for_large_objects_count(cluster, 3)

        # Based on version 21.x - after cleanup - only one merged part
        wait_for_large_objects_count(cluster, 1, timeout=60)
    finally:
        # Always clean up, even when an assertion above failed.
        node1.query("DROP TABLE IF EXISTS s3_test NO DELAY")
        node2.query("DROP TABLE IF EXISTS s3_test NO DELAY")


def test_s3_zero_copy_on_hybrid_storage(cluster):
    """Check that moving an already replicated part to S3 on the second replica adds no objects.

    The table uses the ``hybrid`` storage policy. The part starts on disk
    ``default`` on both nodes and is moved to disk ``s31`` on node1 first.
    The total number of objects under ``data/`` is recorded, then the same
    move is done on node2 and the object count is expected to stay the same.

    The table is dropped in a ``finally`` block so that a failed assertion
    does not leave it behind on the module-scoped cluster.
    """
    node1 = cluster.instances["node1"]
    node2 = cluster.instances["node2"]

    node1.query(
        """
        CREATE TABLE hybrid_test ON CLUSTER test_cluster (id UInt32, value String)
        ENGINE=ReplicatedMergeTree('/clickhouse/tables/hybrid_test', '{}')
        ORDER BY id
        SETTINGS storage_policy='hybrid'
        """
            .format('{replica}')
    )

    try:
        node1.query("INSERT INTO hybrid_test VALUES (0,'data'),(1,'data')")
        node2.query("SYSTEM SYNC REPLICA hybrid_test")

        assert node1.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") == "(0,'data'),(1,'data')"
        assert node2.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") == "(0,'data'),(1,'data')"

        # Freshly inserted/fetched parts are expected on the local disk on both replicas.
        assert node1.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values") == "('all','default')"
        assert node2.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values") == "('all','default')"

        node1.query("ALTER TABLE hybrid_test MOVE PARTITION ID 'all' TO DISK 's31'")

        # The move is local to node1; node2 still keeps its copy on `default`.
        assert node1.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values") == "('all','s31')"
        assert node2.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values") == "('all','default')"

        # Total objects in S3
        s3_objects = get_large_objects_count(cluster, size=0)

        node2.query("ALTER TABLE hybrid_test MOVE PARTITION ID 'all' TO DISK 's31'")

        assert node1.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values") == "('all','s31')"
        assert node2.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values") == "('all','s31')"

        # Check that after moving the partition on node2 there are no new objects on S3
        wait_for_large_objects_count(cluster, s3_objects, size=0)

        assert node1.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") == "(0,'data'),(1,'data')"
        assert node2.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") == "(0,'data'),(1,'data')"
    finally:
        # Always clean up, even when an assertion above failed.
        node1.query("DROP TABLE IF EXISTS hybrid_test NO DELAY")
        node2.query("DROP TABLE IF EXISTS hybrid_test NO DELAY")


def insert_data_time(node, table, number_of_mb, time, start=0):
    """Insert one batch of ``(key, time)`` rows into ``table`` with a single INSERT.

    Keys run from ``start`` to ``start + int(1024 * 1024 * number_of_mb / 8)``
    inclusive, so the batch always has one row more than that quotient.
    (The division by 8 presumably reflects 8 bytes per key - not verifiable here.)

    Args:
        node: object with a ``query(sql)`` method.
        table: target table name, interpolated into the statement as is.
        number_of_mb: batch size used to derive the number of rows.
        time: value written verbatim into the second column of every row.
            Note that it shadows the ``time`` module inside this function.
        start: first key of the batch.
    """
    row_span = int((1024 * 1024 * number_of_mb) / 8)
    rows = []
    for key in range(start, row_span + start + 1):
        rows.append("({},{})".format(key, time))
    node.query("INSERT INTO {} VALUES {}".format(table, ','.join(rows)))


def insert_large_data(node, table):
    """Insert three batches of rows dated 7 days ago, 3 days ago and today.

    Each timestamp is local midnight of the respective day, as produced by
    ``time.mktime``. The batches use sizes of 1, 1 and 10 (``number_of_mb``)
    and start at keys 0, 1024*1024 and 1024*1024*2.
    """
    # (age of the rows in days, number_of_mb, first key of the batch)
    batches = (
        (7, 1, 0),
        (3, 1, 1024*1024),
        (0, 10, 1024*1024*2),
    )
    for days_ago, megabytes, first_key in batches:
        day = datetime.date.today() - datetime.timedelta(days=days_ago)
        tm = time.mktime(day.timetuple())
        insert_data_time(node, table, megabytes, tm, first_key)


@pytest.mark.parametrize(
    ("storage_policy", "large_data", "iterations"),
    [
        ("tiered", False, 10),
        ("tiered_copy", False, 10),
        ("tiered", True, 3),
        ("tiered_copy", True, 3),
    ]
)
def test_s3_zero_copy_with_ttl_move(cluster, storage_policy, large_data, iterations):
    """Repeatedly create, fill, merge and drop a table with a ``TO VOLUME 'external'`` TTL.

    Rows older than two days are subject to the move TTL. After
    ``OPTIMIZE ... FINAL`` on node1 and a replica sync on node2, both replicas
    must still return every inserted row. The cycle is repeated ``iterations``
    times with either two single-row inserts or the large data set.
    """
    writer = cluster.instances["node1"]
    follower = cluster.instances["node2"]
    replicas = (writer, follower)
    drop_query = "DROP TABLE IF EXISTS ttl_move_test NO DELAY"

    # Start from a clean state in case an earlier run left the table behind.
    for replica in replicas:
        replica.query(drop_query)

    for _ in range(iterations):
        writer.query(
            """
            CREATE TABLE ttl_move_test ON CLUSTER test_cluster (d UInt64, d1 DateTime)
            ENGINE=ReplicatedMergeTree('/clickhouse/tables/ttl_move_test', '{}')
            ORDER BY d
            TTL d1 + INTERVAL 2 DAY TO VOLUME 'external'
            SETTINGS storage_policy='{}'
            """.format('{replica}', storage_policy)
        )

        if not large_data:
            writer.query("INSERT INTO ttl_move_test VALUES (10, now() - INTERVAL 3 DAY)")
            writer.query("INSERT INTO ttl_move_test VALUES (11, now() - INTERVAL 1 DAY)")
        else:
            insert_large_data(writer, 'ttl_move_test')

        writer.query("OPTIMIZE TABLE ttl_move_test FINAL")
        follower.query("SYSTEM SYNC REPLICA ttl_move_test")

        # A move TTL must not lose rows: all inserted rows are expected back.
        expected_count = "(1572867)" if large_data else "(2)"
        for replica in replicas:
            assert replica.query("SELECT count() FROM ttl_move_test FORMAT Values") == expected_count
        if not large_data:
            for replica in replicas:
                assert replica.query("SELECT d FROM ttl_move_test ORDER BY d FORMAT Values") == "(10),(11)"

        for replica in replicas:
            replica.query(drop_query)


@pytest.mark.parametrize(
    ("large_data", "iterations"),
    [
        (False, 10),
        (True, 3),
    ]
)
def test_s3_zero_copy_with_ttl_delete(cluster, large_data, iterations):
    """Repeatedly create, fill, merge and drop a table with a two-day delete TTL.

    The table uses the ``tiered`` storage policy. After ``OPTIMIZE ... FINAL``
    on node1 and a replica sync on node2, only rows younger than two days are
    expected on both replicas: row ``11`` for the small data set, or the
    1310721 rows of the batch dated today for the large one.
    """
    writer = cluster.instances["node1"]
    follower = cluster.instances["node2"]
    replicas = (writer, follower)
    drop_query = "DROP TABLE IF EXISTS ttl_delete_test NO DELAY"

    # Start from a clean state in case an earlier run left the table behind.
    for replica in replicas:
        replica.query(drop_query)

    for _ in range(iterations):
        writer.query(
            """
            CREATE TABLE ttl_delete_test ON CLUSTER test_cluster (d UInt64, d1 DateTime)
            ENGINE=ReplicatedMergeTree('/clickhouse/tables/ttl_delete_test', '{}')
            ORDER BY d
            TTL d1 + INTERVAL 2 DAY
            SETTINGS storage_policy='tiered'
            """.format('{replica}')
        )

        if not large_data:
            writer.query("INSERT INTO ttl_delete_test VALUES (10, now() - INTERVAL 3 DAY)")
            writer.query("INSERT INTO ttl_delete_test VALUES (11, now() - INTERVAL 1 DAY)")
        else:
            insert_large_data(writer, 'ttl_delete_test')

        writer.query("OPTIMIZE TABLE ttl_delete_test FINAL")
        follower.query("SYSTEM SYNC REPLICA ttl_delete_test")

        # Expired rows must be gone on both replicas after the merge.
        expected_count = "(1310721)" if large_data else "(1)"
        for replica in replicas:
            assert replica.query("SELECT count() FROM ttl_delete_test FORMAT Values") == expected_count
        if not large_data:
            for replica in replicas:
                assert replica.query("SELECT d FROM ttl_delete_test ORDER BY d FORMAT Values") == "(11)"

        for replica in replicas:
            replica.query(drop_query)


def test_s3_zero_copy_concurrent_merge(cluster):
    """Check that a merge executed by both replicas at about the same time keeps the data intact.

    Two parts are created while merges are stopped, a slow default column is
    added so that the merge takes a few seconds, and merges are re-enabled.
    After ``optimize ... final`` both replicas must end up with a single
    active part and the same ``sum(id)``.

    Merges are re-enabled in a ``finally`` block: ``system stop merges`` is
    issued without a table name, so a failure during the set-up phase would
    otherwise leave merges disabled on both nodes for whatever runs next.
    """
    node1 = cluster.instances["node1"]
    node2 = cluster.instances["node2"]

    node1.query("DROP TABLE IF EXISTS concurrent_merge NO DELAY")
    node2.query("DROP TABLE IF EXISTS concurrent_merge NO DELAY")

    for node in (node1, node2):
        node.query(
        """
        CREATE TABLE concurrent_merge (id UInt64)
        ENGINE=ReplicatedMergeTree('/clickhouse/tables/concurrent_merge', '{replica}')
        ORDER BY id
        SETTINGS index_granularity=2, storage_policy='s3', remote_fs_execute_merges_on_single_replica_time_threshold=1
        """
    )

    try:
        node1.query("system stop merges")
        node2.query("system stop merges")

        # This will generate two parts with 20 granules each
        node1.query("insert into concurrent_merge select number from numbers(40)")
        node1.query("insert into concurrent_merge select number + 1 from numbers(40)")

        wait_for_active_parts(node2, 2, 'concurrent_merge')

        # Merge will materialize default column, it should sleep every granule and take 20 * 2 * 0.1 = 4 sec.
        node1.query("alter table concurrent_merge add column x UInt32 default sleep(0.1)")
    finally:
        # Same statements and order as on the success path; starting merges
        # that were never stopped is harmless.
        node1.query("system start merges")
        node2.query("system start merges")

    # Now, the merge should start.
    # Because of remote_fs_execute_merges_on_single_replica_time_threshold=1,
    # only one replica will start merge instantly.
    # The other replica should wait for 1 sec and also start it.
    # That should probably cause a data race at s3 storage.
    # For now, it does not happen (every blob has a random name, and we just have a duplicating data)
    node1.query("optimize table concurrent_merge final")

    wait_for_active_parts(node1, 1, 'concurrent_merge')
    wait_for_active_parts(node2, 1, 'concurrent_merge')

    # sum(0..39) + sum(1..40) = 780 + 820
    for node in (node1, node2):
        assert node.query('select sum(id) from concurrent_merge').strip() == '1600'
Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`import datetime`
Zero copy replication over S3: base tests 2020-10-19 12:20:45 +00:00			`import logging`
			`import time`

			`import pytest`
			`from helpers.cluster import ClickHouseCluster`

			`logging.getLogger().setLevel(logging.INFO)`
			`logging.getLogger().addHandler(logging.StreamHandler())`


			`@pytest.fixture(scope="module")`
			`def cluster():`
			`try:`
			`cluster = ClickHouseCluster(__file__)`
			`cluster.add_instance("node1", main_configs=["configs/config.d/s3.xml"], macros={'replica': '1'},`
			`with_minio=True,`
			`with_zookeeper=True)`
			`cluster.add_instance("node2", main_configs=["configs/config.d/s3.xml"], macros={'replica': '2'},`
			`with_minio=True,`
			`with_zookeeper=True)`
			`logging.info("Starting cluster...")`
			`cluster.start()`
			`logging.info("Cluster started")`

			`yield cluster`
			`finally:`
			`cluster.shutdown()`


Fix Zero Copy after merge master 2021-05-17 13:01:08 +00:00			`def get_large_objects_count(cluster, size=100, folder='data'):`
Zero copy replication over S3: base tests 2020-10-19 12:20:45 +00:00			`minio = cluster.minio_client`
			`counter = 0`
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00			`for obj in minio.list_objects(cluster.minio_bucket, '{}/'.format(folder)):`
Zero copy replication over S3: base tests 2020-10-19 12:20:45 +00:00			`if obj.size >= size:`
			`counter = counter + 1`
			`return counter`


Fix flapping tests test_s3_zero_copy_replication, test_s3_zero_copy_on_hybrid_storage 2021-04-16 10:23:38 +00:00			`def wait_for_large_objects_count(cluster, expected, size=100, timeout=30):`
			`while timeout > 0:`
Fix Zero Copy after merge master 2021-05-17 13:01:08 +00:00			`if get_large_objects_count(cluster, size=size) == expected:`
Fix flapping tests test_s3_zero_copy_replication, test_s3_zero_copy_on_hybrid_storage 2021-04-16 10:23:38 +00:00			`return`
			`timeout -= 1`
			`time.sleep(1)`
Fix Zero Copy after merge master 2021-05-17 13:01:08 +00:00			`assert get_large_objects_count(cluster, size=size) == expected`
Fix flapping tests test_s3_zero_copy_replication, test_s3_zero_copy_on_hybrid_storage 2021-04-16 10:23:38 +00:00

Add test_s3_zero_copy_concurrent_merge 2021-12-13 13:34:04 +00:00			`def wait_for_active_parts(node, num_expected_parts, table_name, timeout=30):`
			`deadline = time.monotonic() + timeout`
			`num_parts = 0`
			`while time.monotonic() < deadline:`
			`num_parts_str = node.query("select count() from system.parts where table = '{}' and active".format(table_name))`
			`num_parts = int(num_parts_str.strip())`
			`if num_parts == num_expected_parts:`
			`return`

			`time.sleep(0.2)`

			`assert num_parts == num_expected_parts`


Try fix flaky test: order for test_s3_zero_copy_replication 2021-12-16 11:51:43 +00:00			# Result of `get_large_objects_count` can be changed in other tests, so run this case at the beginning
			`@pytest.mark.order(0)`
Zero copy replication over S3: base tests 2020-10-19 12:20:45 +00:00			`@pytest.mark.parametrize(`
			`"policy", ["s3"]`
			`)`
			`def test_s3_zero_copy_replication(cluster, policy):`
			`node1 = cluster.instances["node1"]`
			`node2 = cluster.instances["node2"]`

			`node1.query(`
			`"""`
			`CREATE TABLE s3_test ON CLUSTER test_cluster (id UInt32, value String)`
			`ENGINE=ReplicatedMergeTree('/clickhouse/tables/s3_test', '{}')`
			`ORDER BY id`
			`SETTINGS storage_policy='{}'`
			`"""`
			`.format('{replica}', policy)`
			`)`

			`node1.query("INSERT INTO s3_test VALUES (0,'data'),(1,'data')")`
Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`node2.query("SYSTEM SYNC REPLICA s3_test")`
Zero copy replication over S3: base tests 2020-10-19 12:20:45 +00:00			`assert node1.query("SELECT * FROM s3_test order by id FORMAT Values") == "(0,'data'),(1,'data')"`
			`assert node2.query("SELECT * FROM s3_test order by id FORMAT Values") == "(0,'data'),(1,'data')"`

fix tests 2021-09-20 13:09:20 +00:00			`# Based on version 21.x - should be only 1 file with size 100+ (checksums.txt), used by both nodes`
Zero copy replication over S3: base tests 2020-10-19 12:20:45 +00:00			`assert get_large_objects_count(cluster) == 1`

			`node2.query("INSERT INTO s3_test VALUES (2,'data'),(3,'data')")`
Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`node1.query("SYSTEM SYNC REPLICA s3_test")`

Zero copy replication over S3: base tests 2020-10-19 12:20:45 +00:00			`assert node2.query("SELECT * FROM s3_test order by id FORMAT Values") == "(0,'data'),(1,'data'),(2,'data'),(3,'data')"`
			`assert node1.query("SELECT * FROM s3_test order by id FORMAT Values") == "(0,'data'),(1,'data'),(2,'data'),(3,'data')"`

fix tests 2021-08-02 15:42:16 +00:00			`# Based on version 21.x - two parts`
Fix flapping tests test_s3_zero_copy_replication, test_s3_zero_copy_on_hybrid_storage 2021-04-16 10:23:38 +00:00			`wait_for_large_objects_count(cluster, 2)`
Zero copy replication over S3: base tests 2020-10-19 12:20:45 +00:00
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00			`node1.query("OPTIMIZE TABLE s3_test FINAL")`
Zero copy replication over S3: base tests 2020-10-19 12:20:45 +00:00
fix tests 2021-08-02 15:42:16 +00:00			`# Based on version 21.x - after merge, two old parts and one merged`
Fix flapping tests test_s3_zero_copy_replication, test_s3_zero_copy_on_hybrid_storage 2021-04-16 10:23:38 +00:00			`wait_for_large_objects_count(cluster, 3)`
Zero copy replication over S3: base tests 2020-10-19 12:20:45 +00:00
fix tests 2021-08-02 15:42:16 +00:00			`# Based on version 21.x - after cleanup - only one merged part`
Fix flapping tests test_s3_zero_copy_replication, test_s3_zero_copy_on_hybrid_storage 2021-04-16 10:23:38 +00:00			`wait_for_large_objects_count(cluster, 1, timeout=60)`
Zero copy replication over S3: base tests 2020-10-19 12:20:45 +00:00
			`node1.query("DROP TABLE IF EXISTS s3_test NO DELAY")`
			`node2.query("DROP TABLE IF EXISTS s3_test NO DELAY")`

Fix S3 Zero-Copy replication for hybrid storage 2021-03-30 17:38:04 +00:00
			`def test_s3_zero_copy_on_hybrid_storage(cluster):`
			`node1 = cluster.instances["node1"]`
			`node2 = cluster.instances["node2"]`

			`node1.query(`
			`"""`
			`CREATE TABLE hybrid_test ON CLUSTER test_cluster (id UInt32, value String)`
Fix flapping test_s3_zero_copy_replication 2021-04-01 13:40:52 +00:00			`ENGINE=ReplicatedMergeTree('/clickhouse/tables/hybrid_test', '{}')`
Fix S3 Zero-Copy replication for hybrid storage 2021-03-30 17:38:04 +00:00			`ORDER BY id`
			`SETTINGS storage_policy='hybrid'`
			`"""`
			`.format('{replica}')`
			`)`

			`node1.query("INSERT INTO hybrid_test VALUES (0,'data'),(1,'data')")`
Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`node2.query("SYSTEM SYNC REPLICA hybrid_test")`
Fix S3 Zero-Copy replication for hybrid storage 2021-03-30 17:38:04 +00:00
			`assert node1.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") == "(0,'data'),(1,'data')"`
			`assert node2.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") == "(0,'data'),(1,'data')"`

			`assert node1.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values") == "('all','default')"`
			`assert node2.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values") == "('all','default')"`

			`node1.query("ALTER TABLE hybrid_test MOVE PARTITION ID 'all' TO DISK 's31'")`

			`assert node1.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values") == "('all','s31')"`
			`assert node2.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values") == "('all','default')"`

			`# Total objects in S3`
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00			`s3_objects = get_large_objects_count(cluster, size=0)`
Fix S3 Zero-Copy replication for hybrid storage 2021-03-30 17:38:04 +00:00
			`node2.query("ALTER TABLE hybrid_test MOVE PARTITION ID 'all' TO DISK 's31'")`

			`assert node1.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values") == "('all','s31')"`
			`assert node2.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values") == "('all','s31')"`

			`# Check that after moving partition on node2 no new objects on s3`
Fix flapping tests test_s3_zero_copy_replication, test_s3_zero_copy_on_hybrid_storage 2021-04-16 10:23:38 +00:00			`wait_for_large_objects_count(cluster, s3_objects, size=0)`
Fix S3 Zero-Copy replication for hybrid storage 2021-03-30 17:38:04 +00:00
			`assert node1.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") == "(0,'data'),(1,'data')"`
			`assert node2.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") == "(0,'data'),(1,'data')"`
Fix flapping test_s3_zero_copy_replication 2021-04-01 13:40:52 +00:00
			`node1.query("DROP TABLE IF EXISTS hybrid_test NO DELAY")`
			`node2.query("DROP TABLE IF EXISTS hybrid_test NO DELAY")`
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00

Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`def insert_data_time(node, table, number_of_mb, time, start=0):`
			`values = ','.join(f"({x},{time})" for x in range(start, int((1024 * 1024 * number_of_mb) / 8) + start + 1))`
			`node.query(f"INSERT INTO {table} VALUES {values}")`


			`def insert_large_data(node, table):`
			`tm = time.mktime((datetime.date.today() - datetime.timedelta(days=7)).timetuple())`
			`insert_data_time(node, table, 1, tm, 0)`
			`tm = time.mktime((datetime.date.today() - datetime.timedelta(days=3)).timetuple())`
			`insert_data_time(node, table, 1, tm, 1024*1024)`
			`tm = time.mktime(datetime.date.today().timetuple())`
			`insert_data_time(node, table, 10, tm, 1024*1024*2)`


Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00			`@pytest.mark.parametrize(`
Decrease iterations in test_s3_zero_copy_replication 2021-05-24 13:05:11 +00:00			`("storage_policy", "large_data", "iterations"),`
Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`[`
Decrease iterations in test_s3_zero_copy_replication 2021-05-24 13:05:11 +00:00			`("tiered", False, 10),`
			`("tiered_copy", False, 10),`
			`("tiered", True, 3),`
			`("tiered_copy", True, 3),`
Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`]`
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00			`)`
Decrease iterations in test_s3_zero_copy_replication 2021-05-24 13:05:11 +00:00			`def test_s3_zero_copy_with_ttl_move(cluster, storage_policy, large_data, iterations):`
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00			`node1 = cluster.instances["node1"]`
			`node2 = cluster.instances["node2"]`

			`node1.query("DROP TABLE IF EXISTS ttl_move_test NO DELAY")`
			`node2.query("DROP TABLE IF EXISTS ttl_move_test NO DELAY")`

Decrease iterations in test_s3_zero_copy_replication 2021-05-24 13:05:11 +00:00			`for i in range(iterations):`
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00			`node1.query(`
			`"""`
			`CREATE TABLE ttl_move_test ON CLUSTER test_cluster (d UInt64, d1 DateTime)`
			`ENGINE=ReplicatedMergeTree('/clickhouse/tables/ttl_move_test', '{}')`
			`ORDER BY d`
			`TTL d1 + INTERVAL 2 DAY TO VOLUME 'external'`
			`SETTINGS storage_policy='{}'`
			`"""`
			`.format('{replica}', storage_policy)`
			`)`

Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`if large_data:`
			`insert_large_data(node1, 'ttl_move_test')`
			`else:`
			`node1.query("INSERT INTO ttl_move_test VALUES (10, now() - INTERVAL 3 DAY)")`
			`node1.query("INSERT INTO ttl_move_test VALUES (11, now() - INTERVAL 1 DAY)")`
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00
			`node1.query("OPTIMIZE TABLE ttl_move_test FINAL")`
Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`node2.query("SYSTEM SYNC REPLICA ttl_move_test")`
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00
Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`if large_data:`
			`assert node1.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(1572867)"`
			`assert node2.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(1572867)"`
			`else:`
			`assert node1.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(2)"`
			`assert node2.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(2)"`
			`assert node1.query("SELECT d FROM ttl_move_test ORDER BY d FORMAT Values") == "(10),(11)"`
			`assert node2.query("SELECT d FROM ttl_move_test ORDER BY d FORMAT Values") == "(10),(11)"`
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00
			`node1.query("DROP TABLE IF EXISTS ttl_move_test NO DELAY")`
			`node2.query("DROP TABLE IF EXISTS ttl_move_test NO DELAY")`


Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`@pytest.mark.parametrize(`
Decrease iterations in test_s3_zero_copy_replication 2021-05-24 13:05:11 +00:00			`("large_data", "iterations"),`
Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`[`
Decrease iterations in test_s3_zero_copy_replication 2021-05-24 13:05:11 +00:00			`(False, 10),`
			`(True, 3),`
Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`]`
			`)`
Decrease iterations in test_s3_zero_copy_replication 2021-05-24 13:05:11 +00:00			`def test_s3_zero_copy_with_ttl_delete(cluster, large_data, iterations):`
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00			`node1 = cluster.instances["node1"]`
			`node2 = cluster.instances["node2"]`

			`node1.query("DROP TABLE IF EXISTS ttl_delete_test NO DELAY")`
			`node2.query("DROP TABLE IF EXISTS ttl_delete_test NO DELAY")`

Decrease iterations in test_s3_zero_copy_replication 2021-05-24 13:05:11 +00:00			`for i in range(iterations):`
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00			`node1.query(`
			`"""`
			`CREATE TABLE ttl_delete_test ON CLUSTER test_cluster (d UInt64, d1 DateTime)`
			`ENGINE=ReplicatedMergeTree('/clickhouse/tables/ttl_delete_test', '{}')`
			`ORDER BY d`
			`TTL d1 + INTERVAL 2 DAY`
			`SETTINGS storage_policy='tiered'`
			`"""`
			`.format('{replica}')`
			`)`

Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`if large_data:`
			`insert_large_data(node1, 'ttl_delete_test')`
			`else:`
			`node1.query("INSERT INTO ttl_delete_test VALUES (10, now() - INTERVAL 3 DAY)")`
			`node1.query("INSERT INTO ttl_delete_test VALUES (11, now() - INTERVAL 1 DAY)")`
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00
			`node1.query("OPTIMIZE TABLE ttl_delete_test FINAL")`
Zero-copy replication - add test for TTL with large data 2021-05-17 14:12:47 +00:00			`node2.query("SYSTEM SYNC REPLICA ttl_delete_test")`

			`if large_data:`
			`assert node1.query("SELECT count() FROM ttl_delete_test FORMAT Values") == "(1310721)"`
			`assert node2.query("SELECT count() FROM ttl_delete_test FORMAT Values") == "(1310721)"`
			`else:`
			`assert node1.query("SELECT count() FROM ttl_delete_test FORMAT Values") == "(1)"`
			`assert node2.query("SELECT count() FROM ttl_delete_test FORMAT Values") == "(1)"`
			`assert node1.query("SELECT d FROM ttl_delete_test ORDER BY d FORMAT Values") == "(11)"`
			`assert node2.query("SELECT d FROM ttl_delete_test ORDER BY d FORMAT Values") == "(11)"`
Fix Zero-Copy replication with several S3 volumes (issue 22679) 2021-04-08 18:27:56 +00:00
			`node1.query("DROP TABLE IF EXISTS ttl_delete_test NO DELAY")`
			`node2.query("DROP TABLE IF EXISTS ttl_delete_test NO DELAY")`
Add test_s3_zero_copy_concurrent_merge 2021-12-13 13:34:04 +00:00

			`def test_s3_zero_copy_concurrent_merge(cluster):`
			`node1 = cluster.instances["node1"]`
			`node2 = cluster.instances["node2"]`

			`node1.query("DROP TABLE IF EXISTS concurrent_merge NO DELAY")`
			`node2.query("DROP TABLE IF EXISTS concurrent_merge NO DELAY")`

			`for node in (node1, node2):`
			`node.query(`
			`"""`
			`CREATE TABLE concurrent_merge (id UInt64)`
			`ENGINE=ReplicatedMergeTree('/clickhouse/tables/concurrent_merge', '{replica}')`
			`ORDER BY id`
			`SETTINGS index_granularity=2, storage_policy='s3', remote_fs_execute_merges_on_single_replica_time_threshold=1`
			`"""`
			`)`

			`node1.query("system stop merges")`
			`node2.query("system stop merges")`

			`# This will generate two parts with 20 granules each`
			`node1.query("insert into concurrent_merge select number from numbers(40)")`
			`node1.query("insert into concurrent_merge select number + 1 from numbers(40)")`

			`wait_for_active_parts(node2, 2, 'concurrent_merge')`

			`# Merge will materialize default column, it should sleep every granule and take 20 * 2 * 0.1 = 4 sec.`
			`node1.query("alter table concurrent_merge add column x UInt32 default sleep(0.1)")`

			`node1.query("system start merges")`
			`node2.query("system start merges")`

			`# Now, the merge should start.`
			`# Because of remote_fs_execute_merges_on_single_replica_time_threshold=1,`
			`# only one replica will start merge instantly.`
			`# The other replica should wait for 1 sec and also start it.`
			`# That should probably cause a data race at s3 storage.`
			`# For now, it does not happen (every blob has a random name, and we just have a duplicating data)`
			`node1.query("optimize table concurrent_merge final")`

			`wait_for_active_parts(node1, 1, 'concurrent_merge')`
			`wait_for_active_parts(node2, 1, 'concurrent_merge')`

			`for node in (node1, node2):`
			`assert node.query('select sum(id) from concurrent_merge').strip() == '1600'`