2023-06-05 18:23:24 +00:00
|
|
|
import logging
|
|
|
|
import time
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
import threading
|
2023-06-07 17:37:32 +00:00
|
|
|
import random
|
2023-06-05 18:23:24 +00:00
|
|
|
|
|
|
|
from helpers.client import QueryRuntimeException
|
|
|
|
from helpers.cluster import ClickHouseCluster
|
|
|
|
|
2023-06-07 17:37:32 +00:00
|
|
|
# Number of ClickHouse instances (node1..nodeN) the cluster fixture starts.
# remote_servers.xml declares two replicas, so keep the two in sync.
REPLICA_COUNT = 2
|
2023-06-05 18:23:24 +00:00
|
|
|
|
2023-06-08 10:29:01 +00:00
|
|
|
|
2023-06-05 18:23:24 +00:00
|
|
|
@pytest.fixture(scope="module")
def cluster():
    """Module-scoped fixture that yields a started ClickHouse cluster.

    Registers ``REPLICA_COUNT`` instances named ``node1`` .. ``nodeN``, each
    with the storage and remote-servers configs plus MinIO and ZooKeeper,
    starts the cluster, yields it to the tests and shuts it down afterwards.
    """
    # Build the cluster object *before* entering ``try``: if the constructor
    # raised inside the ``try`` block, ``finally`` would reference an unbound
    # local and the resulting UnboundLocalError would hide the real failure.
    ch_cluster = ClickHouseCluster(__file__)
    try:
        for i in range(1, REPLICA_COUNT + 1):
            ch_cluster.add_instance(
                f"node{i}",
                main_configs=[
                    "configs/config.d/storage_conf.xml",
                    "configs/config.d/remote_servers.xml",
                ],
                with_minio=True,
                with_zookeeper=True,
            )

        logging.info("Starting cluster...")
        ch_cluster.start()
        logging.info("Cluster started")

        yield ch_cluster
    finally:
        # Runs both on normal teardown and when setup fails part-way.
        ch_cluster.shutdown()
|
|
|
|
|
|
|
|
|
2023-06-07 17:37:32 +00:00
|
|
|
def create_table(node, table_name, replicated, additional_settings):
    """Create the test table on ``node``.

    Args:
        node: instance to run the statement on; its ``name`` is used as the
            replica name when ``replicated`` is true.
        table_name: name of the table (also used in the replication path).
        replicated: when true, use ``ReplicatedMergeTree`` and issue the
            statement through ``query_with_retry``; otherwise use a plain
            ``MergeTree`` and ``query``.
        additional_settings: mapping of extra table settings; entries
            override the defaults below. The mapping is not modified.
    """
    # Defaults first, caller overrides second (same precedence as update()).
    merged_settings = {
        **{
            "storage_policy": "two_disks",
            "old_parts_lifetime": 1,
            "index_granularity": 512,
            "temporary_directories_lifetime": 0,
            "merge_tree_clear_old_temporary_directories_interval_seconds": 1,
        },
        **additional_settings,
    }

    if replicated:
        engine = (
            f"ReplicatedMergeTree('/clickhouse/tables/0/{table_name}', '{node.name}')"
        )
        run_query = node.query_with_retry
    else:
        engine = "MergeTree()"
        run_query = node.query

    # repr() renders strings single-quoted and integers bare.
    settings_clause = ",".join(
        name + "=" + repr(value) for name, value in merged_settings.items()
    )

    statement = f"""
        CREATE TABLE {table_name} (
            dt Date,
            id Int64,
            data String,
            INDEX min_max (id) TYPE minmax GRANULARITY 3
        ) ENGINE = {engine}
        PARTITION BY dt
        ORDER BY (dt, id)
        SETTINGS {settings_clause}"""

    run_query(statement)
|
2023-06-05 18:23:24 +00:00
|
|
|
|
|
|
|
|
2023-06-07 17:37:32 +00:00
|
|
|
@pytest.mark.parametrize(
    "allow_remote_fs_zero_copy_replication,replicated_engine",
    [(False, False), (False, True), (True, True)],
)
def test_create_table(
    cluster, allow_remote_fs_zero_copy_replication, replicated_engine
):
    """Move partitions to the ``s3`` disk while mutations run concurrently.

    Fills ten daily partitions with 10 000 rows each, then moves every
    partition to disk ``s3`` while a background thread adds a column and
    issues ``DELETE`` statements. Finally checks that the rows with
    ``id % 100 == 0`` (which the background thread never deletes) are all
    still present.

    Args:
        cluster: started cluster provided by the ``cluster`` fixture.
        allow_remote_fs_zero_copy_replication: when true, the table is created
            with ``allow_remote_fs_zero_copy_replication = 1``.
        replicated_engine: when true, the table is created on every instance
            with a replicated engine; otherwise only on ``node1``.
    """
    if replicated_engine:
        nodes = list(cluster.instances.values())
    else:
        nodes = [cluster.instances["node1"]]

    additional_settings = {}

    # Different names for logs readability
    table_name = "test_table"
    if allow_remote_fs_zero_copy_replication:
        table_name = "test_table_zero_copy"
        additional_settings["allow_remote_fs_zero_copy_replication"] = 1
    if replicated_engine:
        table_name = table_name + "_replicated"

    for node in nodes:
        create_table(node, table_name, replicated_engine, additional_settings)

    for i in range(1, 11):
        partition = f"2021-01-{i:02d}"
        random.choice(nodes).query(
            f"INSERT INTO {table_name} SELECT toDate('{partition}'), number as id, toString(sipHash64(number, {i})) FROM numbers(10_000)"
        )

    # Run ALTER in parallel with moving parts

    # An Event makes the cross-thread stop signal explicit instead of relying
    # on rebinding a closure variable from the main thread.
    stop_alter = threading.Event()

    def alter():
        random.choice(nodes).query(f"ALTER TABLE {table_name} ADD COLUMN col0 String")
        for d in range(1, 100):
            if stop_alter.is_set():
                break

            # Some lightweight mutation should change the moving part before it is swapped, then we will have to clean it up.
            # Messages `Failed to swap {}. Active part doesn't exist` should appear in logs.
            #
            # I managed to reproduce the issue with DELETE (`ALTER TABLE {table_name} ADD/DROP COLUMN` also works on real s3 instead of minio)
            # Note: do not delete rows with id % 100 = 0, because the final check counts exactly those rows to verify that data is not corrupted
            random.choice(nodes).query(f"DELETE FROM {table_name} WHERE id % 100 = {d}")

            time.sleep(0.1)

    alter_thread = threading.Thread(target=alter)
    alter_thread.start()

    try:
        for i in range(1, 11):
            partition = f"2021-01-{i:02d}"
            try:
                random.choice(nodes).query(
                    f"ALTER TABLE {table_name} MOVE PARTITION '{partition}' TO DISK 's3'",
                )
            except QueryRuntimeException as e:
                # A part locked by the concurrent mutation is expected; skip
                # this partition. Anything else is a real failure.
                if "PART_IS_TEMPORARILY_LOCKED" in str(e):
                    continue
                raise

            # Function to clear old temporary directories wakes up every 1 second, sleep to make sure it is called
            time.sleep(0.5)
    finally:
        # Always stop and join the background thread, even when a move fails,
        # so it does not keep querying the cluster after this test has ended.
        stop_alter.set()
        alter_thread.join()

    # Check that no data was lost

    if replicated_engine:
        # We don't know what data was replicated, so we need to check all replicas and take unique values
        data_digest = random.choice(nodes).query_with_retry(
            f"SELECT countDistinct(dt, data) FROM clusterAllReplicas(test_cluster, default.{table_name}) WHERE id % 100 == 0"
        )
    else:
        data_digest = random.choice(nodes).query(
            f"SELECT countDistinct(dt, data) FROM {table_name} WHERE id % 100 == 0"
        )

    # 10 partitions x 100 ids with id % 100 == 0 out of 10 000 per partition.
    assert data_digest == "1000\n"
|