2019-07-03 20:51:13 +00:00
|
|
|
import pytest
|
|
|
|
|
|
|
|
from helpers.cluster import ClickHouseCluster
|
2023-10-23 12:13:36 +00:00
|
|
|
from helpers.client import QueryRuntimeException
|
2019-07-03 20:51:13 +00:00
|
|
|
|
|
|
|
# Single cluster object shared by every test in this module.
cluster = ClickHouseCluster(__file__)

# Two instances, both created with ZooKeeper enabled; the replicated-table
# tests below create ReplicatedMergeTree tables on both of them.
node1, node2 = (
    cluster.add_instance(instance_name, with_zookeeper=True)
    for instance_name in ("node1", "node2")
)
|
2019-07-03 20:51:13 +00:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(scope="module")
def started_cluster():
    """Module-scoped fixture: start the shared cluster and hand it to the tests.

    Yields:
        The module-level ``cluster`` object after ``cluster.start()`` returned.

    ``cluster.shutdown()`` sits in the ``finally`` clause, so it is invoked
    both after the last test of the module and when ``cluster.start()``
    itself raises.
    """
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()
|
|
|
|
|
|
|
|
|
|
|
|
def corrupt_data_part_on_disk(node, table, part_name):
    """Corrupt one ``*.bin`` file of a data part by appending bytes to it.

    The part directory is looked up in ``system.parts`` on ``node``; the
    output of ``echo "1"`` is then appended to the first ``*.bin`` file that
    ``ls`` lists in that directory.

    Args:
        node: object exposing ``query`` and ``exec_in_container``.
        table: table name used to filter ``system.parts``.
        part_name: name of the part to corrupt.

    Raises:
        Exception: if ``system.parts`` returns no path for the part.
    """
    part_path = node.query(
        "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(
            table, part_name
        )
    ).strip()

    # Fail fast like remove_part_from_disk does: with an empty path the shell
    # below would run `cd` without an argument and operate on whatever
    # directory that lands in instead of the part directory.
    if not part_path:
        raise Exception("Part " + part_name + " doesn't exist")

    # `{{}}` is the escaped literal `{}` that xargs substitutes with the file
    # name; it is passed to `sh -c` as $1.
    shell_command = "cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c 'echo \"1\" >> $1' -- {{}}".format(
        p=part_path
    )
    node.exec_in_container(["bash", "-c", shell_command], privileged=True)
|
2020-09-16 04:26:10 +00:00
|
|
|
|
2019-07-03 20:51:13 +00:00
|
|
|
|
|
|
|
def remove_checksums_on_disk(node, table, part_name):
    """Delete ``checksums.txt`` from a data part's directory.

    The part directory is looked up in ``system.parts`` on ``node`` and the
    file is removed with ``rm -r`` inside the container.

    Args:
        node: object exposing ``query`` and ``exec_in_container``.
        table: table name used to filter ``system.parts``.
        part_name: name of the part whose checksums file is removed.

    Raises:
        Exception: if ``system.parts`` returns no path for the part.
    """
    part_path = node.query(
        "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(
            table, part_name
        )
    ).strip()

    # Fail fast like remove_part_from_disk does: with an empty path the
    # command below would become `rm -r /checksums.txt`, run privileged.
    if not part_path:
        raise Exception("Part " + part_name + " doesn't exist")

    removal_command = "rm -r {p}/checksums.txt".format(p=part_path)
    node.exec_in_container(["bash", "-c", removal_command], privileged=True)
|
2019-07-03 20:51:13 +00:00
|
|
|
|
2020-09-16 04:26:10 +00:00
|
|
|
|
2019-07-03 20:51:13 +00:00
|
|
|
def remove_part_from_disk(node, table, part_name):
    """Delete every file inside a data part's directory.

    The part directory is looked up in ``system.parts`` on ``node``; its
    contents (``<path>/*``) are removed with ``rm -r`` inside the container.

    Args:
        node: object exposing ``query`` and ``exec_in_container``.
        table: table name used to filter ``system.parts``.
        part_name: name of the part to wipe.

    Raises:
        Exception: if ``system.parts`` returns no path for the part.
    """
    part_path = node.query(
        "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(
            table, part_name
        )
    ).strip()

    # An empty path would turn the command below into `rm -r /*`.
    if not part_path:
        # The message previously lacked the space before "doesn't".
        raise Exception("Part " + part_name + " doesn't exist")

    wipe_command = "rm -r {p}/*".format(p=part_path)
    node.exec_in_container(["bash", "-c", wipe_command], privileged=True)
|
2019-07-03 20:51:13 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_check_normal_table_corruption(started_cluster):
    """CHECK TABLE on a non-replicated MergeTree table on node1.

    Scenario, in order:
      1. A freshly inserted part is reported with status ``1``.
      2. After ``checksums.txt`` is deleted, CHECK TABLE (whole table, then a
         single partition) reports "Checksums recounted and written to disk."
         and the row count is unchanged.
      3. After a ``*.bin`` file is corrupted, the part is reported with
         status ``0`` on two consecutive checks.
      4. A second partition that is corrupted and then loses its checksums
         file is reported with status ``0`` as well.
    """

    def run_check(statement):
        # Every CHECK in this test uses the same settings; a fresh dict is
        # built per call, exactly as the individual call sites would.
        return node1.query(
            statement,
            settings={"check_query_single_value_result": 0, "max_threads": 1},
        )

    def status_columns(statement):
        # First two tab-separated fields of the reply: part name and status.
        return run_check(statement).strip().split("\t")[0:2]

    node1.query("DROP TABLE IF EXISTS non_replicated_mt")
    node1.query(
        """
        CREATE TABLE non_replicated_mt(date Date, id UInt32, value Int32)
        ENGINE = MergeTree() PARTITION BY toYYYYMM(date) ORDER BY id
        SETTINGS min_bytes_for_wide_part=0;
    """
    )

    node1.query(
        "INSERT INTO non_replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)"
    )
    healthy_reply = run_check("CHECK TABLE non_replicated_mt PARTITION 201902")
    assert healthy_reply == "201902_1_1_0\t1\t\n"

    # Missing checksums: once checked via the whole table, once via the partition.
    for statement in (
        "CHECK TABLE non_replicated_mt",
        "CHECK TABLE non_replicated_mt PARTITION 201902",
    ):
        remove_checksums_on_disk(node1, "non_replicated_mt", "201902_1_1_0")
        recounted_reply = run_check(statement).strip()
        assert (
            recounted_reply
            == "201902_1_1_0\t1\tChecksums recounted and written to disk."
        )
        assert node1.query("SELECT COUNT() FROM non_replicated_mt") == "2\n"

    corrupt_data_part_on_disk(node1, "non_replicated_mt", "201902_1_1_0")

    # The same check is issued twice in a row and must fail both times.
    for _ in range(2):
        assert status_columns("CHECK TABLE non_replicated_mt") == [
            "201902_1_1_0",
            "0",
        ]

    node1.query(
        "INSERT INTO non_replicated_mt VALUES (toDate('2019-01-01'), 1, 10), (toDate('2019-01-01'), 2, 12)"
    )
    second_reply = run_check("CHECK TABLE non_replicated_mt PARTITION 201901")
    assert second_reply == "201901_2_2_0\t1\t\n"

    corrupt_data_part_on_disk(node1, "non_replicated_mt", "201901_2_2_0")
    remove_checksums_on_disk(node1, "non_replicated_mt", "201901_2_2_0")

    assert status_columns("CHECK TABLE non_replicated_mt PARTITION 201901") == [
        "201901_2_2_0",
        "0",
    ]
|
2019-07-03 20:51:13 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_check_replicated_table_simple(started_cluster):
    """CHECK TABLE on a healthy ReplicatedMergeTree table on both replicas.

    Inserts one partition through each replica, syncs the other one, and
    asserts that CHECK TABLE (whole table, by PARTITION, and by PART) reports
    every part with status ``1`` on both nodes. Checking a part name that was
    never inserted must raise ``QueryRuntimeException`` mentioning
    ``NO_SUCH_DATA_PART``.
    """
    replicas = [node1, node2]

    def run_check(node, statement, single_thread=True):
        # Some checks below are issued without "max_threads"; the flag keeps
        # the settings of each call exactly as required.
        settings = {"check_query_single_value_result": 0}
        if single_thread:
            settings["max_threads"] = 1
        return node.query(statement, settings=settings)

    def assert_row_count(expected):
        for replica in replicas:
            assert replica.query("SELECT count() from replicated_mt") == expected

    for node in replicas:
        node.query("DROP TABLE IF EXISTS replicated_mt")
        create_statement = """
        CREATE TABLE replicated_mt(date Date, id UInt32, value Int32)
        ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id;
        """.format(
            replica=node.name
        )
        node.query(create_statement)

    # First partition is written through node1 and pulled by node2.
    node1.query(
        "INSERT INTO replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)"
    )
    node2.query("SYSTEM SYNC REPLICA replicated_mt")
    assert_row_count("2\n")

    for replica in replicas:
        assert run_check(replica, "CHECK TABLE replicated_mt") == "201902_0_0_0\t1\t\n"

    # Second partition is written through node2 and pulled by node1.
    node2.query(
        "INSERT INTO replicated_mt VALUES (toDate('2019-01-02'), 3, 10), (toDate('2019-01-02'), 4, 12)"
    )
    node1.query("SYSTEM SYNC REPLICA replicated_mt")
    assert_row_count("4\n")

    for replica in replicas:
        partition_reply = run_check(
            replica, "CHECK TABLE replicated_mt PARTITION 201901"
        )
        assert partition_reply == "201901_0_0_0\t1\t\n"

    # Rows are sorted before comparing, so their order in the reply does not
    # matter; the empty string comes from the trailing newline.
    whole_table_rows = run_check(
        node2, "CHECK TABLE replicated_mt", single_thread=False
    ).split("\n")
    assert sorted(whole_table_rows) == [
        "",
        "201901_0_0_0\t1\t",
        "201902_0_0_0\t1\t",
    ]

    with pytest.raises(QueryRuntimeException) as exc:
        run_check(
            node2,
            "CHECK TABLE replicated_mt PART '201801_0_0_0'",
            single_thread=False,
        )
    assert "NO_SUCH_DATA_PART" in str(exc.value)

    single_part_reply = run_check(
        node2,
        "CHECK TABLE replicated_mt PART '201902_0_0_0'",
        single_thread=False,
    )
    assert single_part_reply == "201902_0_0_0\t1\t\n"
|
|
|
|
|
2019-07-03 20:51:13 +00:00
|
|
|
|
|
|
|
def test_check_replicated_table_corruption(started_cluster):
    """CHECK TABLE detects a damaged part on a replica and the part is re-fetched.

    Stage 1: a ``*.bin`` file of the 201901 part is corrupted on node1;
    CHECK TABLE on node1 must report the part as broken, and after
    ``SYSTEM SYNC REPLICA`` the same check must report it healthy again.

    Stage 2: the files of the same part are deleted on node2; CHECK TABLE on
    node2 must report the part as broken, and after ``SYSTEM SYNC REPLICA``
    on node2 the check must report it healthy again.

    NOTE(review): stage 2 used to sync and re-check node1, i.e. the replica
    that was *not* damaged, so node2's recovery was never verified. It now
    mirrors stage 1 against node2 — confirm this passes in CI.
    """

    def check_partition(node):
        return node.query(
            "CHECK TABLE replicated_mt_1 PARTITION 201901",
            settings={"check_query_single_value_result": 0, "max_threads": 1},
        )

    for node in [node1, node2]:
        node.query_with_retry("DROP TABLE IF EXISTS replicated_mt_1")

        node.query_with_retry(
            """
            CREATE TABLE replicated_mt_1(date Date, id UInt32, value Int32)
            ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt_1', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id;
            """.format(
                replica=node.name
            )
        )

    node1.query(
        "INSERT INTO replicated_mt_1 VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)"
    )
    node1.query(
        "INSERT INTO replicated_mt_1 VALUES (toDate('2019-01-02'), 3, 10), (toDate('2019-01-02'), 4, 12)"
    )
    node2.query("SYSTEM SYNC REPLICA replicated_mt_1")

    assert node1.query("SELECT count() from replicated_mt_1") == "4\n"
    assert node2.query("SELECT count() from replicated_mt_1") == "4\n"

    part_name = node1.query_with_retry(
        "SELECT name from system.parts where table = 'replicated_mt_1' and partition_id = '201901' and active = 1"
    ).strip()

    # Expected CHECK TABLE replies for the damaged and the restored part.
    broken_reply = (
        "{p}\t0\tPart {p} looks broken. Removing it and will try to fetch.\n".format(
            p=part_name
        )
    )
    healthy_reply = "{}\t1\t\n".format(part_name)

    # Stage 1: corrupted data file on node1.
    corrupt_data_part_on_disk(node1, "replicated_mt_1", part_name)
    assert check_partition(node1) == broken_reply

    node1.query_with_retry("SYSTEM SYNC REPLICA replicated_mt_1")
    assert check_partition(node1) == healthy_reply
    assert node1.query("SELECT count() from replicated_mt_1") == "4\n"

    # Stage 2: part files deleted on node2.
    remove_part_from_disk(node2, "replicated_mt_1", part_name)
    assert check_partition(node2) == broken_reply

    # Verify the replica that was actually damaged (node2), not node1.
    node2.query_with_retry("SYSTEM SYNC REPLICA replicated_mt_1")
    assert check_partition(node2) == healthy_reply
    assert node2.query("SELECT count() from replicated_mt_1") == "4\n"
    assert node1.query("SELECT count() from replicated_mt_1") == "4\n"
|