ClickHouse/tests/integration/test_broken_detached_part_clean_up/test.py
Alexander Tokmakov 3d59ebe108 fix
2023-08-10 20:11:22 +02:00

361 lines
13 KiB
Python

import pytest
from helpers.cluster import ClickHouseCluster
from helpers.corrupt_part_data_on_disk import corrupt_part_data_on_disk
from helpers.corrupt_part_data_on_disk import break_part
import time
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance(
"node1", stay_alive=True, main_configs=["configs/store_cleanup.xml"]
)
path_to_data = "/var/lib/clickhouse/"
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def remove_broken_detached_part_impl(table, node, expect_broken_prefix):
assert (
node.query(
f"SELECT COUNT() FROM system.parts WHERE table='{table}' AND active=1"
)
== "4\n"
)
path_to_detached = path_to_data + f"data/default/{table}/detached/"
result = node.exec_in_container(["ls", path_to_detached])
assert result.strip() == ""
corrupt_part_data_on_disk(node, table, "all_3_3_0")
break_part(node, table, "all_3_3_0")
node.query(f"ALTER TABLE {table} DETACH PART 'all_1_1_0'")
result = node.exec_in_container(["touch", f"{path_to_detached}trash"])
node.exec_in_container(["mkdir", f"{path_to_detached}../broken_all_fake"])
node.exec_in_container(
["touch", "-t", "1312031429.30", f"{path_to_detached}../broken_all_fake"]
)
result = node.exec_in_container(["stat", f"{path_to_detached}../broken_all_fake"])
print(result)
assert "Modify: 2013-12-03" in result
node.exec_in_container(
[
"mv",
f"{path_to_detached}../broken_all_fake",
f"{path_to_detached}broken_all_fake",
]
)
for name in [
"unexpected_all_42_1337_5",
"deleting_all_123_456_7",
"covered-by-broken_all_12_34_5",
]:
node.exec_in_container(["mkdir", f"{path_to_detached}../{name}"])
node.exec_in_container(
[
"touch",
"-t",
"1312031429.30",
f"{path_to_detached}../{name}",
]
)
result = node.exec_in_container(["stat", f"{path_to_detached}../{name}"])
print(result)
assert "Modify: 2013-12-03" in result
node.exec_in_container(
[
"mv",
f"{path_to_detached}../{name}",
f"{path_to_detached}{name}",
]
)
result = node.query(
f"CHECK TABLE {table}", settings={"check_query_single_value_result": 0}
)
assert "all_3_3_0\t0" in result
node.query(f"DETACH TABLE {table}")
node.query(f"ATTACH TABLE {table}")
node.wait_for_log_line(
"Removing detached part deleting_all_123_456_7",
timeout=90,
look_behind_lines=1000000,
)
node.wait_for_log_line(
f"Removed broken detached part {expect_broken_prefix}_all_3_3_0 due to a timeout",
timeout=10,
look_behind_lines=1000000,
)
node.wait_for_log_line(
"Removed broken detached part unexpected_all_42_1337_5 due to a timeout",
timeout=10,
look_behind_lines=1000000,
)
result = node.exec_in_container(["ls", path_to_detached])
print(result)
assert f"{expect_broken_prefix}_all_3_3_0" not in result
assert "all_1_1_0" in result
assert "trash" in result
assert "broken_all_fake" in result
assert "covered-by-broken_all_12_34_5" in result
assert "unexpected_all_42_1337_5" not in result
assert "deleting_all_123_456_7" not in result
node.query(
f"ALTER TABLE {table} DROP DETACHED PART 'covered-by-broken_all_12_34_5'",
settings={"allow_drop_detached": 1},
)
result = node.exec_in_container(["ls", path_to_detached])
assert "covered-by-broken_all_12_34_5" not in result
node.query(f"DROP TABLE {table} SYNC")
def test_remove_broken_detached_part_merge_tree(started_cluster):
node1.query(
"""
CREATE TABLE
mt(id UInt32, value Int32)
ENGINE = MergeTree() ORDER BY id
SETTINGS
merge_tree_enable_clear_old_broken_detached=1,
merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds=5;
"""
)
for i in range(4):
node1.query(
f"INSERT INTO mt SELECT number, number * number FROM numbers ({i * 100000}, 100000)"
)
remove_broken_detached_part_impl("mt", node1, "broken-on-start")
def test_remove_broken_detached_part_replicated_merge_tree(started_cluster):
node1.query(
f"""
CREATE TABLE
replicated_mt(date Date, id UInt32, value Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{node1.name}') ORDER BY id
SETTINGS
merge_tree_enable_clear_old_broken_detached=1,
merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds=5,
cleanup_delay_period=1,
cleanup_delay_period_random_add=0,
cleanup_thread_preferred_points_per_iteration=0;
"""
)
for i in range(4):
node1.query(
f"INSERT INTO replicated_mt SELECT toDate('2019-10-01'), number, number * number FROM numbers ({i * 100000}, 100000)"
)
remove_broken_detached_part_impl("replicated_mt", node1, "broken")
def test_store_cleanup(started_cluster):
node1.query("CREATE DATABASE db UUID '10000000-1000-4000-8000-000000000001'")
node1.query(
"CREATE TABLE db.log UUID '10000000-1000-4000-8000-000000000002' ENGINE=Log AS SELECT 1"
)
node1.query(
"CREATE TABLE db.mt UUID '10000000-1000-4000-8000-000000000003' ENGINE=MergeTree ORDER BY tuple() AS SELECT 1"
)
node1.query(
"CREATE TABLE db.mem UUID '10000000-1000-4000-8000-000000000004' ENGINE=Memory AS SELECT 1"
)
node1.query("CREATE DATABASE db2 UUID '20000000-1000-4000-8000-000000000001'")
node1.query(
"CREATE TABLE db2.log UUID '20000000-1000-4000-8000-000000000002' ENGINE=Log AS SELECT 1"
)
node1.query("DETACH DATABASE db2")
node1.query("CREATE DATABASE db3 UUID '30000000-1000-4000-8000-000000000001'")
node1.query(
"CREATE TABLE db3.log UUID '30000000-1000-4000-8000-000000000002' ENGINE=Log AS SELECT 1"
)
node1.query(
"CREATE TABLE db3.log2 UUID '30000000-1000-4000-8000-000000000003' ENGINE=Log AS SELECT 1"
)
node1.query("DETACH TABLE db3.log")
node1.query("DETACH TABLE db3.log2 PERMANENTLY")
assert "d---------" not in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store"]
)
assert "d---------" not in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store/100"]
)
assert "d---------" not in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store/200"]
)
assert "d---------" not in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store/300"]
)
node1.stop_clickhouse(kill=True)
# All dirs related to `db` will be removed
node1.exec_in_container(["rm", f"{path_to_data}/metadata/db.sql"])
node1.exec_in_container(["mkdir", f"{path_to_data}/store/kek"])
node1.exec_in_container(["touch", f"{path_to_data}/store/12"])
try:
node1.exec_in_container(["mkdir", f"{path_to_data}/store/456"])
except Exception as e:
print("Failed to create 456/:", str(e))
node1.exec_in_container(["mkdir", f"{path_to_data}/store/456/testgarbage"])
node1.exec_in_container(
["mkdir", f"{path_to_data}/store/456/30000000-1000-4000-8000-000000000003"]
)
node1.exec_in_container(
["touch", f"{path_to_data}/store/456/45600000-1000-4000-8000-000000000003"]
)
node1.exec_in_container(
["mkdir", f"{path_to_data}/store/456/45600000-1000-4000-8000-000000000004"]
)
node1.start_clickhouse()
node1.query("DETACH DATABASE db2")
node1.query("DETACH TABLE db3.log")
node1.wait_for_log_line(
"Removing access rights for unused directory",
timeout=60,
look_behind_lines=1000000,
)
node1.wait_for_log_line(
"directories from store", timeout=60, look_behind_lines=1000000
)
store = node1.exec_in_container(["ls", f"{path_to_data}/store"])
assert "100" in store
assert "200" in store
assert "300" in store
assert "456" in store
assert "kek" in store
assert "12" in store
assert "d---------" in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store"]
)
assert "d---------" in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store/456"]
)
# Metadata is removed, so store/100 contains garbage
store100 = node1.exec_in_container(["ls", f"{path_to_data}/store/100"])
assert "10000000-1000-4000-8000-000000000001" in store100
assert "10000000-1000-4000-8000-000000000002" in store100
assert "10000000-1000-4000-8000-000000000003" in store100
assert "d---------" in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store/100"]
)
# Database is detached, nothing to clean up
store200 = node1.exec_in_container(["ls", f"{path_to_data}/store/200"])
assert "20000000-1000-4000-8000-000000000001" in store200
assert "20000000-1000-4000-8000-000000000002" in store200
assert "d---------" not in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store/200"]
)
# Tables are detached, nothing to clean up
store300 = node1.exec_in_container(["ls", f"{path_to_data}/store/300"])
assert "30000000-1000-4000-8000-000000000001" in store300
assert "30000000-1000-4000-8000-000000000002" in store300
assert "30000000-1000-4000-8000-000000000003" in store300
assert "d---------" not in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store/300"]
)
# Manually created garbage
store456 = node1.exec_in_container(["ls", f"{path_to_data}/store/456"])
assert "30000000-1000-4000-8000-000000000003" in store456
assert "45600000-1000-4000-8000-000000000003" in store456
assert "45600000-1000-4000-8000-000000000004" in store456
assert "testgarbage" in store456
assert "----------" in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store/456"]
)
node1.wait_for_log_line(
"Removing unused directory", timeout=90, look_behind_lines=1000000
)
node1.wait_for_log_line(
"directories from store", timeout=90, look_behind_lines=1000000
)
node1.wait_for_log_line(
"Nothing to clean up from store/", timeout=90, look_behind_lines=1000000
)
store = node1.exec_in_container(["ls", f"{path_to_data}/store"])
assert "100" in store
assert "200" in store
assert "300" in store
assert "456" in store
assert "kek" not in store # changed
assert "\n12\n" not in store # changed
assert "d---------" not in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store"]
) # changed
# Metadata is removed, so store/100 contains garbage
store100 = node1.exec_in_container(["ls", f"{path_to_data}/store/100"]) # changed
assert "10000000-1000-4000-8000-000000000001" not in store100 # changed
assert "10000000-1000-4000-8000-000000000002" not in store100 # changed
assert "10000000-1000-4000-8000-000000000003" not in store100 # changed
assert "d---------" not in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store/100"]
) # changed
# Database is detached, nothing to clean up
store200 = node1.exec_in_container(["ls", f"{path_to_data}/store/200"])
assert "20000000-1000-4000-8000-000000000001" in store200
assert "20000000-1000-4000-8000-000000000002" in store200
assert "d---------" not in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store/200"]
)
# Tables are detached, nothing to clean up
store300 = node1.exec_in_container(["ls", f"{path_to_data}/store/300"])
assert "30000000-1000-4000-8000-000000000001" in store300
assert "30000000-1000-4000-8000-000000000002" in store300
assert "30000000-1000-4000-8000-000000000003" in store300
assert "d---------" not in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store/300"]
)
# Manually created garbage
store456 = node1.exec_in_container(["ls", f"{path_to_data}/store/456"])
assert "30000000-1000-4000-8000-000000000003" not in store456 # changed
assert "45600000-1000-4000-8000-000000000003" not in store456 # changed
assert "45600000-1000-4000-8000-000000000004" not in store456 # changed
assert "testgarbage" not in store456 # changed
assert "---------" not in node1.exec_in_container(
["ls", "-l", f"{path_to_data}/store/456"]
) # changed
node1.query("ATTACH TABLE db3.log2")
node1.query("ATTACH DATABASE db2")
node1.query("ATTACH TABLE db3.log")
assert "1\n" == node1.query("SELECT * FROM db3.log")
assert "1\n" == node1.query("SELECT * FROM db3.log2")
assert "1\n" == node1.query("SELECT * FROM db2.log")