ClickHouse/tests/integration/test_store_cleanup/test.py
2023-10-13 16:35:42 +02:00

213 lines
8.0 KiB
Python

import pytest
from helpers.cluster import ClickHouseCluster
# Cluster object for this test module; instances are registered on it below.
cluster = ClickHouseCluster(__file__)

# The test kills and restarts the server inside this instance
# (stop_clickhouse / start_clickhouse), hence stay_alive=True.
# NOTE(review): configs/store_cleanup.xml presumably tunes the store cleanup
# timings the test waits for -- confirm against the config file.
node1 = cluster.add_instance(
    "node1",
    main_configs=["configs/store_cleanup.xml"],
    stay_alive=True,
)

# Data directory inside the container. The value ends with a slash, so paths
# built as f"{path_to_data}/store" contain a double slash.
path_to_data = "/var/lib/clickhouse/"
@pytest.fixture(scope="module")
def started_cluster():
    """Start the module-level ``cluster``, yield it, and always shut it down.

    The fixture is module-scoped. ``cluster.start()`` is called inside the
    ``try`` so that ``cluster.shutdown()`` also runs when start-up raises.
    """
    try:
        cluster.start()
        yield cluster
    finally:
        # Runs after the tests finish and also if start() failed.
        cluster.shutdown()
def test_store_cleanup(started_cluster):
    """Exercise the handling of unused entries under ``store/`` after a restart.

    Steps, in order:

    1. Create databases ``db``, ``db2`` and ``db3`` with fixed UUIDs, detach
       ``db2``, detach ``db3.log`` and permanently detach ``db3.log2``.
    2. Kill the server, delete ``metadata/db.sql`` and plant extra files and
       directories under ``store/``.
    3. Restart, detach ``db2`` and ``db3.log`` again, and wait for the
       "Removing access rights for unused directory" log line. At that point
       everything must still exist, with some entries listed with an
       all-dashes mode by ``ls -l``.
    4. Wait for the "Removing unused directory" log line and assert that the
       planted entries and the ``db`` directories are gone, while the
       directories of the detached database and tables are untouched.
    5. Re-attach everything and check that the data is still readable.
    """
    store_root = f"{path_to_data}/store"
    no_access_dir = "d---------"
    whole_log = 1000000  # look_behind_lines value used for every log wait

    def shell(*argv):
        # Run one command inside the node1 container and return its result.
        return node1.exec_in_container(list(argv))

    def names_in(directory):
        return shell("ls", directory)

    def long_listing(directory):
        return shell("ls", "-l", directory)

    def assert_present(listing, *names):
        for name in names:
            assert name in listing

    def assert_absent(listing, *names):
        for name in names:
            assert name not in listing

    def assert_detached_dirs_untouched():
        # Database is detached, nothing to clean up
        store200 = names_in(f"{store_root}/200")
        assert_present(
            store200,
            "20000000-1000-4000-8000-000000000001",
            "20000000-1000-4000-8000-000000000002",
        )
        assert no_access_dir not in long_listing(f"{store_root}/200")

        # Tables are detached, nothing to clean up
        store300 = names_in(f"{store_root}/300")
        assert_present(
            store300,
            "30000000-1000-4000-8000-000000000001",
            "30000000-1000-4000-8000-000000000002",
            "30000000-1000-4000-8000-000000000003",
        )
        assert no_access_dir not in long_listing(f"{store_root}/300")

    # --- 1. Create the schema and detach parts of it ------------------------
    setup_statements = (
        "CREATE DATABASE db UUID '10000000-1000-4000-8000-000000000001'",
        "CREATE TABLE db.log UUID '10000000-1000-4000-8000-000000000002' ENGINE=Log AS SELECT 1",
        "CREATE TABLE db.mt UUID '10000000-1000-4000-8000-000000000003' ENGINE=MergeTree ORDER BY tuple() AS SELECT 1",
        "CREATE TABLE db.mem UUID '10000000-1000-4000-8000-000000000004' ENGINE=Memory AS SELECT 1",
        "CREATE DATABASE db2 UUID '20000000-1000-4000-8000-000000000001'",
        "CREATE TABLE db2.log UUID '20000000-1000-4000-8000-000000000002' ENGINE=Log AS SELECT 1",
        "DETACH DATABASE db2",
        "CREATE DATABASE db3 UUID '30000000-1000-4000-8000-000000000001'",
        "CREATE TABLE db3.log UUID '30000000-1000-4000-8000-000000000002' ENGINE=Log AS SELECT 1",
        "CREATE TABLE db3.log2 UUID '30000000-1000-4000-8000-000000000003' ENGINE=Log AS SELECT 1",
        "DETACH TABLE db3.log",
        "DETACH TABLE db3.log2 PERMANENTLY",
    )
    for statement in setup_statements:
        node1.query(statement)

    # Before the restart no directory is listed with an all-dashes mode.
    for directory in (
        store_root,
        f"{store_root}/100",
        f"{store_root}/200",
        f"{store_root}/300",
    ):
        assert no_access_dir not in long_listing(directory)

    # --- 2. Kill the server and tamper with the data directory --------------
    node1.stop_clickhouse(kill=True)

    # All dirs related to `db` will be removed
    shell("rm", f"{path_to_data}/metadata/db.sql")

    shell("mkdir", f"{store_root}/kek")
    shell("touch", f"{store_root}/12")
    # Best effort: a failure here is only reported, not fatal.
    # NOTE(review): presumably store/456 may already exist -- confirm.
    try:
        shell("mkdir", f"{store_root}/456")
    except Exception as e:
        print("Failed to create 456/:", str(e))
    shell("mkdir", f"{store_root}/456/testgarbage")
    shell("mkdir", f"{store_root}/456/30000000-1000-4000-8000-000000000003")
    shell("touch", f"{store_root}/456/45600000-1000-4000-8000-000000000003")
    shell("mkdir", f"{store_root}/456/45600000-1000-4000-8000-000000000004")

    # --- 3. Restart; first pass only revokes access rights ------------------
    node1.start_clickhouse()
    # Repeat the non-permanent detaches issued before the restart.
    node1.query("DETACH DATABASE db2")
    node1.query("DETACH TABLE db3.log")

    for message in (
        "Removing access rights for unused directory",
        "directories from store",
    ):
        node1.wait_for_log_line(message, timeout=60, look_behind_lines=whole_log)

    store = names_in(store_root)
    assert_present(store, "100", "200", "300", "456", "kek", "12")
    assert no_access_dir in long_listing(store_root)
    assert no_access_dir in long_listing(f"{store_root}/456")

    # Metadata is removed, so store/100 contains garbage
    store100 = names_in(f"{store_root}/100")
    assert_present(
        store100,
        "10000000-1000-4000-8000-000000000001",
        "10000000-1000-4000-8000-000000000002",
        "10000000-1000-4000-8000-000000000003",
    )
    assert no_access_dir in long_listing(f"{store_root}/100")

    assert_detached_dirs_untouched()

    # Manually created garbage
    store456 = names_in(f"{store_root}/456")
    assert_present(
        store456,
        "30000000-1000-4000-8000-000000000003",
        "45600000-1000-4000-8000-000000000003",
        "45600000-1000-4000-8000-000000000004",
        "testgarbage",
    )
    # Ten dashes: an entry whose `ls -l` mode string is all dashes.
    assert "----------" in long_listing(f"{store_root}/456")

    # --- 4. Second pass removes the unused entries --------------------------
    for message in (
        "Removing unused directory",
        "directories from store",
        "Nothing to clean up from store/",
    ):
        node1.wait_for_log_line(message, timeout=90, look_behind_lines=whole_log)

    store = names_in(store_root)
    assert_present(store, "100", "200", "300", "456")
    assert "kek" not in store
    # "12" is matched as a whole line between two newlines.
    assert "\n12\n" not in store
    assert no_access_dir not in long_listing(store_root)

    # The directories that belonged to `db` have been removed from store/100.
    store100 = names_in(f"{store_root}/100")
    assert_absent(
        store100,
        "10000000-1000-4000-8000-000000000001",
        "10000000-1000-4000-8000-000000000002",
        "10000000-1000-4000-8000-000000000003",
    )
    assert no_access_dir not in long_listing(f"{store_root}/100")

    assert_detached_dirs_untouched()

    # The manually created garbage is gone as well.
    store456 = names_in(f"{store_root}/456")
    assert_absent(
        store456,
        "30000000-1000-4000-8000-000000000003",
        "45600000-1000-4000-8000-000000000003",
        "45600000-1000-4000-8000-000000000004",
        "testgarbage",
    )
    # Nine dashes: no entry with an all-dashes permission string remains.
    assert "---------" not in long_listing(f"{store_root}/456")

    # --- 5. Re-attach and verify the data survived --------------------------
    for statement in (
        "ATTACH TABLE db3.log2",
        "ATTACH DATABASE db2",
        "ATTACH TABLE db3.log",
    ):
        node1.query(statement)

    for table in ("db3.log", "db3.log2", "db2.log"):
        assert "1\n" == node1.query(f"SELECT * FROM {table}")