Fix parts removal after incorrect server shutdown

Before this patch (and ever since transaction support was added in #24258),
if the server did not remove old parts on shutdown, they would be left on
disk forever.

Fixes: #24258 (cc @tavplubix)
v2: use Tx::PrehistoricTID over removePartsFromWorkingSet() from ctor
v3: apply black to new test
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2022-06-27 21:40:42 +03:00
parent f0271dc486
commit e70cff2cea
3 changed files with 80 additions and 1 deletions

View File

@ -1324,7 +1324,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
auto deactivate_part = [&] (DataPartIteratorByStateAndInfo it)
{
IMergeTreeDataPart & part = *it;
const DataPartPtr & part = *it;
part->remove_time.store(part->modification_time, std::memory_order_relaxed);
auto creation_csn = part->version.creation_csn.load(std::memory_order_relaxed);
@ -1339,6 +1339,14 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
}
modifyPartState(it, DataPartState::Outdated);
removePartContributionToDataVolume(part);
/// Explicitly set removal_tid_lock for parts w/o transaction (i.e. w/o txn_version.txt)
/// to avoid keeping part forever (see VersionMetadata::canBeRemoved())
if (!part->version.isRemovalTIDLocked())
{
TransactionInfoContext transaction_context{getStorageID(), part->name};
part->version.lockRemovalTID(Tx::PrehistoricTID, transaction_context);
}
};
/// All parts are in "Active" state after loading

View File

@ -0,0 +1,71 @@
# pylint: disable=unused-argument
# pylint: disable=redefined-outer-name
# pylint: disable=line-too-long
import pytest
from helpers.cluster import ClickHouseCluster
# Cluster definition shared by every test in this module.
cluster = ClickHouseCluster(__file__)
# NOTE(review): stay_alive=True is presumably what lets the test kill and restart
# the server via restart_clickhouse() — confirm against helpers.cluster.
node = cluster.add_instance("node", stay_alive=True)
def query_split(node, query):
    """Run ``query`` on ``node`` and parse its tab-separated output into rows.

    :param node: object exposing ``query(sql) -> str``.
    :param query: SQL text passed to ``node.query`` unchanged.
    :return: list of rows, each row being a list of string fields.

    The whole output and every individual line are stripped of surrounding
    whitespace before the line is split on tabs. An empty result therefore
    yields ``[[""]]`` (one row with a single empty field), not ``[]``.
    """
    output = node.query(query).strip()
    return [line.strip().split("\t") for line in output.split("\n")]
@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture: start the cluster, hand it to the tests, then shut it down.

    The ``finally`` clause runs ``cluster.shutdown()`` even when startup or a
    test raises.
    """
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()
def test_parts_removal_on_abnormal_exit(start_cluster):
    """Check that outdated parts surviving a server kill are removed once the table is reloaded.

    Steps:
      1. Create two parts and merge them, leaving two Outdated source parts
         and one Active merged part.
      2. Kill and restart the server; the same three parts are expected to be
         reported with the same states.
      3. Detach and attach the table; only the Active merged part is expected
         to remain.
    """
    # Explicit ordering keeps the positional comparison below deterministic.
    parts_query = (
        "select name, _state from system.parts "
        "where table = 'test_parts_removal' order by name"
    )
    parts_with_outdated = [
        ["all_1_1_0", "Outdated"],
        ["all_1_2_1", "Active"],
        ["all_2_2_0", "Outdated"],
    ]

    node.query(
        """
        create table test_parts_removal (key Int) engine=MergeTree order by key;
        insert into test_parts_removal values (1); -- all_1_1_0
        insert into test_parts_removal values (2); -- all_2_2_0
        optimize table test_parts_removal; -- all_1_2_1
        """
    )
    assert query_split(node, parts_query) == parts_with_outdated

    # Abnormal exit: the server is killed rather than stopped.
    node.restart_clickhouse(kill=True)
    assert query_split(node, parts_query) == parts_with_outdated

    # Reloading the table is expected to drop the outdated parts.
    node.query(
        """
        detach table test_parts_removal;
        attach table test_parts_removal;
        """
    )
    assert query_split(node, parts_query) == [
        ["all_1_2_1", "Active"],
    ]