mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-21 01:00:48 +00:00
Don't leave trash part nodes in ZK in case of deleting non-existing duplicated parts. [#CLICKHOUSE-3252]
This commit is contained in:
parent
221110454e
commit
84660029ec
@ -1304,7 +1304,7 @@ void MergeTreeData::replaceParts(const DataPartsVector & remove, const DataParts
|
||||
|
||||
for (const DataPartPtr & part : remove)
|
||||
{
|
||||
part->remove_time = clear_without_timeout ? 0 : time(0);
|
||||
part->remove_time = clear_without_timeout ? 0 : time(nullptr);
|
||||
|
||||
if (data_parts.erase(part))
|
||||
removePartContributionToColumnSizes(part);
|
||||
|
@ -3790,11 +3790,26 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK(Logger * log_)
|
||||
if (!count)
|
||||
return;
|
||||
|
||||
/// Part names that were successfully deleted from filesystem and should be deleted from ZooKeeper
|
||||
Strings part_names;
|
||||
auto remove_from_zookeeper = [&] ()
|
||||
{
|
||||
LOG_DEBUG(log, "Removed " << part_names.size() << " old parts from filesystem. Removing them from ZooKeeper.");
|
||||
|
||||
try
|
||||
{
|
||||
removePartsFromZooKeeper(zookeeper, part_names);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
LOG_ERROR(log, "There is a problem with deleting parts from ZooKeeper: " << getCurrentExceptionMessage(false));
|
||||
}
|
||||
};
|
||||
|
||||
try
|
||||
{
|
||||
LOG_DEBUG(log, "Removing " << parts.size() << " old parts from filesystem");
|
||||
|
||||
Strings part_names;
|
||||
while (!parts.empty())
|
||||
{
|
||||
MergeTreeData::DataPartPtr & part = parts.back();
|
||||
@ -3802,42 +3817,81 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK(Logger * log_)
|
||||
part_names.emplace_back(part->name);
|
||||
parts.pop_back();
|
||||
}
|
||||
|
||||
LOG_DEBUG(log, "Removed " << part_names.size() << " old parts from filesystem. Removing them from ZooKeeper.");
|
||||
|
||||
try
|
||||
{
|
||||
removePartsFromZooKeeper(zookeeper, part_names);
|
||||
}
|
||||
catch (const zkutil::KeeperException & e)
|
||||
{
|
||||
LOG_ERROR(log, "There is a problem with deleting parts from ZooKeeper: " << getCurrentExceptionMessage(false));
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
|
||||
/// Finalize deletion of parts already deleted from filesystem, rollback remaining parts
|
||||
data.addOldParts(parts);
|
||||
remove_from_zookeeper();
|
||||
|
||||
throw;
|
||||
}
|
||||
|
||||
/// Finalize deletion
|
||||
remove_from_zookeeper();
|
||||
|
||||
LOG_DEBUG(log, "Removed " << count << " old parts");
|
||||
}
|
||||
|
||||
|
||||
static int32_t tryMultiWithRetries(zkutil::ZooKeeperPtr & zookeeper, zkutil::Ops & ops) noexcept
|
||||
{
|
||||
int32_t code;
|
||||
try
|
||||
{
|
||||
code = zookeeper->tryMultiWithRetries(ops);
|
||||
}
|
||||
catch (const zkutil::KeeperException & e)
|
||||
{
|
||||
code = e.code;
|
||||
}
|
||||
|
||||
return code;
|
||||
}
|
||||
|
||||
|
||||
void StorageReplicatedMergeTree::removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names)
|
||||
{
|
||||
zkutil::Ops ops;
|
||||
auto it_first_node_in_batch = part_names.cbegin();
|
||||
|
||||
for (auto it = part_names.cbegin(); it != part_names.cend(); ++it)
|
||||
{
|
||||
removePartFromZooKeeper(*it, ops);
|
||||
|
||||
if (ops.size() >= zkutil::MULTI_BATCH_SIZE || next(it) == part_names.cend())
|
||||
auto it_next = std::next(it);
|
||||
if (ops.size() >= zkutil::MULTI_BATCH_SIZE || it_next == part_names.cend())
|
||||
{
|
||||
/// It is Ok to use multi with retries to delete nodes, because new nodes with the same names cannot appear here
|
||||
zookeeper->tryMultiWithRetries(ops);
|
||||
auto code = tryMultiWithRetries(zookeeper, ops);
|
||||
ops.clear();
|
||||
|
||||
if (code == ZNONODE)
|
||||
{
|
||||
/// Fallback
|
||||
LOG_DEBUG(log, "There are no some part nodes in ZooKeeper, will remove part nodes sequentially");
|
||||
|
||||
for (auto it_in_batch = it_first_node_in_batch; it_in_batch != it_next; ++it_in_batch)
|
||||
{
|
||||
zkutil::Ops cur_ops;
|
||||
removePartFromZooKeeper(*it_in_batch, cur_ops);
|
||||
auto cur_code = tryMultiWithRetries(zookeeper, cur_ops);
|
||||
|
||||
if (cur_code == ZNONODE)
|
||||
LOG_DEBUG(log, "There is no part " << *it_in_batch << " in ZooKeeper, it was only in filesystem");
|
||||
else if (cur_code != ZOK)
|
||||
LOG_WARNING(log, "Cannot remove part " << *it_in_batch << " from ZooKeeper: " << ::zerror(cur_code));
|
||||
}
|
||||
}
|
||||
else if (code != ZOK)
|
||||
{
|
||||
LOG_WARNING(log, "There was a problem with deleting " << (it_next - it_first_node_in_batch)
|
||||
<< " nodes from ZooKeeper: " << ::zerror(code));
|
||||
}
|
||||
|
||||
it_first_node_in_batch = it_next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user