mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-30 11:32:03 +00:00
fix rare data loss
This commit is contained in:
parent
ec80068611
commit
f8062d77e1
@ -9159,7 +9159,7 @@ std::pair<bool, NameSet> StorageReplicatedMergeTree::unlockSharedDataByID(
|
||||
|
||||
files_not_to_remove.insert(parent_not_to_remove.begin(), parent_not_to_remove.end());
|
||||
|
||||
LOG_TRACE(logger, "Remove zookeeper lock {} for part {}", zookeeper_part_replica_node, part_name);
|
||||
LOG_TRACE(logger, "Removing zookeeper lock {} for part {} (files to keep: [{}])", zookeeper_part_replica_node, part_name, fmt::join(files_not_to_remove, ", "));
|
||||
|
||||
if (auto ec = zookeeper_ptr->tryRemove(zookeeper_part_replica_node); ec != Coordination::Error::ZOK)
|
||||
{
|
||||
@ -9196,7 +9196,7 @@ std::pair<bool, NameSet> StorageReplicatedMergeTree::unlockSharedDataByID(
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_TRACE(logger, "No more children left for for {}, will try to remove the whole node", zookeeper_part_uniq_node);
|
||||
LOG_TRACE(logger, "No more children left for {}, will try to remove the whole node", zookeeper_part_uniq_node);
|
||||
}
|
||||
|
||||
auto error_code = zookeeper_ptr->tryRemove(zookeeper_part_uniq_node);
|
||||
@ -9252,8 +9252,19 @@ std::pair<bool, NameSet> StorageReplicatedMergeTree::unlockSharedDataByID(
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_TRACE(logger, "Can't remove parent zookeeper lock {} for part {}, because children {} ({}) exists",
|
||||
zookeeper_part_node, part_name, children.size(), fmt::join(children, ", "));
|
||||
/// It's possible that we have two instances of the same part with different blob names of
|
||||
/// FILE_FOR_REFERENCES_CHECK aka checksums.txt aka part_unique_id,
|
||||
/// and other files in both parts are hardlinks (the same blobs are shared between part instances).
|
||||
/// This can happen after unsuccessful attempts to commit a mutated part to ZooKeeper.
|
||||
/// It's not a problem if we have found the mutation parent (so we have files_not_to_remove).
|
||||
/// But in rare cases the mutation parents could have already been removed (so we don't have the list of hardlinks).
|
||||
|
||||
/// NOTE: It is not 100% certain that the parent_not_to_remove list is always complete when it is not empty.
|
||||
if (part_info.mutation && parent_not_to_remove.empty())
|
||||
part_has_no_more_locks = false;
|
||||
|
||||
LOG_TRACE(logger, "Can't remove parent zookeeper lock {} for part {}, because children {} ({}) exists (can remove blobs: {})",
|
||||
zookeeper_part_node, part_name, children.size(), fmt::join(children, ", "), part_has_no_more_locks);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user