Do not ignore error during moving parts to detached for MergeTree

MergeTree is fragile in this case, since the source of truth for it is
the filesystem and it will be left in an inconsistent state.

Image the following:
- during startup you got some broken part, that should be moved to
  detached
- this rename throws exception (i.e. permission error)
- but this error is ignored, server continues, and tries to merge
  something that produces intersecting parts for that broken part
- later on restart you will got intersecting parts (since at the time of
  scanning for intersecting parts, the part is not checked)

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2024-10-08 13:54:23 +02:00
parent fd9c4d53cc
commit 4f6c0c0b54
4 changed files with 28 additions and 25 deletions

View File

@ -1894,7 +1894,6 @@ bool IMergeTreeDataPart::shallParticipateInMerges(const StoragePolicyPtr & stora
} }
void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_new_dir_if_exists)
try
{ {
assertOnDisk(); assertOnDisk();
@ -1920,18 +1919,6 @@ try
for (const auto & [_, part] : projection_parts) for (const auto & [_, part] : projection_parts)
part->getDataPartStorage().changeRootPath(old_projection_root_path, new_projection_root_path); part->getDataPartStorage().changeRootPath(old_projection_root_path, new_projection_root_path);
} }
catch (...)
{
if (startsWith(new_relative_path, fs::path(MergeTreeData::DETACHED_DIR_NAME) / ""))
{
// Don't throw when the destination is to the detached folder. It might be able to
// recover in some cases, such as fetching parts into multi-disks while some of the
// disks are broken.
tryLogCurrentException(__PRETTY_FUNCTION__);
}
else
throw;
}
std::pair<bool, NameSet> IMergeTreeDataPart::canRemovePart() const std::pair<bool, NameSet> IMergeTreeDataPart::canRemovePart() const
{ {
@ -2038,11 +2025,26 @@ std::optional<String> IMergeTreeDataPart::getRelativePathForDetachedPart(const S
return {}; return {};
} }
void IMergeTreeDataPart::renameToDetached(const String & prefix) void IMergeTreeDataPart::renameToDetached(const String & prefix, bool ignore_error)
{ {
auto path_to_detach = getRelativePathForDetachedPart(prefix, /* broken */ false); auto path_to_detach = getRelativePathForDetachedPart(prefix, /* broken */ false);
assert(path_to_detach); assert(path_to_detach);
renameTo(path_to_detach.value(), true); try
{
renameTo(path_to_detach.value(), true);
}
catch (...)
{
if (ignore_error)
{
// Don't throw when the destination is to the detached folder. It might be able to
// recover in some cases, such as fetching parts into multi-disks while some of the
// disks are broken.
tryLogCurrentException(__PRETTY_FUNCTION__);
}
else
throw;
}
part_is_probably_removed_from_disk = true; part_is_probably_removed_from_disk = true;
} }

View File

@ -396,7 +396,7 @@ public:
auto getFilesChecksums() const { return checksums.files; } auto getFilesChecksums() const { return checksums.files; }
/// Moves a part to detached/ directory and adds prefix to its name /// Moves a part to detached/ directory and adds prefix to its name
void renameToDetached(const String & prefix); void renameToDetached(const String & prefix, bool ignore_error = false);
/// Makes checks and move part to new directory /// Makes checks and move part to new directory
/// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly /// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly

View File

@ -2009,9 +2009,10 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
if (suspicious_broken_unexpected_parts != 0) if (suspicious_broken_unexpected_parts != 0)
LOG_WARNING(log, "Found suspicious broken unexpected parts {} with total rows count {}", suspicious_broken_unexpected_parts, suspicious_broken_unexpected_parts_bytes); LOG_WARNING(log, "Found suspicious broken unexpected parts {} with total rows count {}", suspicious_broken_unexpected_parts, suspicious_broken_unexpected_parts_bytes);
bool replicated = dynamic_cast<StorageReplicatedMergeTree *>(this) != nullptr;
if (!is_static_storage) if (!is_static_storage)
for (auto & part : broken_parts_to_detach) for (auto & part : broken_parts_to_detach)
part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes part->renameToDetached("broken-on-start", /*ignore_error=*/ replicated); /// detached parts must not have '_' in prefixes
resetObjectColumnsFromActiveParts(part_lock); resetObjectColumnsFromActiveParts(part_lock);
resetSerializationHints(part_lock); resetSerializationHints(part_lock);
@ -2099,6 +2100,7 @@ try
ThreadPoolCallbackRunnerLocal<void> runner(getUnexpectedPartsLoadingThreadPool().get(), "UnexpectedParts"); ThreadPoolCallbackRunnerLocal<void> runner(getUnexpectedPartsLoadingThreadPool().get(), "UnexpectedParts");
bool replicated = dynamic_cast<StorageReplicatedMergeTree *>(this) != nullptr;
for (auto & load_state : unexpected_data_parts) for (auto & load_state : unexpected_data_parts)
{ {
std::lock_guard lock(unexpected_data_parts_mutex); std::lock_guard lock(unexpected_data_parts_mutex);
@ -2114,9 +2116,7 @@ try
chassert(load_state.part); chassert(load_state.part);
if (load_state.is_broken) if (load_state.is_broken)
{ load_state.part->renameToDetached("broken-on-start", /*ignore_error=*/ replicated); /// detached parts must not have '_' in prefixes
load_state.part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes
}
}, Priority{}); }, Priority{});
} }
runner.waitForAllToFinishAndRethrowFirstError(); runner.waitForAllToFinishAndRethrowFirstError();
@ -2167,6 +2167,7 @@ try
ThreadPoolCallbackRunnerLocal<void> runner(getOutdatedPartsLoadingThreadPool().get(), "OutdatedParts"); ThreadPoolCallbackRunnerLocal<void> runner(getOutdatedPartsLoadingThreadPool().get(), "OutdatedParts");
bool replicated = dynamic_cast<StorageReplicatedMergeTree *>(this) != nullptr;
while (true) while (true)
{ {
ThreadFuzzer::maybeInjectSleep(); ThreadFuzzer::maybeInjectSleep();
@ -2207,7 +2208,7 @@ try
if (res.is_broken) if (res.is_broken)
{ {
forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(res.part->name); forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(res.part->name);
res.part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes res.part->renameToDetached("broken-on-start", /*ignore_error=*/ replicated); /// detached parts must not have '_' in prefixes
} }
else if (res.part->is_duplicate) else if (res.part->is_duplicate)
res.part->remove(); res.part->remove();
@ -4530,6 +4531,7 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT
auto lock = lockParts(); auto lock = lockParts();
bool removed_active_part = false; bool removed_active_part = false;
bool restored_active_part = false; bool restored_active_part = false;
bool replicated = dynamic_cast<StorageReplicatedMergeTree *>(this) != nullptr;
auto it_part = data_parts_by_info.find(part_to_detach->info); auto it_part = data_parts_by_info.find(part_to_detach->info);
if (it_part == data_parts_by_info.end()) if (it_part == data_parts_by_info.end())
@ -4548,7 +4550,7 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT
} }
modifyPartState(it_part, DataPartState::Deleting); modifyPartState(it_part, DataPartState::Deleting);
asMutableDeletingPart(part)->renameToDetached(prefix); asMutableDeletingPart(part)->renameToDetached(prefix, /*ignore_error=*/ replicated);
LOG_TEST(log, "forcefullyMovePartToDetachedAndRemoveFromMemory: removing {} from data_parts_indexes", part->getNameWithState()); LOG_TEST(log, "forcefullyMovePartToDetachedAndRemoveFromMemory: removing {} from data_parts_indexes", part->getNameWithState());
data_parts_indexes.erase(it_part); data_parts_indexes.erase(it_part);
@ -5939,6 +5941,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartRestoredFromBackup(cons
.withPartType(MergeTreeDataPartType::Wide) .withPartType(MergeTreeDataPartType::Wide)
.build(); .build();
} }
/// Errors should not be ignored RESTORE, since you should not restore to broken disks.
part->renameToDetached("broken-from-backup"); part->renameToDetached("broken-from-backup");
}; };

View File

@ -1844,9 +1844,7 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks)
/// detached all unexpected data parts after sanity check. /// detached all unexpected data parts after sanity check.
for (auto & part_state : unexpected_data_parts) for (auto & part_state : unexpected_data_parts)
{ part_state.part->renameToDetached("ignored", /* ignore_error= */ true);
part_state.part->renameToDetached("ignored");
}
unexpected_data_parts.clear(); unexpected_data_parts.clear();
return true; return true;