mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-18 12:22:12 +00:00
Do not ignore error during moving parts to detached for MergeTree
MergeTree is fragile in this case, since the source of truth for it is the filesystem and it will be left in an inconsistent state. Image the following: - during startup you got some broken part, that should be moved to detached - this rename throws exception (i.e. permission error) - but this error is ignored, server continues, and tries to merge something that produces intersecting parts for that broken part - later on restart you will got intersecting parts (since at the time of scanning for intersecting parts, the part is not checked) Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
parent
fd9c4d53cc
commit
4f6c0c0b54
@ -1894,7 +1894,6 @@ bool IMergeTreeDataPart::shallParticipateInMerges(const StoragePolicyPtr & stora
|
|||||||
}
|
}
|
||||||
|
|
||||||
void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_new_dir_if_exists)
|
void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_new_dir_if_exists)
|
||||||
try
|
|
||||||
{
|
{
|
||||||
assertOnDisk();
|
assertOnDisk();
|
||||||
|
|
||||||
@ -1920,18 +1919,6 @@ try
|
|||||||
for (const auto & [_, part] : projection_parts)
|
for (const auto & [_, part] : projection_parts)
|
||||||
part->getDataPartStorage().changeRootPath(old_projection_root_path, new_projection_root_path);
|
part->getDataPartStorage().changeRootPath(old_projection_root_path, new_projection_root_path);
|
||||||
}
|
}
|
||||||
catch (...)
|
|
||||||
{
|
|
||||||
if (startsWith(new_relative_path, fs::path(MergeTreeData::DETACHED_DIR_NAME) / ""))
|
|
||||||
{
|
|
||||||
// Don't throw when the destination is to the detached folder. It might be able to
|
|
||||||
// recover in some cases, such as fetching parts into multi-disks while some of the
|
|
||||||
// disks are broken.
|
|
||||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::pair<bool, NameSet> IMergeTreeDataPart::canRemovePart() const
|
std::pair<bool, NameSet> IMergeTreeDataPart::canRemovePart() const
|
||||||
{
|
{
|
||||||
@ -2038,11 +2025,26 @@ std::optional<String> IMergeTreeDataPart::getRelativePathForDetachedPart(const S
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
void IMergeTreeDataPart::renameToDetached(const String & prefix)
|
void IMergeTreeDataPart::renameToDetached(const String & prefix, bool ignore_error)
|
||||||
{
|
{
|
||||||
auto path_to_detach = getRelativePathForDetachedPart(prefix, /* broken */ false);
|
auto path_to_detach = getRelativePathForDetachedPart(prefix, /* broken */ false);
|
||||||
assert(path_to_detach);
|
assert(path_to_detach);
|
||||||
|
try
|
||||||
|
{
|
||||||
renameTo(path_to_detach.value(), true);
|
renameTo(path_to_detach.value(), true);
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
if (ignore_error)
|
||||||
|
{
|
||||||
|
// Don't throw when the destination is to the detached folder. It might be able to
|
||||||
|
// recover in some cases, such as fetching parts into multi-disks while some of the
|
||||||
|
// disks are broken.
|
||||||
|
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
throw;
|
||||||
|
}
|
||||||
part_is_probably_removed_from_disk = true;
|
part_is_probably_removed_from_disk = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -396,7 +396,7 @@ public:
|
|||||||
auto getFilesChecksums() const { return checksums.files; }
|
auto getFilesChecksums() const { return checksums.files; }
|
||||||
|
|
||||||
/// Moves a part to detached/ directory and adds prefix to its name
|
/// Moves a part to detached/ directory and adds prefix to its name
|
||||||
void renameToDetached(const String & prefix);
|
void renameToDetached(const String & prefix, bool ignore_error = false);
|
||||||
|
|
||||||
/// Makes checks and move part to new directory
|
/// Makes checks and move part to new directory
|
||||||
/// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly
|
/// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly
|
||||||
|
@ -2009,9 +2009,10 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
|
|||||||
if (suspicious_broken_unexpected_parts != 0)
|
if (suspicious_broken_unexpected_parts != 0)
|
||||||
LOG_WARNING(log, "Found suspicious broken unexpected parts {} with total rows count {}", suspicious_broken_unexpected_parts, suspicious_broken_unexpected_parts_bytes);
|
LOG_WARNING(log, "Found suspicious broken unexpected parts {} with total rows count {}", suspicious_broken_unexpected_parts, suspicious_broken_unexpected_parts_bytes);
|
||||||
|
|
||||||
|
bool replicated = dynamic_cast<StorageReplicatedMergeTree *>(this) != nullptr;
|
||||||
if (!is_static_storage)
|
if (!is_static_storage)
|
||||||
for (auto & part : broken_parts_to_detach)
|
for (auto & part : broken_parts_to_detach)
|
||||||
part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes
|
part->renameToDetached("broken-on-start", /*ignore_error=*/ replicated); /// detached parts must not have '_' in prefixes
|
||||||
|
|
||||||
resetObjectColumnsFromActiveParts(part_lock);
|
resetObjectColumnsFromActiveParts(part_lock);
|
||||||
resetSerializationHints(part_lock);
|
resetSerializationHints(part_lock);
|
||||||
@ -2099,6 +2100,7 @@ try
|
|||||||
|
|
||||||
ThreadPoolCallbackRunnerLocal<void> runner(getUnexpectedPartsLoadingThreadPool().get(), "UnexpectedParts");
|
ThreadPoolCallbackRunnerLocal<void> runner(getUnexpectedPartsLoadingThreadPool().get(), "UnexpectedParts");
|
||||||
|
|
||||||
|
bool replicated = dynamic_cast<StorageReplicatedMergeTree *>(this) != nullptr;
|
||||||
for (auto & load_state : unexpected_data_parts)
|
for (auto & load_state : unexpected_data_parts)
|
||||||
{
|
{
|
||||||
std::lock_guard lock(unexpected_data_parts_mutex);
|
std::lock_guard lock(unexpected_data_parts_mutex);
|
||||||
@ -2114,9 +2116,7 @@ try
|
|||||||
|
|
||||||
chassert(load_state.part);
|
chassert(load_state.part);
|
||||||
if (load_state.is_broken)
|
if (load_state.is_broken)
|
||||||
{
|
load_state.part->renameToDetached("broken-on-start", /*ignore_error=*/ replicated); /// detached parts must not have '_' in prefixes
|
||||||
load_state.part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes
|
|
||||||
}
|
|
||||||
}, Priority{});
|
}, Priority{});
|
||||||
}
|
}
|
||||||
runner.waitForAllToFinishAndRethrowFirstError();
|
runner.waitForAllToFinishAndRethrowFirstError();
|
||||||
@ -2167,6 +2167,7 @@ try
|
|||||||
|
|
||||||
ThreadPoolCallbackRunnerLocal<void> runner(getOutdatedPartsLoadingThreadPool().get(), "OutdatedParts");
|
ThreadPoolCallbackRunnerLocal<void> runner(getOutdatedPartsLoadingThreadPool().get(), "OutdatedParts");
|
||||||
|
|
||||||
|
bool replicated = dynamic_cast<StorageReplicatedMergeTree *>(this) != nullptr;
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
ThreadFuzzer::maybeInjectSleep();
|
ThreadFuzzer::maybeInjectSleep();
|
||||||
@ -2207,7 +2208,7 @@ try
|
|||||||
if (res.is_broken)
|
if (res.is_broken)
|
||||||
{
|
{
|
||||||
forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(res.part->name);
|
forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(res.part->name);
|
||||||
res.part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes
|
res.part->renameToDetached("broken-on-start", /*ignore_error=*/ replicated); /// detached parts must not have '_' in prefixes
|
||||||
}
|
}
|
||||||
else if (res.part->is_duplicate)
|
else if (res.part->is_duplicate)
|
||||||
res.part->remove();
|
res.part->remove();
|
||||||
@ -4530,6 +4531,7 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT
|
|||||||
auto lock = lockParts();
|
auto lock = lockParts();
|
||||||
bool removed_active_part = false;
|
bool removed_active_part = false;
|
||||||
bool restored_active_part = false;
|
bool restored_active_part = false;
|
||||||
|
bool replicated = dynamic_cast<StorageReplicatedMergeTree *>(this) != nullptr;
|
||||||
|
|
||||||
auto it_part = data_parts_by_info.find(part_to_detach->info);
|
auto it_part = data_parts_by_info.find(part_to_detach->info);
|
||||||
if (it_part == data_parts_by_info.end())
|
if (it_part == data_parts_by_info.end())
|
||||||
@ -4548,7 +4550,7 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT
|
|||||||
}
|
}
|
||||||
|
|
||||||
modifyPartState(it_part, DataPartState::Deleting);
|
modifyPartState(it_part, DataPartState::Deleting);
|
||||||
asMutableDeletingPart(part)->renameToDetached(prefix);
|
asMutableDeletingPart(part)->renameToDetached(prefix, /*ignore_error=*/ replicated);
|
||||||
LOG_TEST(log, "forcefullyMovePartToDetachedAndRemoveFromMemory: removing {} from data_parts_indexes", part->getNameWithState());
|
LOG_TEST(log, "forcefullyMovePartToDetachedAndRemoveFromMemory: removing {} from data_parts_indexes", part->getNameWithState());
|
||||||
data_parts_indexes.erase(it_part);
|
data_parts_indexes.erase(it_part);
|
||||||
|
|
||||||
@ -5939,6 +5941,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartRestoredFromBackup(cons
|
|||||||
.withPartType(MergeTreeDataPartType::Wide)
|
.withPartType(MergeTreeDataPartType::Wide)
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
/// Errors should not be ignored RESTORE, since you should not restore to broken disks.
|
||||||
part->renameToDetached("broken-from-backup");
|
part->renameToDetached("broken-from-backup");
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1844,9 +1844,7 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks)
|
|||||||
|
|
||||||
/// detached all unexpected data parts after sanity check.
|
/// detached all unexpected data parts after sanity check.
|
||||||
for (auto & part_state : unexpected_data_parts)
|
for (auto & part_state : unexpected_data_parts)
|
||||||
{
|
part_state.part->renameToDetached("ignored", /* ignore_error= */ true);
|
||||||
part_state.part->renameToDetached("ignored");
|
|
||||||
}
|
|
||||||
unexpected_data_parts.clear();
|
unexpected_data_parts.clear();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
Loading…
Reference in New Issue
Block a user