Implement max_suspicious_broken_parts_bytes

Broken parts are usually found among just-created parts, which are
typically small, so it makes sense to also have a limit in bytes for
broken parts.

The default value is set to 1 GiB.
Azat Khuzhin 2021-09-07 22:32:48 +03:00
parent e3ee039c48
commit b108c5e1b0
2 changed files with 24 additions and 11 deletions
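
For orientation, the check introduced here boils down to the sketch below: while loading parts, count the broken ones and sum their on-disk size, then refuse to auto-detach them if either total exceeds its threshold (unless sanity checks are explicitly skipped). This is a simplified, self-contained illustration; PartOnDisk and checkBrokenParts are hypothetical stand-ins, not the actual MergeTreeData code.

#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>

/// Hypothetical stand-in for a part discovered on disk during loading.
struct PartOnDisk
{
    std::string name;
    bool broken = false;        /// failed to load / bad checksums
    uint64_t bytes_on_disk = 0; /// size obtained by walking the part directory
};

/// Sketch of the startup sanity check: deny automatic deletion of broken
/// parts when either their count or their total size is suspiciously large.
void checkBrokenParts(const std::vector<PartOnDisk> & parts,
                      uint64_t max_suspicious_broken_parts,
                      uint64_t max_suspicious_broken_parts_bytes,
                      bool skip_sanity_checks)
{
    uint64_t suspicious_broken_parts = 0;
    uint64_t suspicious_broken_parts_bytes = 0;

    for (const auto & part : parts)
    {
        if (!part.broken)
            continue;
        ++suspicious_broken_parts;
        suspicious_broken_parts_bytes += part.bytes_on_disk;
    }

    if (skip_sanity_checks)
        return;

    if (suspicious_broken_parts > max_suspicious_broken_parts)
        throw std::runtime_error("Suspiciously many (" + std::to_string(suspicious_broken_parts) + ") broken parts to remove.");

    if (suspicious_broken_parts_bytes > max_suspicious_broken_parts_bytes)
        throw std::runtime_error("Suspiciously big size (" + std::to_string(suspicious_broken_parts_bytes) + " bytes) of all broken parts to remove.");
}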

src/Storages/MergeTree/MergeTreeData.cpp

@@ -978,6 +978,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
     DataPartsVector broken_parts_to_detach;
     size_t suspicious_broken_parts = 0;
+    size_t suspicious_broken_parts_bytes = 0;
     std::atomic<bool> has_adaptive_parts = false;
     std::atomic<bool> has_non_adaptive_parts = false;
@@ -1004,17 +1005,18 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
             if (part_disk_ptr->exists(marker_path))
             {
+                /// NOTE: getBytesOnDisk() cannot be used here, since it may be zero if checksums.txt does not exist
+                size_t size_of_part = IMergeTreeDataPart::calculateTotalSizeOnDisk(part->volume->getDisk(), part->getFullRelativePath());
                 LOG_WARNING(log,
-                    "Detaching stale part {}{}, which should have been deleted after a move. That can only happen "
-                    "after unclean restart of ClickHouse after move of a part having an operation blocking that "
-                    "stale copy of part.",
-                    getFullPathOnDisk(part_disk_ptr), part_name);
+                    "Detaching stale part {}{} (size: {}), which should have been deleted after a move. "
+                    "That can only happen after unclean restart of ClickHouse after move of a part having an operation blocking that stale copy of part.",
+                    getFullPathOnDisk(part_disk_ptr), part_name, formatReadableSizeWithBinarySuffix(size_of_part));
                 std::lock_guard loading_lock(mutex);
                 broken_parts_to_detach.push_back(part);
                 ++suspicious_broken_parts;
+                suspicious_broken_parts_bytes += size_of_part;
                 return;
             }
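
The new messages (and the new exception further below) format the accumulated size with ClickHouse's formatReadableSizeWithBinarySuffix. Roughly, such a formatter scales the value by powers of 1024 and appends a binary unit; the sketch below is only an approximation for illustration, not the real implementation.

#include <cstddef>
#include <cstdio>
#include <string>

/// Approximation of a "readable size with binary suffix" formatter:
/// 1073741824 -> "1.00 GiB". Not ClickHouse's actual implementation.
std::string formatReadableSize(double value)
{
    static const char * units[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"};
    size_t i = 0;
    while (value >= 1024 && i + 1 < sizeof(units) / sizeof(units[0]))
    {
        value /= 1024;
        ++i;
    }
    char buf[64];
    std::snprintf(buf, sizeof(buf), "%.2f %s", value, units[i]);
    return buf;
}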
@@ -1043,16 +1045,20 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
             /// Ignore broken parts that can appear as a result of hard server restart.
             if (broken)
             {
-                LOG_ERROR(log,
-                    "Detaching broken part {}{}. If it happened after update, it is likely because of backward "
-                    "incompatibility. You need to resolve this manually",
-                    getFullPathOnDisk(part_disk_ptr), part_name);
+                /// NOTE: getBytesOnDisk() cannot be used here, since it may be zero if checksums.txt does not exist
+                size_t size_of_part = IMergeTreeDataPart::calculateTotalSizeOnDisk(part->volume->getDisk(), part->getFullRelativePath());
+                LOG_ERROR(log,
+                    "Detaching broken part {}{} (size: {}). "
+                    "If it happened after update, it is likely because of backward incompatibility. "
+                    "You need to resolve this manually",
+                    getFullPathOnDisk(part_disk_ptr), part_name, formatReadableSizeWithBinarySuffix(size_of_part));
                 std::lock_guard loading_lock(mutex);
                 broken_parts_to_detach.push_back(part);
                 ++suspicious_broken_parts;
+                suspicious_broken_parts_bytes += size_of_part;
                 return;
             }
@@ -1099,8 +1105,14 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
     has_non_adaptive_index_granularity_parts = has_non_adaptive_parts;

     if (suspicious_broken_parts > settings->max_suspicious_broken_parts && !skip_sanity_checks)
-        throw Exception("Suspiciously many (" + toString(suspicious_broken_parts) + ") broken parts to remove.",
-            ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS);
+        throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS,
+            "Suspiciously many ({}) broken parts to remove.",
+            suspicious_broken_parts);
+
+    if (suspicious_broken_parts_bytes > settings->max_suspicious_broken_parts_bytes && !skip_sanity_checks)
+        throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS,
+            "Suspiciously big size ({}) of all broken parts to remove.",
+            formatReadableSizeWithBinarySuffix(suspicious_broken_parts_bytes));

     for (auto & part : broken_parts_to_detach)
         part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes

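The NOTE added in both hunks explains why the size is computed by walking the part directory: for a broken part, checksums.txt may be missing or empty, so the metadata-based getBytesOnDisk() could report zero. In spirit, the calculation amounts to the following; this is an std::filesystem approximation, not the actual IMergeTreeDataPart::calculateTotalSizeOnDisk, which goes through ClickHouse's IDisk abstraction.

#include <cstdint>
#include <filesystem>

/// Sum the sizes of all regular files under a part directory. Works even
/// when the part's metadata (e.g. checksums.txt) is absent or unreadable.
uint64_t totalSizeOnDisk(const std::filesystem::path & part_path)
{
    uint64_t total = 0;
    for (const auto & entry : std::filesystem::recursive_directory_iterator(part_path))
        if (entry.is_regular_file())
            total += entry.file_size();
    return total;
}
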
src/Storages/MergeTree/MergeTreeSettings.h

@@ -79,6 +79,7 @@ struct Settings;
     M(Seconds, try_fetch_recompressed_part_timeout, 7200, "Recompression works slow in most cases, so we don't start merge with recompression until this timeout and trying to fetch recompressed part from replica which assigned this merge with recompression.", 0) \
     M(Bool, always_fetch_merged_part, false, "If true, replica never merge parts and always download merged parts from other replicas.", 0) \
     M(UInt64, max_suspicious_broken_parts, 10, "Max broken parts, if more - deny automatic deletion.", 0) \
+    M(UInt64, max_suspicious_broken_parts_bytes, 1ULL * 1024 * 1024 * 1024, "Max size of all broken parts, if more - deny automatic deletion.", 0) \
     M(UInt64, max_files_to_modify_in_alter_columns, 75, "Not apply ALTER if number of files for modification(deletion, addition) more than this.", 0) \
     M(UInt64, max_files_to_remove_in_alter_columns, 50, "Not apply ALTER, if number of files for deletion more than this.", 0) \
     M(Float, replicated_max_ratio_of_wrong_parts, 0.5, "If ratio of wrong parts to total number of parts is less than this - allow to start.", 0) \
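
The settings header is an X-macro list: each M(type, name, default, description, flags) entry expands into a declared setting, which is how max_suspicious_broken_parts_bytes (default 1ULL * 1024 * 1024 * 1024, i.e. 1 GiB) appears alongside the existing count-based limit. The toy below shows how such a list can drive both member declarations and introspection; APPLY_FOR_TOY_SETTINGS and ToySettings are made-up names, not ClickHouse's actual SettingsTraits machinery.

#include <cstdint>
#include <iostream>

/// Toy X-macro list mirroring the shape of the real settings list above.
#define APPLY_FOR_TOY_SETTINGS(M) \
    M(uint64_t, max_suspicious_broken_parts, 10, "Max broken parts, if more - deny automatic deletion.") \
    M(uint64_t, max_suspicious_broken_parts_bytes, 1ULL * 1024 * 1024 * 1024, "Max size of all broken parts, if more - deny automatic deletion.")

struct ToySettings
{
    /// Expand the list once into data members with their defaults...
#define DECLARE_MEMBER(TYPE, NAME, DEFAULT, DESCRIPTION) TYPE NAME = DEFAULT;
    APPLY_FOR_TOY_SETTINGS(DECLARE_MEMBER)
#undef DECLARE_MEMBER

    /// ...and once more into a human-readable description of every setting.
    static void describe()
    {
#define PRINT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \
        std::cout << #NAME << " (default " << uint64_t(DEFAULT) << "): " << DESCRIPTION << '\n';
        APPLY_FOR_TOY_SETTINGS(PRINT_SETTING)
#undef PRINT_SETTING
    }
};

int main()
{
    ToySettings settings;
    ToySettings::describe();
    std::cout << "bytes limit: " << settings.max_suspicious_broken_parts_bytes << '\n'; /// 1073741824
}

Like other MergeTree settings, the new limit can be overridden per table, for example in the SETTINGS clause of CREATE TABLE.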