From 48be3f42f1faf27c9f167ba75e1513976928610f Mon Sep 17 00:00:00 2001
From: alesapin
Date: Mon, 30 Sep 2024 21:36:57 +0200
Subject: [PATCH] Add more retryable errors and fix check data part

KeyMetadata::createBaseDirectory: when throw_if_failed is false, tolerate
read_only_file_system, permission_denied, too_many_files_open and
operation_not_permitted in addition to no_space_on_device.

isRetryableException: handle std::filesystem::filesystem_error and report
it as retryable for a fixed list of std::errc codes.

checkDataPart:
- the static overload takes ReadSettings by value;
- any exception while loading IMergeTreeDataPart::SERIALIZATION_FILE_NAME
  is rethrown as CORRUPTED_DATA with the part name and the full current
  exception message;
- the call site that builds its own ReadSettings additionally switches
  off the distributed cache, filesystem cache logs, page cache, async
  mark loading and remote prefetch;
- on a retryable error, log it at debug level and return empty checksums
  instead of rethrowing.

(cherry picked from commit 6d98507feb0bfcc7761583c3a9a3575108207cee)
---
 src/Interpreters/Cache/Metadata.cpp      |  7 ++-
 src/Storages/MergeTree/checkDataPart.cpp | 78 ++++++++++++++++++------
 2 files changed, 66 insertions(+), 19 deletions(-)

diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp
index 398a48c790b..981c1052d01 100644
--- a/src/Interpreters/Cache/Metadata.cpp
+++ b/src/Interpreters/Cache/Metadata.cpp
@@ -131,7 +131,12 @@ bool KeyMetadata::createBaseDirectory(bool throw_if_failed)
         {
             created_base_directory = false;
 
-            if (!throw_if_failed && e.code() == std::errc::no_space_on_device)
+            if (!throw_if_failed &&
+                (e.code() == std::errc::no_space_on_device
+                || e.code() == std::errc::read_only_file_system
+                || e.code() == std::errc::permission_denied
+                || e.code() == std::errc::too_many_files_open
+                || e.code() == std::errc::operation_not_permitted))
             {
                 LOG_TRACE(cache_metadata->log, "Failed to create base directory for key {}, "
                           "because no space left on device", key);
diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp
index 975097b5fda..01c57384734 100644
--- a/src/Storages/MergeTree/checkDataPart.cpp
+++ b/src/Storages/MergeTree/checkDataPart.cpp
@@ -91,6 +91,22 @@ bool isRetryableException(std::exception_ptr exception_ptr)
             || e.code() == ErrorCodes::CANNOT_SCHEDULE_TASK
             || e.code() == ErrorCodes::ABORTED;
     }
+    catch (const std::filesystem::filesystem_error & e)
+    {
+        return e.code() == std::errc::no_space_on_device ||
+            e.code() == std::errc::read_only_file_system ||
+            e.code() == std::errc::too_many_files_open_in_system ||
+            e.code() == std::errc::operation_not_permitted ||
+            e.code() == std::errc::device_or_resource_busy ||
+            e.code() == std::errc::permission_denied ||
+            e.code() == std::errc::too_many_files_open ||
+            e.code() == std::errc::text_file_busy ||
+            e.code() == std::errc::timed_out ||
+            e.code() == std::errc::not_enough_memory ||
+            e.code() == std::errc::not_supported ||
+            e.code() == std::errc::too_many_links ||
+            e.code() == std::errc::too_many_symbolic_link_levels;
+    }
     catch (const Poco::Net::NetException &)
     {
         return true;
@@ -114,7 +130,7 @@ static IMergeTreeDataPart::Checksums checkDataPart(
     const NamesAndTypesList & columns_list,
     const MergeTreeDataPartType & part_type,
     const NameSet & files_without_checksums,
-    const ReadSettings & read_settings,
+    ReadSettings read_settings,
     bool require_checksums,
     std::function is_cancelled,
     bool & is_broken_projection,
@@ -171,13 +187,9 @@ static IMergeTreeDataPart::Checksums checkDataPart(
             SerializationInfo::Settings settings{ratio_of_defaults, false};
             serialization_infos = SerializationInfoByName::readJSON(columns_txt, settings, *serialization_file);
         }
-        catch (const Poco::Exception & ex)
-        {
-            throw Exception(ErrorCodes::CORRUPTED_DATA, "Failed to load {}, with error {}", IMergeTreeDataPart::SERIALIZATION_FILE_NAME, ex.message());
-        }
         catch (...)
         {
-            throw;
+            throw Exception(ErrorCodes::CORRUPTED_DATA, "Failed to load file {} of data part {}, with error {}", IMergeTreeDataPart::SERIALIZATION_FILE_NAME, data_part->name, getCurrentExceptionMessage(true));
         }
     }
 
@@ -398,19 +410,43 @@ IMergeTreeDataPart::Checksums checkDataPart(
         }
 
         ReadSettings read_settings;
+        read_settings.read_through_distributed_cache = false;
         read_settings.enable_filesystem_cache = false;
+        read_settings.enable_filesystem_cache_log = false;
+        read_settings.enable_filesystem_read_prefetches_log = false;
+        read_settings.page_cache = nullptr;
+        read_settings.load_marks_asynchronously = false;
+        read_settings.remote_fs_prefetch = false;
+        read_settings.page_cache_inject_eviction = false;
+        read_settings.use_page_cache_for_disks_without_file_cache = false;
+
+        try
+        {
+            return checkDataPart(
+                data_part,
+                data_part_storage,
+                data_part->getColumns(),
+                data_part->getType(),
+                data_part->getFileNamesWithoutChecksums(),
+                read_settings,
+                require_checksums,
+                is_cancelled,
+                is_broken_projection,
+                throw_on_broken_projection);
+        }
+        catch (...)
+        {
+            if (isRetryableException(std::current_exception()))
+            {
+                LOG_DEBUG(
+                    getLogger("checkDataPart"),
+                    "Got retryable error {} checking data part {}, will return empty", getCurrentExceptionMessage(false), data_part->name);
+
+                return IMergeTreeDataPart::Checksums{};
+            }
+            throw;
+        }
 
-        return checkDataPart(
-            data_part,
-            data_part_storage,
-            data_part->getColumns(),
-            data_part->getType(),
-            data_part->getFileNamesWithoutChecksums(),
-            read_settings,
-            require_checksums,
-            is_cancelled,
-            is_broken_projection,
-            throw_on_broken_projection);
     };
 
     try
@@ -431,7 +467,13 @@ IMergeTreeDataPart::Checksums checkDataPart(
     catch (...)
     {
         if (isRetryableException(std::current_exception()))
-            throw;
+        {
+            LOG_DEBUG(
+                getLogger("checkDataPart"),
+                "Got retryable error {} checking data part {}, will return empty", getCurrentExceptionMessage(false), data_part->name);
+
+            return {};
+        }
         return drop_cache_and_check();
     }
 }