Add more retryable errors and fix check data part

(cherry picked from commit 6d98507feb0bfcc7761583c3a9a3575108207cee)
This commit is contained in:
alesapin 2024-09-30 21:36:57 +02:00
parent d73de82218
commit 48be3f42f1
2 changed files with 66 additions and 19 deletions

View File

@ -131,7 +131,12 @@ bool KeyMetadata::createBaseDirectory(bool throw_if_failed)
{ {
created_base_directory = false; created_base_directory = false;
if (!throw_if_failed && e.code() == std::errc::no_space_on_device) if (!throw_if_failed &&
(e.code() == std::errc::no_space_on_device
|| e.code() == std::errc::read_only_file_system
|| e.code() == std::errc::permission_denied
|| e.code() == std::errc::too_many_files_open
|| e.code() == std::errc::operation_not_permitted))
{ {
LOG_TRACE(cache_metadata->log, "Failed to create base directory for key {}, " LOG_TRACE(cache_metadata->log, "Failed to create base directory for key {}, "
"because no space left on device", key); "because no space left on device", key);

View File

@ -91,6 +91,22 @@ bool isRetryableException(std::exception_ptr exception_ptr)
|| e.code() == ErrorCodes::CANNOT_SCHEDULE_TASK || e.code() == ErrorCodes::CANNOT_SCHEDULE_TASK
|| e.code() == ErrorCodes::ABORTED; || e.code() == ErrorCodes::ABORTED;
} }
catch (const std::filesystem::filesystem_error & e)
{
return e.code() == std::errc::no_space_on_device ||
e.code() == std::errc::read_only_file_system ||
e.code() == std::errc::too_many_files_open_in_system ||
e.code() == std::errc::operation_not_permitted ||
e.code() == std::errc::device_or_resource_busy ||
e.code() == std::errc::permission_denied ||
e.code() == std::errc::too_many_files_open ||
e.code() == std::errc::text_file_busy ||
e.code() == std::errc::timed_out ||
e.code() == std::errc::not_enough_memory ||
e.code() == std::errc::not_supported ||
e.code() == std::errc::too_many_links ||
e.code() == std::errc::too_many_symbolic_link_levels;
}
catch (const Poco::Net::NetException &) catch (const Poco::Net::NetException &)
{ {
return true; return true;
@ -114,7 +130,7 @@ static IMergeTreeDataPart::Checksums checkDataPart(
const NamesAndTypesList & columns_list, const NamesAndTypesList & columns_list,
const MergeTreeDataPartType & part_type, const MergeTreeDataPartType & part_type,
const NameSet & files_without_checksums, const NameSet & files_without_checksums,
const ReadSettings & read_settings, ReadSettings read_settings,
bool require_checksums, bool require_checksums,
std::function<bool()> is_cancelled, std::function<bool()> is_cancelled,
bool & is_broken_projection, bool & is_broken_projection,
@ -171,13 +187,9 @@ static IMergeTreeDataPart::Checksums checkDataPart(
SerializationInfo::Settings settings{ratio_of_defaults, false}; SerializationInfo::Settings settings{ratio_of_defaults, false};
serialization_infos = SerializationInfoByName::readJSON(columns_txt, settings, *serialization_file); serialization_infos = SerializationInfoByName::readJSON(columns_txt, settings, *serialization_file);
} }
catch (const Poco::Exception & ex)
{
throw Exception(ErrorCodes::CORRUPTED_DATA, "Failed to load {}, with error {}", IMergeTreeDataPart::SERIALIZATION_FILE_NAME, ex.message());
}
catch (...) catch (...)
{ {
throw; throw Exception(ErrorCodes::CORRUPTED_DATA, "Failed to load file {} of data part {}, with error {}", IMergeTreeDataPart::SERIALIZATION_FILE_NAME, data_part->name, getCurrentExceptionMessage(true));
} }
} }
@ -398,8 +410,18 @@ IMergeTreeDataPart::Checksums checkDataPart(
} }
ReadSettings read_settings; ReadSettings read_settings;
read_settings.read_through_distributed_cache = false;
read_settings.enable_filesystem_cache = false; read_settings.enable_filesystem_cache = false;
read_settings.enable_filesystem_cache_log = false;
read_settings.enable_filesystem_read_prefetches_log = false;
read_settings.page_cache = nullptr;
read_settings.load_marks_asynchronously = false;
read_settings.remote_fs_prefetch = false;
read_settings.page_cache_inject_eviction = false;
read_settings.use_page_cache_for_disks_without_file_cache = false;
try
{
return checkDataPart( return checkDataPart(
data_part, data_part,
data_part_storage, data_part_storage,
@ -411,6 +433,20 @@ IMergeTreeDataPart::Checksums checkDataPart(
is_cancelled, is_cancelled,
is_broken_projection, is_broken_projection,
throw_on_broken_projection); throw_on_broken_projection);
}
catch (...)
{
if (isRetryableException(std::current_exception()))
{
LOG_DEBUG(
getLogger("checkDataPart"),
"Got retryable error {} checking data part {}, will return empty", getCurrentExceptionMessage(false), data_part->name);
return IMergeTreeDataPart::Checksums{};
}
throw;
}
}; };
try try
@ -431,7 +467,13 @@ IMergeTreeDataPart::Checksums checkDataPart(
catch (...) catch (...)
{ {
if (isRetryableException(std::current_exception())) if (isRetryableException(std::current_exception()))
throw; {
LOG_DEBUG(
getLogger("checkDataPart"),
"Got retryable error {} checking data part {}, will return empty", getCurrentExceptionMessage(false), data_part->name);
return {};
}
return drop_cache_and_check(); return drop_cache_and_check();
} }
} }