Add more retryable errors and fix check data part

(cherry picked from commit 6d98507feb0bfcc7761583c3a9a3575108207cee)
This commit is contained in:
alesapin 2024-09-30 21:36:57 +02:00
parent d73de82218
commit 48be3f42f1
2 changed files with 66 additions and 19 deletions

View File

@ -131,7 +131,12 @@ bool KeyMetadata::createBaseDirectory(bool throw_if_failed)
{
created_base_directory = false;
if (!throw_if_failed && e.code() == std::errc::no_space_on_device)
if (!throw_if_failed &&
(e.code() == std::errc::no_space_on_device
|| e.code() == std::errc::read_only_file_system
|| e.code() == std::errc::permission_denied
|| e.code() == std::errc::too_many_files_open
|| e.code() == std::errc::operation_not_permitted))
{
LOG_TRACE(cache_metadata->log, "Failed to create base directory for key {}, "
"because no space left on device", key);

View File

@ -91,6 +91,22 @@ bool isRetryableException(std::exception_ptr exception_ptr)
|| e.code() == ErrorCodes::CANNOT_SCHEDULE_TASK
|| e.code() == ErrorCodes::ABORTED;
}
catch (const std::filesystem::filesystem_error & e)
{
return e.code() == std::errc::no_space_on_device ||
e.code() == std::errc::read_only_file_system ||
e.code() == std::errc::too_many_files_open_in_system ||
e.code() == std::errc::operation_not_permitted ||
e.code() == std::errc::device_or_resource_busy ||
e.code() == std::errc::permission_denied ||
e.code() == std::errc::too_many_files_open ||
e.code() == std::errc::text_file_busy ||
e.code() == std::errc::timed_out ||
e.code() == std::errc::not_enough_memory ||
e.code() == std::errc::not_supported ||
e.code() == std::errc::too_many_links ||
e.code() == std::errc::too_many_symbolic_link_levels;
}
catch (const Poco::Net::NetException &)
{
return true;
@ -114,7 +130,7 @@ static IMergeTreeDataPart::Checksums checkDataPart(
const NamesAndTypesList & columns_list,
const MergeTreeDataPartType & part_type,
const NameSet & files_without_checksums,
const ReadSettings & read_settings,
ReadSettings read_settings,
bool require_checksums,
std::function<bool()> is_cancelled,
bool & is_broken_projection,
@ -171,13 +187,9 @@ static IMergeTreeDataPart::Checksums checkDataPart(
SerializationInfo::Settings settings{ratio_of_defaults, false};
serialization_infos = SerializationInfoByName::readJSON(columns_txt, settings, *serialization_file);
}
catch (const Poco::Exception & ex)
{
throw Exception(ErrorCodes::CORRUPTED_DATA, "Failed to load {}, with error {}", IMergeTreeDataPart::SERIALIZATION_FILE_NAME, ex.message());
}
catch (...)
{
throw;
throw Exception(ErrorCodes::CORRUPTED_DATA, "Failed to load file {} of data part {}, with error {}", IMergeTreeDataPart::SERIALIZATION_FILE_NAME, data_part->name, getCurrentExceptionMessage(true));
}
}
@ -398,19 +410,43 @@ IMergeTreeDataPart::Checksums checkDataPart(
}
ReadSettings read_settings;
read_settings.read_through_distributed_cache = false;
read_settings.enable_filesystem_cache = false;
read_settings.enable_filesystem_cache_log = false;
read_settings.enable_filesystem_read_prefetches_log = false;
read_settings.page_cache = nullptr;
read_settings.load_marks_asynchronously = false;
read_settings.remote_fs_prefetch = false;
read_settings.page_cache_inject_eviction = false;
read_settings.use_page_cache_for_disks_without_file_cache = false;
try
{
return checkDataPart(
data_part,
data_part_storage,
data_part->getColumns(),
data_part->getType(),
data_part->getFileNamesWithoutChecksums(),
read_settings,
require_checksums,
is_cancelled,
is_broken_projection,
throw_on_broken_projection);
}
catch (...)
{
if (isRetryableException(std::current_exception()))
{
LOG_DEBUG(
getLogger("checkDataPart"),
"Got reriable error {} checking data part {}, will return empty", data_part->name, getCurrentExceptionMessage(false));
return IMergeTreeDataPart::Checksums{};
}
throw;
}
return checkDataPart(
data_part,
data_part_storage,
data_part->getColumns(),
data_part->getType(),
data_part->getFileNamesWithoutChecksums(),
read_settings,
require_checksums,
is_cancelled,
is_broken_projection,
throw_on_broken_projection);
};
try
@ -431,7 +467,13 @@ IMergeTreeDataPart::Checksums checkDataPart(
catch (...)
{
if (isRetryableException(std::current_exception()))
throw;
{
LOG_DEBUG(
getLogger("checkDataPart"),
"Got reriable error {} checking data part {}, will return empty", data_part->name, getCurrentExceptionMessage(false));
return {};
}
return drop_cache_and_check();
}
}