Merge pull request #67026 from ClickHouse/backport/24.7/66898

Backport #66898 to 24.7: [CI Fest] Better processing of broken parts and their projections (fixes rare cases of lost forever)
This commit is contained in:
robot-ch-test-poll2 2024-07-24 16:48:32 +04:00 committed by GitHub
commit 7518a547ec
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 27 additions and 18 deletions

View File

@ -16,6 +16,7 @@
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Common/CurrentMetrics.h>
#include <Common/NetException.h>
#include <Common/randomDelay.h>
@ -224,14 +225,18 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write
}
catch (const Exception & e)
{
if (e.code() != ErrorCodes::ABORTED && e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM)
if (e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM
&& !isRetryableException(std::current_exception()))
{
report_broken_part();
}
throw;
}
catch (...)
{
report_broken_part();
if (!isRetryableException(std::current_exception()))
report_broken_part();
throw;
}
}

View File

@ -15,16 +15,11 @@
#include <Processors/QueryPlan/FilterStep.h>
#include <Common/logger_useful.h>
#include <Processors/Merges/Algorithms/MergeTreePartLevelInfo.h>
#include <Storages/MergeTree/checkDataPart.h>
namespace DB
{
namespace ErrorCodes
{
extern const int MEMORY_LIMIT_EXCEEDED;
}
/// Lightweight (in terms of logic) stream for reading single part from
/// MergeTree, used for merges and mutations.
///
@ -281,7 +276,7 @@ try
catch (...)
{
/// Suspicion of the broken part. A part is added to the queue for verification.
if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
if (!isRetryableException(std::current_exception()))
storage.reportBrokenPart(data_part);
throw;
}

View File

@ -36,11 +36,13 @@ namespace ErrorCodes
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int CANNOT_MUNMAP;
extern const int CANNOT_MREMAP;
extern const int CANNOT_SCHEDULE_TASK;
extern const int UNEXPECTED_FILE_IN_DATA_PART;
extern const int NO_FILE_IN_DATA_PART;
extern const int NETWORK_ERROR;
extern const int SOCKET_TIMEOUT;
extern const int BROKEN_PROJECTION;
extern const int ABORTED;
}
@ -85,7 +87,9 @@ bool isRetryableException(std::exception_ptr exception_ptr)
{
return isNotEnoughMemoryErrorCode(e.code())
|| e.code() == ErrorCodes::NETWORK_ERROR
|| e.code() == ErrorCodes::SOCKET_TIMEOUT;
|| e.code() == ErrorCodes::SOCKET_TIMEOUT
|| e.code() == ErrorCodes::CANNOT_SCHEDULE_TASK
|| e.code() == ErrorCodes::ABORTED;
}
catch (const Poco::Net::NetException &)
{
@ -329,16 +333,21 @@ static IMergeTreeDataPart::Checksums checkDataPart(
projections_on_disk.erase(projection_file);
}
if (throw_on_broken_projection && !broken_projections_message.empty())
if (throw_on_broken_projection)
{
throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message);
}
if (!broken_projections_message.empty())
{
throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message);
}
if (require_checksums && !projections_on_disk.empty())
{
throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART,
"Found unexpected projection directories: {}",
fmt::join(projections_on_disk, ","));
/// This one is actually not broken, just redundant files on disk which
/// MergeTree will never use.
if (require_checksums && !projections_on_disk.empty())
{
throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART,
"Found unexpected projection directories: {}",
fmt::join(projections_on_disk, ","));
}
}
if (is_cancelled())