mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-15 10:52:30 +00:00
288 lines
8.7 KiB
C++
288 lines
8.7 KiB
C++
#pragma once
|
|
#include <Interpreters/ProcessList.h>
|
|
#include <base/sleep.h>
|
|
#include <Common/Exception.h>
|
|
#include <Common/ZooKeeper/KeeperException.h>
|
|
#include <Common/logger_useful.h>
|
|
|
|
namespace DB
|
|
{
|
|
|
|
namespace ErrorCodes
|
|
{
|
|
/// Extern declaration only. ZooKeeperRetriesControl below uses this value to mean "no user error recorded".
extern const int OK;
|
|
}
|
|
|
|
struct ZooKeeperRetriesInfo
|
|
{
|
|
ZooKeeperRetriesInfo() = default;
|
|
ZooKeeperRetriesInfo(std::string name_, Poco::Logger * logger_, UInt64 max_retries_, UInt64 initial_backoff_ms_, UInt64 max_backoff_ms_)
|
|
: name(std::move(name_))
|
|
, logger(logger_)
|
|
, max_retries(max_retries_)
|
|
, curr_backoff_ms(std::min(initial_backoff_ms_, max_backoff_ms_))
|
|
, max_backoff_ms(max_backoff_ms_)
|
|
{
|
|
}
|
|
|
|
std::string name;
|
|
Poco::Logger * logger = nullptr;
|
|
UInt64 max_retries = 0;
|
|
UInt64 curr_backoff_ms = 0;
|
|
UInt64 max_backoff_ms = 0;
|
|
UInt64 retry_count = 0;
|
|
};
|
|
|
|
class ZooKeeperRetriesControl
|
|
{
|
|
public:
|
|
ZooKeeperRetriesControl(std::string name_, ZooKeeperRetriesInfo & retries_info_, QueryStatusPtr elem)
|
|
: name(std::move(name_)), retries_info(retries_info_), process_list_element(elem)
|
|
{
|
|
}
|
|
|
|
void retryLoop(auto && f)
|
|
{
|
|
retryLoop(f, []() {});
|
|
}
|
|
|
|
/// retryLoop() executes f() until it succeeds/max_retries is reached/non-retrialable error is encountered
|
|
///
|
|
/// the callable f() can provide feedback in terms of errors in two ways:
|
|
/// 1. throw KeeperException exception:
|
|
/// in such case, retries are done only on hardware keeper errors
|
|
/// because non-hardware error codes are semantically not really errors, just a response
|
|
/// 2. set an error code in the ZooKeeperRetriesControl object (setUserError/setKeeperError)
|
|
/// The idea is that if the caller has some semantics on top of non-hardware keeper errors,
|
|
/// then it can provide feedback to retries controller via user errors
|
|
///
|
|
void retryLoop(auto && f, auto && iteration_cleanup)
|
|
{
|
|
while (canTry())
|
|
{
|
|
try
|
|
{
|
|
f();
|
|
iteration_cleanup();
|
|
}
|
|
catch (const zkutil::KeeperException & e)
|
|
{
|
|
iteration_cleanup();
|
|
|
|
if (!Coordination::isHardwareError(e.code))
|
|
throw;
|
|
|
|
setKeeperError(e.code, e.message());
|
|
}
|
|
catch (...)
|
|
{
|
|
iteration_cleanup();
|
|
throw;
|
|
}
|
|
}
|
|
}
|
|
|
|
bool callAndCatchAll(auto && f)
|
|
{
|
|
try
|
|
{
|
|
f();
|
|
return true;
|
|
}
|
|
catch (const zkutil::KeeperException & e)
|
|
{
|
|
setKeeperError(e.code, e.message());
|
|
}
|
|
catch (const Exception & e)
|
|
{
|
|
setUserError(e.code(), e.what());
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void setUserError(int code, std::string message)
|
|
{
|
|
if (retries_info.logger)
|
|
LOG_TRACE(
|
|
retries_info.logger, "ZooKeeperRetriesControl: {}/{}: setUserError: error={} message={}", retries_info.name, name, code, message);
|
|
|
|
/// if current iteration is already failed, keep initial error
|
|
if (!iteration_succeeded)
|
|
return;
|
|
|
|
iteration_succeeded = false;
|
|
user_error.code = code;
|
|
user_error.message = std::move(message);
|
|
keeper_error = KeeperError{};
|
|
}
|
|
|
|
template <typename... Args>
|
|
void setUserError(int code, fmt::format_string<Args...> fmt, Args &&... args)
|
|
{
|
|
setUserError(code, fmt::format(fmt, std::forward<Args>(args)...));
|
|
}
|
|
|
|
void setKeeperError(Coordination::Error code, std::string message)
|
|
{
|
|
if (retries_info.logger)
|
|
LOG_TRACE(
|
|
retries_info.logger, "ZooKeeperRetriesControl: {}/{}: setKeeperError: error={} message={}", retries_info.name, name, code, message);
|
|
|
|
/// if current iteration is already failed, keep initial error
|
|
if (!iteration_succeeded)
|
|
return;
|
|
|
|
iteration_succeeded = false;
|
|
keeper_error.code = code;
|
|
keeper_error.message = std::move(message);
|
|
user_error = UserError{};
|
|
}
|
|
|
|
template <typename... Args>
|
|
void setKeeperError(Coordination::Error code, fmt::format_string<Args...> fmt, Args &&... args)
|
|
{
|
|
setKeeperError(code, fmt::format(fmt, std::forward<Args>(args)...));
|
|
}
|
|
|
|
void stopRetries() { stop_retries = true; }
|
|
|
|
void requestUnconditionalRetry() { unconditional_retry = true; }
|
|
|
|
bool isLastRetry() const { return retries_info.retry_count >= retries_info.max_retries; }
|
|
|
|
bool isRetry() const { return retries_info.retry_count > 0; }
|
|
|
|
Coordination::Error getLastKeeperErrorCode() const { return keeper_error.code; }
|
|
|
|
/// action will be called only once and only after latest failed retry
|
|
void actionAfterLastFailedRetry(std::function<void()> f) { action_after_last_failed_retry = std::move(f); }
|
|
|
|
private:
|
|
struct KeeperError
|
|
{
|
|
using Code = Coordination::Error;
|
|
Code code = Code::ZOK;
|
|
std::string message;
|
|
};
|
|
|
|
struct UserError
|
|
{
|
|
int code = ErrorCodes::OK;
|
|
std::string message;
|
|
};
|
|
|
|
bool canTry()
|
|
{
|
|
++iteration_count;
|
|
/// first iteration is ordinary execution, no further checks needed
|
|
if (0 == iteration_count)
|
|
return true;
|
|
|
|
if (process_list_element && !process_list_element->checkTimeLimitSoft())
|
|
return false;
|
|
|
|
if (unconditional_retry)
|
|
{
|
|
unconditional_retry = false;
|
|
return true;
|
|
}
|
|
|
|
/// iteration succeeded -> no need to retry
|
|
if (iteration_succeeded)
|
|
{
|
|
/// avoid unnecessary logs, - print something only in case of retries
|
|
if (retries_info.logger && iteration_count > 1)
|
|
LOG_DEBUG(
|
|
retries_info.logger,
|
|
"ZooKeeperRetriesControl: {}/{}: succeeded after: iterations={} total_retries={}",
|
|
retries_info.name,
|
|
name,
|
|
iteration_count,
|
|
retries_info.retry_count);
|
|
return false;
|
|
}
|
|
|
|
if (stop_retries)
|
|
{
|
|
logLastError("stop retries on request");
|
|
action_after_last_failed_retry();
|
|
throwIfError();
|
|
return false;
|
|
}
|
|
|
|
if (retries_info.retry_count >= retries_info.max_retries)
|
|
{
|
|
logLastError("retry limit is reached");
|
|
action_after_last_failed_retry();
|
|
throwIfError();
|
|
return false;
|
|
}
|
|
|
|
/// retries
|
|
++retries_info.retry_count;
|
|
logLastError("will retry due to error");
|
|
sleepForMilliseconds(retries_info.curr_backoff_ms);
|
|
retries_info.curr_backoff_ms = std::min(retries_info.curr_backoff_ms * 2, retries_info.max_backoff_ms);
|
|
|
|
/// reset the flag, it will be set to false in case of error
|
|
iteration_succeeded = true;
|
|
|
|
return true;
|
|
}
|
|
|
|
void throwIfError() const
|
|
{
|
|
if (user_error.code != ErrorCodes::OK)
|
|
throw Exception::createDeprecated(user_error.message, user_error.code);
|
|
|
|
if (keeper_error.code != KeeperError::Code::ZOK)
|
|
throw zkutil::KeeperException(keeper_error.message, keeper_error.code);
|
|
}
|
|
|
|
void logLastError(std::string_view header)
|
|
{
|
|
if (user_error.code == ErrorCodes::OK)
|
|
{
|
|
if (retries_info.logger)
|
|
LOG_DEBUG(
|
|
retries_info.logger,
|
|
"ZooKeeperRetriesControl: {}/{}: {}: retry_count={} timeout={}ms error={} message={}",
|
|
retries_info.name,
|
|
name,
|
|
header,
|
|
retries_info.retry_count,
|
|
retries_info.curr_backoff_ms,
|
|
keeper_error.code,
|
|
keeper_error.message);
|
|
}
|
|
else
|
|
{
|
|
if (retries_info.logger)
|
|
LOG_DEBUG(
|
|
retries_info.logger,
|
|
"ZooKeeperRetriesControl: {}/{}: {}: retry_count={} timeout={}ms error={} message={}",
|
|
retries_info.name,
|
|
name,
|
|
header,
|
|
retries_info.retry_count,
|
|
retries_info.curr_backoff_ms,
|
|
user_error.code,
|
|
user_error.message);
|
|
}
|
|
}
|
|
|
|
|
|
std::string name;
|
|
ZooKeeperRetriesInfo & retries_info;
|
|
Int64 iteration_count = -1;
|
|
UserError user_error;
|
|
KeeperError keeper_error;
|
|
std::function<void()> action_after_last_failed_retry = []() {};
|
|
bool unconditional_retry = false;
|
|
bool iteration_succeeded = true;
|
|
bool stop_retries = false;
|
|
QueryStatusPtr process_list_element;
|
|
};
|
|
|
|
}
|