Add number of errors to ignore while choosing replicas (distributed_replica_max_ignored_errors)

This makes it possible to avoid switching to another replica after an error (since the error may be temporary).
2024-11-24 16:42:05 +00:00 · 2020-06-13 20:32:21 +03:00 · 2020-06-13 20:32:21 +03:00 · caa195c034
commit caa195c034
parent 687eb24f50
5 changed files with 41 additions and 6 deletions
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -821,6 +821,10 @@ ClickHouse supports the following algorithms of choosing replicas:
 -   [First or random](#load_balancing-first_or_random)
 -   [Round robin](#load_balancing-round_robin)

+See also:
+
+-   [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors)
+
 ### Random (by Default) {#load_balancing-random}

 ``` sql
@ -1170,8 +1174,10 @@ Controls how fast errors in distributed tables are zeroed. If a replica is unava

 See also:

+-   [load\_balancing](#load_balancing-round_robin)
 -   [Table engine Distributed](../../engines/table-engines/special/distributed.md)
 -   [distributed\_replica\_error\_cap](#settings-distributed_replica_error_cap)
+-   [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors)

 ## distributed\_replica\_error\_cap {#settings-distributed_replica_error_cap}

@ -1182,8 +1188,24 @@ Error count of each replica is capped at this value, preventing a single replica

 See also:

+-   [load\_balancing](#load_balancing-round_robin)
 -   [Table engine Distributed](../../engines/table-engines/special/distributed.md)
 -   [distributed\_replica\_error\_half\_life](#settings-distributed_replica_error_half_life)
+-   [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors)
+
+## distributed\_replica\_max\_ignored\_errors {#settings-distributed_replica_max_ignored_errors}
+
+-   Type: unsigned int
+-   Default value: 0
+
+Number of errors that will be ignored while choosing replicas (according to the `load_balancing` algorithm).
+
+See also:
+
+-   [load\_balancing](#load_balancing-round_robin)
+-   [Table engine Distributed](../../engines/table-engines/special/distributed.md)
+-   [distributed\_replica\_error\_cap](#settings-distributed_replica_error_cap)
+-   [distributed\_replica\_error\_half\_life](#settings-distributed_replica_error_half_life)

 ## distributed\_directory\_monitor\_sleep\_time\_ms {#distributed_directory_monitor_sleep_time_ms}

--- a/src/Client/ConnectionPoolWithFailover.cpp
+++ b/src/Client/ConnectionPoolWithFailover.cpp
@ -208,7 +208,10 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g

    bool fallback_to_stale_replicas = settings ? bool(settings->fallback_to_stale_replicas_for_distributed_queries) : true;

-    return Base::getMany(min_entries, max_entries, max_tries, try_get_entry, get_priority, fallback_to_stale_replicas);
+    return Base::getMany(min_entries, max_entries, max_tries,
+        try_get_entry, get_priority,
+        fallback_to_stale_replicas,
+        settings ? settings->distributed_replica_max_ignored_errors.value : 0);
 }

 ConnectionPoolWithFailover::TryResult
--- a/src/Common/PoolWithFailoverBase.h
+++ b/src/Common/PoolWithFailoverBase.h
@ -111,7 +111,8 @@ public:
            size_t min_entries, size_t max_entries, size_t max_tries,
            const TryGetEntryFunc & try_get_entry,
            const GetPriorityFunc & get_priority = GetPriorityFunc(),
-            bool fallback_to_stale_replicas = true);
+            bool fallback_to_stale_replicas = true,
+            size_t max_ignored_errors = 0);

 protected:
    struct PoolState;
@ -119,7 +120,7 @@ protected:
    using PoolStates = std::vector<PoolState>;

    /// This function returns a copy of pool states to avoid race conditions when modifying shared pool states.
-    PoolStates updatePoolStates();
+    PoolStates updatePoolStates(size_t max_ignored_errors);
    PoolStates getPoolStates() const;

    NestedPools nested_pools;
@ -153,10 +154,11 @@ PoolWithFailoverBase<TNestedPool>::getMany(
        size_t min_entries, size_t max_entries, size_t max_tries,
        const TryGetEntryFunc & try_get_entry,
        const GetPriorityFunc & get_priority,
-        bool fallback_to_stale_replicas)
+        bool fallback_to_stale_replicas,
+        size_t max_ignored_errors)
 {
    /// Update random numbers and error counts.
-    PoolStates pool_states = updatePoolStates();
+    PoolStates pool_states = updatePoolStates(max_ignored_errors);
    if (get_priority)
    {
        for (size_t i = 0; i < pool_states.size(); ++i)
@ -317,7 +319,7 @@ private:

 template <typename TNestedPool>
 typename PoolWithFailoverBase<TNestedPool>::PoolStates
-PoolWithFailoverBase<TNestedPool>::updatePoolStates()
+PoolWithFailoverBase<TNestedPool>::updatePoolStates(size_t max_ignored_errors)
 {
    PoolStates result;
    result.reserve(nested_pools.size());
@ -363,6 +365,11 @@ PoolWithFailoverBase<TNestedPool>::updatePoolStates()

        result.assign(shared_pool_states.begin(), shared_pool_states.end());
    }
+
+    /// distributed_replica_max_ignored_errors: saturating subtraction (a plain "a - b" with size_t wraps around instead of going below zero, so max(0, a - b) cannot clamp it).
+    for (auto & state : result)
+        state.error_count = state.error_count > max_ignored_errors ? state.error_count - max_ignored_errors : 0;
+
    return result;
 }

--- a/src/Core/Defines.h
+++ b/src/Core/Defines.h
@ -45,6 +45,8 @@
 #define DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD 60
 /// replica error max cap, this is to prevent replica from accumulating too many errors and taking to long to recover.
 #define DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT 1000
+/// Number of errors that will be ignored while choosing replicas
+#define DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_ERROR_IGNORE 0

 #define DBMS_MIN_REVISION_WITH_CLIENT_INFO 54032
 #define DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE 54058
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -348,6 +348,7 @@ struct Settings : public SettingsCollection<Settings>
    \
    M(SettingSeconds, distributed_replica_error_half_life, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD, "Time period reduces replica error counter by 2 times.", 0) \
    M(SettingUInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up an incredible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.", 0) \
+    M(SettingUInt64, distributed_replica_max_ignored_errors, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_ERROR_IGNORE, "Number of errors that will be ignored while choosing replicas", 0) \
    \
    M(SettingBool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \
    M(SettingSeconds, live_view_heartbeat_interval, DEFAULT_LIVE_VIEW_HEARTBEAT_INTERVAL_SEC, "The heartbeat interval in seconds to indicate live query is alive.", 0) \