From 38d53c38f6ad2b7298f4be7ab8c398abb6ee36d8 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 16 Sep 2020 14:36:55 +0800 Subject: [PATCH] Explicitly define what the first replica is. --- docs/en/operations/settings/settings.md | 2 ++ src/Client/ConnectionPoolWithFailover.cpp | 10 ++++++++-- src/Core/Settings.h | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 76fcfa2a616..596095c3df9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -940,6 +940,8 @@ This algorithm chooses the first replica in the set or a random replica if the f The `first_or_random` algorithm solves the problem of the `in_order` algorithm. With `in_order`, if one replica goes down, the next one gets a double load while the remaining replicas handle the usual amount of traffic. When using the `first_or_random` algorithm, the load is evenly distributed among replicas that are still available. +It's possible to explicitly define what the first replica is by using the setting `load_balancing_first_offset`. This gives more control over rebalancing query workloads among replicas. + ### Round Robin {#load_balancing-round_robin} ``` sql diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 6d6af794a07..68f4bcd1b76 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -56,6 +56,9 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts return tryGetEntry(pool, timeouts, fail_message, settings); }; + size_t offset = 0; + if (settings) + offset = settings->load_balancing_first_offset % nested_pools.size(); GetPriorityFunc get_priority; switch (settings ? 
LoadBalancing(settings->load_balancing) : default_load_balancing) { @@ -68,7 +71,7 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts case LoadBalancing::RANDOM: break; case LoadBalancing::FIRST_OR_RANDOM: - get_priority = [](size_t i) -> size_t { return i >= 1; }; + get_priority = [offset](size_t i) -> size_t { return i != offset; }; break; case LoadBalancing::ROUND_ROBIN: if (last_used >= nested_pools.size()) @@ -190,6 +193,9 @@ std::vector ConnectionPoolWithFailover::g else throw DB::Exception("Unknown pool allocation mode", DB::ErrorCodes::LOGICAL_ERROR); + size_t offset = 0; + if (settings) + offset = settings->load_balancing_first_offset % nested_pools.size(); GetPriorityFunc get_priority; switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing) { @@ -202,7 +208,7 @@ std::vector ConnectionPoolWithFailover::g case LoadBalancing::RANDOM: break; case LoadBalancing::FIRST_OR_RANDOM: - get_priority = [](size_t i) -> size_t { return i >= 1; }; + get_priority = [offset](size_t i) -> size_t { return i != offset; }; break; case LoadBalancing::ROUND_ROBIN: if (last_used >= nested_pools.size()) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b39c223a5e9..6a6876d0f01 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -88,6 +88,7 @@ class IColumn; M(UInt64, replication_alter_columns_timeout, 60, "Wait for actions to change the table structure within the specified number of seconds. 
0 - wait unlimited time.", 0) \ \ M(LoadBalancing, load_balancing, LoadBalancing::RANDOM, "Which replicas (among healthy replicas) to preferably send a query to (on the first attempt) for distributed processing.", 0) \ + M(UInt64, load_balancing_first_offset, 0, "Which replica to preferably send a query when FIRST_OR_RANDOM load balancing strategy is used.", 0) \ \ M(TotalsMode, totals_mode, TotalsMode::AFTER_HAVING_EXCLUSIVE, "How to calculate TOTALS when HAVING is present, as well as when max_rows_to_group_by and group_by_overflow_mode = ‘any’ are present.", IMPORTANT) \ M(Float, totals_auto_threshold, 0.5, "The threshold for totals_mode = 'auto'.", 0) \