Merge pull request #34802 from ClickHouse/overcommit-doc

Add documentation for memory overcommit
2024-11-24 08:32:02 +00:00 · 2022-03-25 20:50:44 +01:00 · 2022-03-25 20:50:44 +01:00 · 14e84ff6ef
commit 14e84ff6ef
parent e426a9a2f1 058b641d13
6 changed files with 90 additions and 18 deletions
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@ -1616,3 +1616,14 @@ Possible values:

 Default value: `10000`.

+## global_memory_usage_overcommit_max_wait_microseconds {#global_memory_usage_overcommit_max_wait_microseconds}
+
+Sets maximum waiting time for global overcommit tracker.
+
+Possible values:
+
+-   Positive integer.
+
+Default value: `0`.
+
+
--- a/docs/en/operations/settings/memory-overcommit.md
+++ b/docs/en/operations/settings/memory-overcommit.md
@ -0,0 +1,31 @@
+# Memory overcommit
+
+Memory overcommit is an experimental technique intended to allow to set more flexible memory limits for queries.
+
+The idea of this technique is to introduce settings which can represent guaranteed amount of memory a query can use.
+When memory overcommit is enabled and the memory limit is reached ClickHouse will select the most overcommitted query and try to free memory by killing this query.
+
+When memory limit is reached any query will wait some time during atempt to allocate new memory.
+If timeout is passed and memory is freed, the query continues execution. Otherwise an exception will be thrown and the query is killed.
+
+Selection of query to stop or kill is performed by either global or user overcommit trackers depending on what memory limit is reached.
+
+## User overcommit tracker
+
+User overcommit tracker finds a query with the biggest overcommit ratio in the user's query list.
+Overcommit ratio is computed as number of allocated bytes divided by value of `max_guaranteed_memory_usage` setting.
+
+Waiting timeout is set by `memory_usage_overcommit_max_wait_microseconds` setting.
+
+**Example**
+
+```sql
+SELECT number FROM numbers(1000) GROUP BY number SETTINGS max_guaranteed_memory_usage=4000, memory_usage_overcommit_max_wait_microseconds=500
+```
+
+## Global overcommit tracker
+
+Global overcommit tracker finds a query with the biggest overcommit ratio in the list of all queries.
+In this case overcommit ratio is computed as number of allocated bytes divided by value of `max_guaranteed_memory_usage_for_user` setting.
+
+Waiting timeout is set by `global_memory_usage_overcommit_max_wait_microseconds` parameter in the configuration file.
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -4220,10 +4220,36 @@ Possible values:
 -   0 — Disabled.
 -   1 — Enabled. The wait time equal shutdown_wait_unfinished config.

-Default value: 0.
+Default value: `0`.

 ## shutdown_wait_unfinished

 The waiting time in seconds for currently handled connections when shutdown server.

-Default Value: 5.
+Default Value: `5`.
+
+## max_guaranteed_memory_usage
+
+Maximum guaranteed memory usage for processing of single query.
+It represents soft limit in case when hard limit is reached on user level.
+Zero means unlimited.
+Read more about [memory overcommit](memory-overcommit.md).
+
+Default value: `0`.
+
+## memory_usage_overcommit_max_wait_microseconds
+
+Maximum time thread will wait for memory to be freed in the case of memory overcommit on a user level.
+If the timeout is reached and memory is not freed, an exception is thrown.
+Read more about [memory overcommit](memory-overcommit.md).
+
+Default value: `0`.
+
+## max_guaranteed_memory_usage_for_user
+
+Maximum guaranteed memory usage for processing all concurrently running queries for the user.
+It represents soft limit in case when hard limit is reached on global level.
+Zero means unlimited.
+Read more about [memory overcommit](memory-overcommit.md).
+
+Default value: `0`.
--- a/src/Common/OvercommitTracker.cpp
+++ b/src/Common/OvercommitTracker.cpp
@ -23,6 +23,12 @@ void OvercommitTracker::setMaxWaitTime(UInt64 wait_time)

 bool OvercommitTracker::needToStopQuery(MemoryTracker * tracker)
 {
+    // NOTE: Do not change the order of locks
+    //
+    // global_mutex must be acquired before overcommit_m, because
+    // method OvercommitTracker::unsubscribe(MemoryTracker *) is
+    // always called with already acquired global_mutex in
+    // ProcessListEntry::~ProcessListEntry().
    std::unique_lock<std::mutex> global_lock(global_mutex);
    std::unique_lock<std::mutex> lk(overcommit_m);

@ -76,7 +82,7 @@ void UserOvercommitTracker::pickQueryToExcludeImpl()
    MemoryTracker * query_tracker = nullptr;
    OvercommitRatio current_ratio{0, 0};
    // At this moment query list must be read only.
-    // BlockQueryIfMemoryLimit is used in ProcessList to guarantee this.
+    // This is guaranteed by locking global_mutex in OvercommitTracker::needToStopQuery.
    auto & queries = user_process_list->queries;
    LOG_DEBUG(logger, "Trying to choose query to stop from {} queries", queries.size());
    for (auto const & query : queries)
@ -111,9 +117,9 @@ void GlobalOvercommitTracker::pickQueryToExcludeImpl()
    MemoryTracker * query_tracker = nullptr;
    OvercommitRatio current_ratio{0, 0};
    // At this moment query list must be read only.
-    // BlockQueryIfMemoryLimit is used in ProcessList to guarantee this.
-    LOG_DEBUG(logger, "Trying to choose query to stop");
-    process_list->processEachQueryStatus([&](DB::QueryStatus const & query)
+    // This is guaranteed by locking global_mutex in OvercommitTracker::needToStopQuery.
+    LOG_DEBUG(logger, "Trying to choose query to stop from {} queries", process_list->size());
+    for (auto const & query : process_list->processes)
    {
        if (query.isKilled())
            return;
@ -134,7 +140,7 @@ void GlobalOvercommitTracker::pickQueryToExcludeImpl()
            query_tracker = memory_tracker;
            current_ratio   = ratio;
        }
-    });
+    }
    LOG_DEBUG(logger, "Selected to stop query with overcommit ratio {}/{}",
        current_ratio.committed, current_ratio.soft_limit);
    picked_tracker = query_tracker;
--- a/src/Common/OvercommitTracker.h
+++ b/src/Common/OvercommitTracker.h
@ -43,8 +43,6 @@ class MemoryTracker;
 // is killed to free memory.
 struct OvercommitTracker : boost::noncopyable
 {
-    explicit OvercommitTracker(std::mutex & global_mutex_);
-
    void setMaxWaitTime(UInt64 wait_time);

    bool needToStopQuery(MemoryTracker * tracker);
@ -54,8 +52,12 @@ struct OvercommitTracker : boost::noncopyable
    virtual ~OvercommitTracker() = default;

 protected:
+    explicit OvercommitTracker(std::mutex & global_mutex_);
+
    virtual void pickQueryToExcludeImpl() = 0;

+    // This mutex is used to disallow concurrent access
+    // to picked_tracker and cancelation_state variables.
    mutable std::mutex overcommit_m;
    mutable std::condition_variable cv;

@ -87,6 +89,11 @@ private:
        }
    }

+    // Global mutex which is used in ProcessList to synchronize
+    // insertion and deletion of queries.
+    // OvercommitTracker::pickQueryToExcludeImpl() implementations
+    // require this mutex to be locked, because they read list (or sublist)
+    // of queries.
    std::mutex & global_mutex;
 };

--- a/src/Interpreters/ProcessList.h
+++ b/src/Interpreters/ProcessList.h
@ -351,15 +351,6 @@ public:
        max_size = max_size_;
    }

-    // Before calling this method you should be sure
-    // that lock is acquired.
-    template <typename F>
-    void processEachQueryStatus(F && func) const
-    {
-        for (auto && query : processes)
-            func(query);
-    }
-
    void setMaxInsertQueriesAmount(size_t max_insert_queries_amount_)
    {
        std::lock_guard lock(mutex);