From ba55fd3ad2b8349b20158f5afbbe333154ee4b65 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Mon, 2 Jan 2023 08:23:44 +0000
Subject: [PATCH] Cosmetics

---
 .../settings.md                               |  2 +-
 docs/en/operations/settings/settings.md       |  2 +-
 src/Core/Settings.h                           |  4 +--
 src/Interpreters/Cache/QueryResultCache.cpp   | 31 +++++++++----------
 src/Interpreters/Cache/QueryResultCache.h     |  4 +--
 src/Interpreters/executeQuery.cpp             |  2 ++
 6 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index cf4dd042c34..517c720148f 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -1276,7 +1276,7 @@ Default value: `1073741824` (ca. 1 GB.)
 
 ## query_result_cache_max_entries {#server_configuration_parameters_query-result-cache-max-entries}
 
-Maximum number of SELECT query results in the [query result cache](../query-result-cache.md).
+Maximum number of SELECT query results stored in the [query result cache](../query-result-cache.md).
 
 Possible values:
 
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 8437713afdc..0337d143d72 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -1215,7 +1215,7 @@ Default value: `0`
 
 ## query_result_cache_min_query_duration {#query-result-cache-min-query-duration}
 
-Minimum duration in milliseconds a query needs to run for its result to be cached in the [query result cache](../query-result-cache.md).
+Minimum duration in milliseconds a query needs to run for its result to be stored in the [query result cache](../query-result-cache.md).
 
 Possible values:
 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index c5ce6019746..b7ef3392660 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -677,14 +677,14 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
     M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
     M(Bool, optimize_sorting_by_input_stream_properties, true, "Optimize sorting by sorting properties of input stream", 0) \
     M(Bool, enable_experimental_query_result_cache, false, "Store and retrieve results of SELECT queries in/from the query result cache", 0) \
-    M(Bool, enable_experimental_query_result_cache_passive_usage, false, "Only retrieve results of SELECT queries from the query result cache", 0) \
+    M(Bool, enable_experimental_query_result_cache_passive_usage, false, "Retrieve results of SELECT queries from the query result cache", 0) \
     M(Bool, query_result_cache_store_results_of_queries_with_nondeterministic_functions, false, "Store results of queries with non-deterministic functions (e.g. rand(), now()) in the query result cache", 0) \
     M(UInt64, query_result_cache_size, (1ull << 30), "Maximum size of the query result cache in bytes. 0 means disabled.", 0) \
     M(UInt64, query_result_cache_min_query_runs, 0, "Minimum number a SELECT query must run before its result is stored in the query result cache", 0) \
     M(UInt64, query_result_cache_max_entries, (1ull << 10), "Maximum number of SELECT query results in the query result cache", 0) \
     M(UInt64, query_result_cache_max_entry_size, (1ull << 20), "Maximum size in bytes SELECT query results may have to be saved in the query result cache", 0) \
     M(UInt64, query_result_cache_max_entry_records, (1ull << 20), "Maximum number of records SELECT query results may have to be saved in the query result cache", 0) \
-    M(Milliseconds, query_result_cache_min_query_duration, 0, "Minimum time in milliseconds for a query to run for its result to be cached in the query result cache.", 0) \
+    M(Milliseconds, query_result_cache_min_query_duration, 0, "Minimum time in milliseconds for a query to run for its result to be stored in the query result cache.", 0) \
     M(Seconds, query_result_cache_keep_seconds_alive, 60, "After this time in seconds entries in the query result cache become stale", 0) \
     M(Bool, query_result_cache_share_between_users, false, "Allow other users to access entry in the query result cache", 0) \
     M(String, query_result_cache_partition_key, "", "Represents a partition of the query result cache", 0) \
diff --git a/src/Interpreters/Cache/QueryResultCache.cpp b/src/Interpreters/Cache/QueryResultCache.cpp
index 4ecf6b30efc..845250f55b2 100644
--- a/src/Interpreters/Cache/QueryResultCache.cpp
+++ b/src/Interpreters/Cache/QueryResultCache.cpp
@@ -25,17 +25,15 @@ bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context)
     {
         const FunctionFactory & function_factory = FunctionFactory::instance();
         if (const FunctionOverloadResolverPtr resolver = function_factory.tryGet(function->name, context))
-        {
             if (!resolver->isDeterministic())
                 return true;
-        }
     }
 
-    bool has_non_cacheable_functions = false;
+    bool has_non_deterministic_functions = false;
     for (const auto & child : ast->children)
-        has_non_cacheable_functions |= astContainsNonDeterministicFunctions(child, context);
+        has_non_deterministic_functions |= astContainsNonDeterministicFunctions(child, context);
 
-    return has_non_cacheable_functions;
+    return has_non_deterministic_functions;
 }
 
 namespace
@@ -81,15 +79,15 @@ using RemoveQueryResultCacheSettingsVisitor = InDepthNodeVisitor<RemoveQueryResu
 ///       SET enable_experimental_query_result_cache = false;
 /// and
 ///   (2) SELECT expensiveComputation(...) SETTINGS max_threads = 64, enable_experimental_query_result_cache_passive_usage = true;
-/// The SELECT query in (1) and (2) is basically the same and the user expects to find the result in the query result cache. However, query
-/// results are indexed by the AST of their SELECT queries and no result will be found. Insert and retrieval behave overall more natural if
-/// settings related to the query result cache are erased from the AST key. Note that at this point the settings themselves have been parsed
-/// already, they are not lost or discarded.
+/// The SELECT queries in (1) and (2) are basically the same and the user expects that the second invocation is served from the query result
+/// cache. However, query results are indexed by their query ASTs and therefore no result will be found. Insert and retrieval behave overall
+/// more natural if settings related to the query result cache are erased from the AST key. Note that at this point the settings themselves
+/// have been parsed already, they are not lost or discarded.
 ASTPtr removeQueryResultCacheSettings(ASTPtr ast)
 {
     ASTPtr transformed_ast = ast->clone();
 
-    RemoveQueryResultCacheSettingsMatcher::Data visitor_data{};
+    RemoveQueryResultCacheSettingsMatcher::Data visitor_data;
     RemoveQueryResultCacheSettingsVisitor visitor(visitor_data);
     visitor.visit(transformed_ast);
 
@@ -145,7 +143,7 @@ auto is_stale = [](const QueryResultCache::Key & key)
 
 QueryResultCache::Writer::Writer(std::mutex & mutex_, Cache & cache_, const Key & key_,
     size_t & cache_size_in_bytes_, size_t max_cache_size_in_bytes_,
-    size_t max_entries_,
+    size_t max_cache_entries_,
     size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_,
     std::chrono::milliseconds min_query_duration_)
     : mutex(mutex_)
@@ -153,7 +151,7 @@ QueryResultCache::Writer::Writer(std::mutex & mutex_, Cache & cache_, const Key
     , key(key_)
     , cache_size_in_bytes(cache_size_in_bytes_)
     , max_cache_size_in_bytes(max_cache_size_in_bytes_)
-    , max_entries(max_entries_)
+    , max_cache_entries(max_cache_entries_)
     , new_entry_size_in_bytes(0)
     , max_entry_size_in_bytes(max_entry_size_in_bytes_)
     , new_entry_size_in_rows(0)
@@ -163,7 +161,7 @@ QueryResultCache::Writer::Writer(std::mutex & mutex_, Cache & cache_, const Key
     , skip_insert(false)
 {
     if (auto it = cache.find(key); it != cache.end() && !is_stale(it->first))
-        skip_insert = true; /// Do nothing if key exists in cache and it is not expired
+        skip_insert = true; /// Key already contained in cache and did not expire yet --> don't replace it
 }
 
 QueryResultCache::Writer::~Writer()
@@ -187,16 +185,16 @@ try
     };
 
     auto entry = std::make_shared<Chunk>(to_single_chunk(chunks));
-    new_entry_size_in_bytes = entry->allocatedBytes();
+    new_entry_size_in_bytes = entry->allocatedBytes(); // updated because compression potentially affects the size of the single chunk vs the aggregate size of individual chunks
 
     std::lock_guard lock(mutex);
 
     if (auto it = cache.find(key); it != cache.end() && !is_stale(it->first))
-        return; /// same check as in ctor
+        return; /// same check as in ctor because a parallel Writer could have inserted the current key in the meantime
 
     auto sufficient_space_in_cache = [this]() TSA_REQUIRES(mutex)
     {
-        return (cache_size_in_bytes + new_entry_size_in_bytes <= max_cache_size_in_bytes) && (cache.size() + 1 <= max_entries);
+        return (cache_size_in_bytes + new_entry_size_in_bytes <= max_cache_size_in_bytes) && (cache.size() + 1 <= max_cache_entries);
     };
 
     if (!sufficient_space_in_cache())
@@ -327,6 +325,7 @@ size_t QueryResultCache::recordQueryRun(const Key & key)
 
     std::lock_guard times_executed_lock(mutex);
     size_t times = ++times_executed[key];
+    // Regularly drop times_executed to avoid DOS-by-unlimited-growth.
     if (times_executed.size() > TIMES_EXECUTED_MAX_SIZE)
         times_executed.clear();
     return times;
diff --git a/src/Interpreters/Cache/QueryResultCache.h b/src/Interpreters/Cache/QueryResultCache.h
index ceb32f7a3d3..121952c1bee 100644
--- a/src/Interpreters/Cache/QueryResultCache.h
+++ b/src/Interpreters/Cache/QueryResultCache.h
@@ -87,7 +87,7 @@ public:
         const Key key;
         size_t & cache_size_in_bytes TSA_GUARDED_BY(mutex);
         const size_t max_cache_size_in_bytes;
-        const size_t max_entries;
+        const size_t max_cache_entries;
         size_t new_entry_size_in_bytes;
         const size_t max_entry_size_in_bytes;
         size_t new_entry_size_in_rows;
@@ -99,7 +99,7 @@ public:
 
         Writer(std::mutex & mutex_, Cache & cache_, const Key & key_,
             size_t & cache_size_in_bytes_, size_t max_cache_size_in_bytes_,
-            size_t max_entries_,
+            size_t max_cache_entries_,
             size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_,
             std::chrono::milliseconds min_query_duration_);
 
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index 478ab9421c7..c5c43f1896e 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -708,6 +708,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
 
                 auto query_result_cache = context->getQueryResultCache();
 
+                /// If enabled, ask query result cache to serve the query instead of computing it
                 if ((settings.enable_experimental_query_result_cache || settings.enable_experimental_query_result_cache_passive_usage)
                     && query_result_cache != nullptr && res.pipeline.pulling())
                 {
@@ -720,6 +721,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
                         res.pipeline = QueryPipeline(reader.getPipe());
                 }
 
+                /// If enabled, write query result into query result cache
                 if (settings.enable_experimental_query_result_cache && query_result_cache != nullptr && res.pipeline.pulling()
                   && (settings.query_result_cache_store_results_of_queries_with_nondeterministic_functions || !astContainsNonDeterministicFunctions(ast, context)))
                 {